From 0a8e1d566dff024c1601331322fbc407c4a6a586 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 01/15] Validates chargeback data is generated and then pushed to and retrieved from Loki - uses synth data to calculate total cost via script - runs "openstack rating summary get" to get total cost from Loki - compares script_totals and Loki_Totals; if they are the same, the job passes - Used Gemini and Cursor AI --- .gitignore | 1 + roles/telemetry_chargeback/.gitignore | 2 + roles/telemetry_chargeback/README.md | 115 ++++++- roles/telemetry_chargeback/defaults/main.yml | 27 ++ .../files/gen_db_summary.py | 321 ++++++++++++++++++ .../files/gen_synth_loki_data.py | 157 +++++++-- .../files/test_dyn_basic.yml | 154 +++++++++ .../files/test_static.yml | 57 ---- .../tasks/chargeback_tests.yml | 22 +- .../telemetry_chargeback/tasks/cleanup_ck.yml | 5 + .../tasks/flush_loki_data.yml | 52 +++ .../tasks/gen_synth_loki_data.yml | 59 ++-- .../tasks/ingest_loki_data.yml | 42 +++ .../tasks/load_loki_data.yml | 12 + .../telemetry_chargeback/tasks/loki_rate.yml | 29 ++ roles/telemetry_chargeback/tasks/main.yml | 57 +++- .../tasks/retrieve_loki_data.yml | 71 ++++ .../tasks/run_test_scenarios.yml | 53 +++ .../tasks/setup_loki_env.yml | 63 ++++ .../loki_data_templ.j2 | 0 roles/telemetry_chargeback/vars/main.yml | 20 +- 21 files changed, 1173 insertions(+), 146 deletions(-) create mode 100644 roles/telemetry_chargeback/.gitignore create mode 100644 roles/telemetry_chargeback/files/gen_db_summary.py create mode 100644 roles/telemetry_chargeback/files/test_dyn_basic.yml delete mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/cleanup_ck.yml create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 
roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/run_test_scenarios.yml create mode 100644 roles/telemetry_chargeback/tasks/setup_loki_env.yml rename roles/telemetry_chargeback/{template => templates}/loki_data_templ.j2 (100%) diff --git a/.gitignore b/.gitignore index 44dbcd64d..53e77bcaf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc .idea/ +.ansible/ diff --git a/roles/telemetry_chargeback/.gitignore b/roles/telemetry_chargeback/.gitignore new file mode 100644 index 000000000..7684dfb64 --- /dev/null +++ b/roles/telemetry_chargeback/.gitignore @@ -0,0 +1,2 @@ +files/_gen_synth_loki_metrics_totals.py +.ansible/ diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 192b72a3d..a721ffc93 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -5,7 +5,7 @@ The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** The role performs two main functions: 1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. -2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. +2. **Synthetic Data Generation & Analysis** - Generates synthetic Loki log data for testing chargeback scenarios and calculates metric totals. The role automatically discovers and processes all scenario files matching `test_*.yml` in the `files/` directory. For each scenario it runs: generate synthetic data, compute syn-totals, ingest to Loki, flush Loki ingester memory, and get cost via CloudKitty rating summary (using begin/end from syn-totals). Retrieve-from-Loki is included in the load_loki_data flow. After all scenarios, the role runs cleanup (`cleanup_ck.yml`) to remove the local flush cert directory. 
Requirements ------------ @@ -15,7 +15,7 @@ It relies on the following being available on the target or control host: * The **OpenStack CLI client** must be installed and configured with administrative credentials. * Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). * Connectivity to the OpenStack API endpoint. -* **Python 3** with the following libraries for synthetic data generation: +* **Python 3** with the following libraries for synthetic data generation and analysis: * `PyYAML` * `Jinja2` @@ -23,6 +23,7 @@ It is expected to be run **after** a successful deployment and configuration of * **OpenStack:** A functional OpenStack cloud (RHOSO) environment. * **Cloudkitty:** The Cloudkitty service must be installed, configured, and running. +* **Loki / OpenShift (for ingest and flush):** When using ingest and flush tasks, the control host must have `oc` CLI access, and the Cloudkitty Loki stack (route, certificates, ingester) must be deployed. The role sets Loki push/query URLs and extracts certificates via `setup_loki_env.yml`. Role Variables -------------- @@ -30,33 +31,115 @@ The role uses the following variables to control the testing environment and exe ### User-Configurable Variables (defaults/main.yml) +These variables can be overridden when importing the role or set at the play level. Users can customize these based on their deployment environment and test requirements. + | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. 
| +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for flush certs (removed by cleanup_ck.yml after the run). | +| `remote_cert_dir` | `osp-certs` | Directory inside the OpenStack pod for certs. | +| `cert_secret_name` | `cert-cloudkitty-client-internal` | OpenShift secret name for client certificates. | +| `client_secret` | `secret/cloudkitty-lokistack-gateway-client-http` | Secret for flush client certs. | +| `ca_configmap` | `cm/cloudkitty-lokistack-ca-bundle` | ConfigMap for CA bundle. | +| `logql_query` | `{service="cloudkitty"}` (overridable via `loki_query`) | LogQL query for Loki. | +| `cloudkitty_namespace` | `openstack` | OpenShift namespace for Cloudkitty/Loki resources. | +| `openstackpod` | `openstackclient` | OpenStack client pod name for exec/cp. | +| `lookback` | `6` | Days lookback for Loki query time range. | +| `limit` | `50` | Limit for Loki query results. | + +**Example: Overriding variables when importing the role** +```yaml +- name: "Run chargeback tests" + ansible.builtin.import_role: + name: telemetry_chargeback + vars: + cloudkitty_namespace: "my-custom-namespace" + lookback: 10 + cloudkitty_debug: true +``` ### Internal Variables (vars/main.yml) -These variables are used internally by the role and typically do not need to be modified. +These variables are used internally by the role and should not be modified. They use `role_path` for internal file/script references and define internal file naming conventions. | Variable | Default Value | Description | |----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. 
| -| `ck_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `ck_data_template` | `{{ role_path }}/template/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +| `cloudkitty_scenario_dir` | `{{ role_path }}/files` | Directory containing scenario files (`test_*.yml`). | +| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | +| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | +| `cloudkitty_summary_script` | `{{ role_path }}/files/gen_db_summary.py` | Path to the summary script (gen_db_summary.py). | +| `cloudkitty_synth_data_suffix` | `-synth_data.json` | Suffix for generated synthetic data files. | +| `cloudkitty_loki_data_suffix` | `-loki_data.json` | Suffix for Loki query result JSON files. | +| `cloudkitty_synth_totals_metrics_suffix` | `-synth_metrics_summary.yml` | Suffix for generated metric totals files (from synthetic data). | +| `cloudkitty_loki_totals_metrics_suffix` | `-loki_metrics_summary.yml` | Suffix for metric totals computed from Loki-retrieved JSON (retrieve_loki_data task). | +| `cloudkitty_loki_totals_suffix` | `-rating.yml` | Suffix for CloudKitty rating summary output files (from loki_rate task). | + +**Note:** Loki push/query URLs are set dynamically in `setup_loki_env.yml` from the Cloudkitty Loki route. + +### Synthetic Data Scripts + +**gen_synth_loki_data.py** — Generates Loki-format JSON from a scenario YAML and template. 
The role invokes it with `-r` so that timestamps in the output are in **reverse** order (newest first, oldest last). When run manually you can omit `-r` for chronological order (oldest first, newest last). + +| Option | Description | +|--------|--------------| +| `--tmpl` | Path to the Jinja2 template (e.g. `loki_data_templ.j2`). | +| `-t`, `--test` | Path to the scenario YAML (e.g. `test_dyn_basic.yml`). | +| `-o`, `--output` | Path to the output JSON file. | +| `-p`, `--project-id` | Optional; overrides `groupby.project` in every log entry. | +| `-u`, `--user-id` | Optional; overrides `groupby.user` in every log entry. | +| `-r`, `--reverse` | Reverse timestamp order in JSON output (newest first, oldest last). | +| `--debug` | Enable debug logging. | + +**gen_db_summary.py** (`cloudkitty_summary_script`) — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. + +| Option | Description | +|--------|--------------| +| `-j`, `--json` | Path to the input JSON file (required). | +| `-o`, `--output` | Path to the output YAML file (default: `{stem}_total.yml` next to the input file, where `{stem}` is the input file name without its extension). | +| `--debug` | Directory to write debug output (`{stem}_diff.txt` with one `[ts,log]` JSON per line). | + +Output YAML structure: + +* **time** — `begin_step` / `end_step`, each with `nanosec` (nanosecond timestamp), `begin`, `end` (ISO window strings from the log payload). The `nanosec` values are used for Loki query time range in `retrieve_loki_data.yml`. +* **data_log** — `total_timesteps`, `metrics_per_step`, `log_count`. +* **rate** — `by_types` (per-type `Rate` calculated as `Σ((qty_mutated * factor + offset) * price)`) and `total.Rating` (sum of all rates). 
+ +### Dynamically Set Variables + +Set in **main.yml** from the OpenStack CLI (`openstack project show admin` / `openstack user show admin`): + +| Variable | Description | +|----------|-------------| +| `cloudkitty_project_id` | ID of the OpenStack project named `admin` (empty string if not found). Passed as `-p` to the synthetic data generator when non-empty. | +| `cloudkitty_user_id` | ID of the OpenStack user named `admin` (empty string if not found). Passed as `-u` to the synthetic data generator when non-empty. | + +Set in **gen_synth_loki_data.yml** for each scenario file during the loop: + +| Variable | Description | +|----------|-------------| +| `cloudkitty_data_file` | Local path for generated JSON data (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_data.json`) | +| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}`) | +| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml`) | Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching `test_*.yml` will be automatically discovered and processed. Files whose names start with an underscore (e.g. `_test_*.yml`) are **not** discovered by the role; they can be used as reference or for manual runs. + +Each scenario file defines: + +* **generation** — Time range configuration (days, step_seconds). +* **log_types** — List of log type definitions. Each entry has **type** (identifier and value in output), unit, description, qty, price, groupby, and metadata. The **groupby** dict typically includes dimension keys (e.g. resource, user, project, tenant); the generator merges **date_fields** into groupby at run time. 
+* **required_fields** — Top-level keys required for each log type (e.g. type, unit, qty, price, groupby, metadata). +* **date_fields** — Date field names to merge into groupby (week_of_the_year, day_of_the_year, month, year). +* **loki_stream** — Loki stream configuration (service name). + +**groupby.id** should be consistent by metric type across scenario files so that the same type always uses the same id. -* **generation** - Time range configuration (days, step_seconds) -* **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata -* **required_fields** - Fields required for validation -* **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) -* **loki_stream** - Loki stream configuration (service name) +Scenario files matching `test_*.yml` in the `files/` directory are automatically discovered and processed. Files whose names start with an underscore are not auto-discovered. Dependencies ------------ diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..9cc04c8c7 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,29 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Debug mode +cloudkitty_debug: false + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings 
+cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py new file mode 100644 index 000000000..9234a64df --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +""" +Parse Loki JSON (or text) into [timestep, log_entry] pairs, then emit a YAML +summary: time, data_log, and rate (per-type Σ(qty×price) and total Rating). + +Same CLI as gen_synth_loki_metrics_totals.py (-j, -o, --debug). +""" +from __future__ import annotations + +import argparse +import json +import math +import sys +from collections import Counter, defaultdict +from pathlib import Path +from typing import Any, Optional + +import yaml + +REQUIRED_KEYS = frozenset( + {"start", "end", "type", "unit", "qty", "price", "groupby"} +) + + +def _valid_ts(s: str) -> bool: + return isinstance(s, str) and s.isdigit() and len(s) >= 19 + + +def _valid_entry(obj: dict) -> bool: + return REQUIRED_KEYS.issubset(obj.keys()) + + +def _try_pair(ts_str: str, log_str: str) -> Optional[tuple[str, str]]: + if not _valid_ts(ts_str) or not isinstance(log_str, str): + return None + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + return None + if isinstance(entry, dict) and _valid_entry(entry): + return (ts_str, log_str) + return None + + +def _extract_from_loki_json(data: dict) -> list[tuple[str, str]]: + streams = data.get("streams") + if streams is None: + streams = data.get("data", {}).get("result", []) + if not isinstance(streams, list): + return [] + pairs: list[tuple[str, str]] = [] + for stream in streams: + for val in stream.get("values", []): + if not isinstance(val, (list, tuple)) or len(val) < 2: + continue + p = _try_pair(val[0], val[1]) + if p: + pairs.append(p) + return pairs + + +def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: + """ + Load JSON 
from json_path, extract [timestep, log_entry] pairs, + and return them sorted by timestep (ascending). + """ + raw = json_path.read_text(encoding="utf-8", errors="replace") + + # Parse as JSON (fail if invalid) + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"Error: Invalid JSON in {json_path}: {e}", + file=sys.stderr + ) + sys.exit(1) + + # Extract from known Loki JSON structures + if not isinstance(data, dict): + print( + f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + file=sys.stderr + ) + sys.exit(1) + + pairs = _extract_from_loki_json(data) + + if not pairs: + print( + f"Error: No valid log entries found in {json_path}. " + "Expected structure: {{'streams': [...]}} or " + "{{'data': {{'result': [...]}}}}'", + file=sys.stderr + ) + sys.exit(1) + + pairs.sort(key=lambda p: int(p[0])) + return pairs + + +def _apply_mutate(qty: float, mutate: str) -> float: + """ + Apply mutate transformation to qty value. + + Args: + qty: The quantity value to transform. + mutate: The mutation type (NONE, CEIL, FLOOR, NUMBOOL, NOTNUMBOOL). + + Returns: + The transformed quantity. + """ + mutate_upper = mutate.upper() if isinstance(mutate, str) else "NONE" + + if mutate_upper == "CEIL": + return math.ceil(qty) + elif mutate_upper == "FLOOR": + return math.floor(qty) + elif mutate_upper == "NUMBOOL": + # If qty equals 0, leave it at 0. Else, set it to 1. + return 0.0 if qty == 0 else 1.0 + elif mutate_upper == "NOTNUMBOOL": + # If qty equals 0, set it to 1. Else, set it to 0. + return 1.0 if qty == 0 else 0.0 + else: # NONE or any unrecognized value + return qty + + +def _parse_numeric(value: Any, default: float = 0) -> float: + """ + Parse a numeric value, supporting fractions like '1/1048576'. + + This function handles the 'factor' field in scenario YAML files which uses + fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match + CloudKitty/chargeback documentation standards. 
Without this parser, fraction + strings would cause ValueError when passed to float(), silently dropping + metrics from the output summary. + + Args: + value: The value to parse (can be number, string, or fraction string) + default: Default value if parsing fails + + Returns: + Parsed float value + """ + if value is None: + return default + + # If it's already a number, convert directly + if isinstance(value, (int, float)): + return float(value) + + # If it's a string, check for fraction notation (e.g., "1/1048576") + if isinstance(value, str): + value = value.strip() + if '/' in value: + try: + parts = value.split('/') + if len(parts) == 2: + numerator = float(parts[0].strip()) + denominator = float(parts[1].strip()) + if denominator != 0: + return numerator / denominator + except (ValueError, ZeroDivisionError): + pass + # Try direct conversion + try: + return float(value) + except ValueError: + pass + + return default + + +def aggregate_rates_by_type( + pairs: list[tuple[str, str]], +) -> tuple[dict, float]: + sums: defaultdict[str, float] = defaultdict(float) + for _, log_str in pairs: + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + mtype = entry.get("type") + if not isinstance(mtype, str) or not mtype: + mtype = "unknown" + try: + qty = _parse_numeric(entry.get("qty"), 0) + price = _parse_numeric(entry.get("price"), 0) + factor = _parse_numeric(entry.get("factor"), 1) + offset = _parse_numeric(entry.get("offset"), 0) + mutate = entry.get("mutate", "NONE") + except (TypeError, ValueError): + continue + + # Apply mutate transformation + qty_mutated = _apply_mutate(qty, mutate) + + # Apply factor and offset + qty_rate = qty_mutated * factor + offset + + # Calculate rate + sums[mtype] += qty_rate * price + by_types = {k: {"Rate": round(v, 4)} for k, v in sorted(sums.items())} + total = sum(sums.values()) + return by_types, total + + +def build_summary(pairs: list[tuple[str, str]]) -> 
dict[str, Any]: + log_count = len(pairs) + per_ts = Counter(ts for ts, _ in pairs) + n_ts = len(per_ts) + counts = list(per_ts.values()) + mps: Any = counts[0] if counts else 0 + if counts and len(set(counts)) > 1: + mps = "ERROR" + + if pairs: + first = json.loads(pairs[0][1]) + last = json.loads(pairs[-1][1]) + time_block = { + "begin_step": { + "nanosec": int(pairs[0][0]), + "begin": first.get("start"), + "end": first.get("end"), + }, + "end_step": { + "nanosec": int(pairs[-1][0]), + "begin": last.get("start"), + "end": last.get("end"), + }, + } + else: + empty = {"nanosec": None, "begin": None, "end": None} + time_block = {"begin_step": empty.copy(), "end_step": empty.copy()} + + by_types, total_r = aggregate_rates_by_type(pairs) + return { + "time": time_block, + "data_log": { + "total_timesteps": n_ts, + "metrics_per_step": mps, + "log_count": log_count, + }, + "rate": { + "by_types": by_types, + "total": {"Rating": round(total_r, 4)}, + }, + } + + +def write_yaml(path: Path, doc: dict[str, Any]) -> None: + with path.open("w", encoding="utf-8") as f: + f.write("---\n") + yaml.dump( + doc, + f, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Summarize Loki JSON log entries to YAML (time, data_log, rate)." + ), + ) + parser.add_argument( + "-j", "--json", required=True, type=Path, help="Input JSON.", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="Output YAML (default: _total.yml).", + ) + parser.add_argument( + "--debug", + type=Path, + default=None, + metavar="DIR", + help=( + "If set, write _diff.txt with one [ts,log] JSON per line." 
+ ), + ) + args = parser.parse_args() + + if not args.json.exists(): + print(f"Error: input file not found: {args.json}", file=sys.stderr) + sys.exit(1) + + stem = args.json.stem + out_path = args.output or (args.json.parent / f"{stem}_total.yml") + pairs = extract_and_sort(args.json) + + dbg = str(args.debug).strip() if args.debug is not None else "" + if dbg and dbg != ".": + args.debug.mkdir(parents=True, exist_ok=True) + dbg_file = args.debug / f"{args.json.stem}_diff.txt" + with dbg_file.open("w", encoding="utf-8") as f: + for ts, log_str in pairs: + print(json.dumps([ts, log_str], ensure_ascii=False), file=f) + + doc = build_summary(pairs) + write_yaml(out_path, doc) + + if doc["data_log"]["metrics_per_step"] == "ERROR": + per_ts = Counter(ts for ts, _ in pairs) + exp = next(iter(per_ts.values()), 0) + for ts in sorted(per_ts, key=int): + if per_ts[ts] != exp: + print(ts, per_ts[ts], file=sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..263554dc6 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,13 +2,48 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. 
+ step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, @@ -73,7 +108,10 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + project: Union[str, int, None] = None, + user: Union[str, int, None] = None, + reverse_timestamps: bool = False, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +123,12 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. + project: Optional value to inject as groupby.project in every + log entry in the output (overrides test_* file value when set). + user: Optional value to inject as groupby.user in every + log entry in the output (overrides test_* file value when set). + reverse_timestamps (bool): If True, reverse the order of timestamps + in the JSON output (newest first, oldest last). 
""" # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,37 +219,49 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else 
[price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), "metadata": log_type_config.get("metadata", {}) } @@ -231,15 +287,21 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Reversed timestamp order (newest first, oldest last)." + ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +329,17 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + if project is not None: + log_type_with_dates["groupby"]["project"] = project + if user is not None: + log_type_with_dates["groupby"]["user"] = user + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = 
log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +369,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." + ) + sys.exit(1) def main(): @@ -324,8 +408,30 @@ def main(): required=True, help="Path to the output file." ) + parser.add_argument( + "-p", "--project-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.project in " + "every log entry in the output (overrides value from test file)." + ) + parser.add_argument( + "-u", "--user-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.user in " + "every log entry in the output (overrides value from test file)." + ) # --- Optional Utility Arguments --- + parser.add_argument( + "-r", "--reverse", + action="store_true", + help="Reverse timestamp order in JSON output: newest first, " + "oldest last (default is oldest first, newest last)." 
+ ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +468,10 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + project=args.project_id, + user=args.user_id, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml new file mode 100644 index 000000000..791335fbc --- /dev/null +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -0,0 +1,154 @@ +--- +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 14400 + +# Log type definitions (single "type" = identifier and value pushed to output) +log_types: + - type: ceilometer_image_size + description: "Size of ceilometer image" + unit: MiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_image_test + description: "Size of ceilometer test" + unit: B +# factor: 1 + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_cpu + description: "max number of cpus used in time step" + unit: instance + alt_name: instance + qty: + - 1 + price: + - 5.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + flavor_name: null + flavor_id: null + mutate: NUMBOOL + + - type: ceilometer_ip_floating + description: null + unit: ip + qty: + - 5 + price: + - 1.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + state: null + mutate: NUMBOOL + + - type: ceilometer_disk_ephemeral_size + description: "Max at each timestep" + unit: GiB + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: 
null + user: null + project: null + tenant: tenant-01 + metadata: + type: null + + - type: ceilometer_disk_root_size + description: null + unit: GiB + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + type: null + + - type: ceilometer_network_outgoing_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + vm_instance: null + + - type: ceilometer_network_incoming_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + vm_instance: null + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml deleted file mode 100644 index f94a3c1d2..000000000 --- a/roles/telemetry_chargeback/files/test_static.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Scenario configuration for synthetic Loki log data generation - -# Time range configuration -generation: - days: 1 - step_seconds: 7200 - -# Log type definitions -log_types: - - name: ceilometer_image_size - type: ceilometer_image_size - unit: MiB - description: null - qty: 20.6 - price: 0.02 - groupby: - id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 - project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 - user_id: null - metadata: - container_format: bare - disk_format: qcow2 - - - name: instance - type: instance - unit: instance - description: null - qty: 1.0 - price: 0.3 - groupby: - id: de168c31-ed44-4a1a-a079-51bd238a91d6 - project_id: 9cf5bcfc61a24682acc448af2d062ad2 - user_id: 
c29ab6e886354bbd88ee9899e62d1d40 - metadata: - flavor_name: m1.tiny - flavor_id: "1" - vcpus: "" - -# Required fields for validation (top-level fields only, not nested in groupby) -required_fields: - - type - - unit - - qty - - price - - groupby - - metadata - -# Date field names to add to groupby -date_fields: - - week_of_the_year - - day_of_the_year - - month - - year - -# Loki stream configuration -loki_stream: - service: cloudkitty diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index df07fb503..99ddcc44e 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -1,40 +1,42 @@ --- -- name: Enable Cloudkitty Module (hashmap) +- name: "Enable CloudKitty module (hashmap)" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module enable hashmap" register: enable_hashmap - changed_when: True + changed_when: true failed_when: enable_hashmap.rc != 0 -- name: Find the current value of hashmap +- name: "Find the current value of hashmap" ansible.builtin.shell: - cmd: "{{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + cmd: "set -o pipefail && {{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + args: + executable: /bin/bash register: get_hashmap_priority changed_when: false -- name: Change priority for CloudKitty hashmap module +- name: "Change priority for CloudKitty hashmap module" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' failed_when: (set_hashmap_priority.rc | default(42)) >= 1 or get_hashmap_priority.stdout == "" - changed_when: True + changed_when: true -- name: Get status of all CloudKitty rating modules +- name: "Get status of all CloudKitty rating modules" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module list" 
changed_when: false register: module_list -- name: TEST Validate CloudKitty module states +- name: "TEST Validate CloudKitty module states" ansible.builtin.assert: that: - "'hashmap' in module_list.stdout" - "'True' in (module_list.stdout_lines | select('search', 'hashmap') | first)" - fail_msg: "FAILED: CloudKitty module validation failed . Module states are not as expected." + fail_msg: "FAILED: CloudKitty module validation failed. Module states are not as expected." success_msg: "SUCCESS: CloudKitty modules (hashmap=True) are configured correctly." -- name: TEST Set priority for CloudKitty hashmap module +- name: "TEST Set priority for CloudKitty hashmap module" ansible.builtin.assert: that: - "(get_hashmap_priority.stdout | trim == '100') or (set_hashmap_priority.rc | default(-1) == 0)" diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + 
ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index e37b54c6b..ec80ca3cc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,40 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ item }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + +- name: "Check for preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ item }}" ansible.builtin.command: cmd: > 
- python3 "{{ ck_synth_script }}" - --tmpl "{{ ck_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + python3 "{{ cloudkitty_synth_script }}" + -r + --tmpl "{{ cloudkitty_data_template }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" + {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." 
- -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..a53751f3f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" 
+ + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..a2a1e129f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,12 @@ +--- +- name: "Ingest CloudKitty data log for {{ item }}" + ansible.builtin.include_tasks: + file: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ item }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: + file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..b9cbd9843 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,29 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: | + "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 
+ +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..e2f264834 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,57 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "CloudKitty debug ON/OFF" + ansible.builtin.set_fact: + cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" + +- name: "Get admin project ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} project show admin -f value -c id" + register: get_admin_project_id + changed_when: false + failed_when: false + +- name: "Set admin project ID for CI" + ansible.builtin.set_fact: + cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" + +- name: "Get admin user ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} user show admin -f value -c id" + register: get_admin_user_id + changed_when: false + failed_when: false + +- name: "Set admin user ID for CI" + ansible.builtin.set_fact: + cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + 
- name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files }}" + when: found_files | length > 0 + + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..2f130e711 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - 
loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..5addb4a22 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,53 @@ +--- +- name: "Generate Synthetic Data for each file: {{ item }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ item }}" + ansible.builtin.include_tasks: "load_loki_data.yml" + +- name: "Get total rate from Loki: {{ item }}" + ansible.builtin.include_tasks: "loki_rate.yml" + +#### diff uploaded data totals vs download data totals +- name: "Check synthetic totals file exists" + ansible.builtin.stat: + path: "{{ 
artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + register: synth_totals_stat + +- name: "Check Loki totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + register: loki_totals_stat + +- name: "TEST Totals files exist {{ item }}" + ansible.builtin.assert: + that: + - synth_totals_stat.stat.exists | default(false) + - loki_totals_stat.stat.exists | default(false) + fail_msg: | + FAILED! Required file(s) missing for scenario {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + success_msg: | + PASSED! Required file(s) exist {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + +- name: "Diff synthetic totals vs Loki totals {{ item }}" + ansible.builtin.command: + cmd: > + diff + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + register: yaml_diff + failed_when: false + changed_when: false + +- name: "TEST Compare synthetic data vs Loki data results {{ item }}" + ansible.builtin.assert: + that: + - yaml_diff.rc == 0 + fail_msg: | + FAILED! {{ item }} - Files differ: + {{ yaml_diff.stdout }} + success_msg: "PASSED! {{ item }} - Data totals are identical." 
diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..d0388913c --- /dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,63 @@ +--- +# Setup Loki Environment + +# Dynamic URLs +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://SERVICE.NAMESPACE.svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +# Certs to Flush data to Loki +# - name: Create a directory to extract certificates +# ansible.builtin.file: +# path: "{{ local_cert_dir }}" +# state: directory +# mode: '0755' + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{
cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true diff --git a/roles/telemetry_chargeback/template/loki_data_templ.j2 b/roles/telemetry_chargeback/templates/loki_data_templ.j2 similarity index 100% rename from roles/telemetry_chargeback/template/loki_data_templ.j2 rename to roles/telemetry_chargeback/templates/loki_data_templ.j2 diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 1014a6a9e..5815cc92d 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,9 +1,15 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" +# Internal role variables - these use role_path and should not be overridden -ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" -ck_data_template: "{{ role_path }}/template/loki_data_templ.j2" -ck_data_config: "{{ role_path }}/files/test_static.yml" -ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json" -ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" +# Scenario and script paths (using role_path) +cloudkitty_scenario_dir: "{{ role_path }}/files" +cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" +cloudkitty_summary_script: "{{ role_path }}/files/gen_db_summary.py" + +# File naming conventions (internal standardization) +cloudkitty_synth_data_suffix: "-synth_data.json" +cloudkitty_loki_data_suffix: "-loki_data.json" +cloudkitty_synth_totals_metrics_suffix: "-synth_metrics_summary.yml" +cloudkitty_loki_totals_metrics_suffix: "-loki_metrics_summary.yml" +cloudkitty_loki_totals_suffix: "-rating.yml" From 8f70d3db45211928db63be6b571a09d4d66d6fe8 Mon Sep 17 00:00:00 
2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 02/15] Validates chargeback data is generated and then pushed to and retrieved from Loki - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI --- .gitignore | 1 + roles/telemetry_chargeback/.gitignore | 2 + roles/telemetry_chargeback/README.md | 115 ++++++- roles/telemetry_chargeback/defaults/main.yml | 27 ++ .../files/gen_db_summary.py | 321 ++++++++++++++++++ .../files/gen_synth_loki_data.py | 157 +++++++-- .../files/test_dyn_basic.yml | 154 +++++++++ .../files/test_static.yml | 57 ---- .../tasks/chargeback_tests.yml | 22 +- .../telemetry_chargeback/tasks/cleanup_ck.yml | 5 + .../tasks/flush_loki_data.yml | 52 +++ .../tasks/gen_synth_loki_data.yml | 59 ++-- .../tasks/ingest_loki_data.yml | 42 +++ .../tasks/load_loki_data.yml | 12 + .../telemetry_chargeback/tasks/loki_rate.yml | 29 ++ roles/telemetry_chargeback/tasks/main.yml | 57 +++- .../tasks/retrieve_loki_data.yml | 71 ++++ .../tasks/run_test_scenarios.yml | 53 +++ .../tasks/setup_loki_env.yml | 63 ++++ .../loki_data_templ.j2 | 0 roles/telemetry_chargeback/vars/main.yml | 20 +- 21 files changed, 1173 insertions(+), 146 deletions(-) create mode 100644 roles/telemetry_chargeback/.gitignore create mode 100644 roles/telemetry_chargeback/files/gen_db_summary.py create mode 100644 roles/telemetry_chargeback/files/test_dyn_basic.yml delete mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/cleanup_ck.yml create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644
roles/telemetry_chargeback/tasks/retrieve_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/run_test_scenarios.yml create mode 100644 roles/telemetry_chargeback/tasks/setup_loki_env.yml rename roles/telemetry_chargeback/{template => templates}/loki_data_templ.j2 (100%) diff --git a/.gitignore b/.gitignore index 44dbcd64d..53e77bcaf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc .idea/ +.ansible/ diff --git a/roles/telemetry_chargeback/.gitignore b/roles/telemetry_chargeback/.gitignore new file mode 100644 index 000000000..7684dfb64 --- /dev/null +++ b/roles/telemetry_chargeback/.gitignore @@ -0,0 +1,2 @@ +files/_gen_synth_loki_metrics_totals.py +.ansible/ diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 192b72a3d..a721ffc93 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -5,7 +5,7 @@ The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** The role performs two main functions: 1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. -2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. +2. **Synthetic Data Generation & Analysis** - Generates synthetic Loki log data for testing chargeback scenarios and calculates metric totals. The role automatically discovers and processes all scenario files matching `test_*.yml` in the `files/` directory. For each scenario it runs: generate synthetic data, compute syn-totals, ingest to Loki, flush Loki ingester memory, and get cost via CloudKitty rating summary (using begin/end from syn-totals). Retrieve-from-Loki is included in the load_loki_data flow. After all scenarios, the role runs cleanup (`cleanup_ck.yml`) to remove the local flush cert directory. 
Requirements ------------ @@ -15,7 +15,7 @@ It relies on the following being available on the target or control host: * The **OpenStack CLI client** must be installed and configured with administrative credentials. * Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). * Connectivity to the OpenStack API endpoint. -* **Python 3** with the following libraries for synthetic data generation: +* **Python 3** with the following libraries for synthetic data generation and analysis: * `PyYAML` * `Jinja2` @@ -23,6 +23,7 @@ It is expected to be run **after** a successful deployment and configuration of * **OpenStack:** A functional OpenStack cloud (RHOSO) environment. * **Cloudkitty:** The Cloudkitty service must be installed, configured, and running. +* **Loki / OpenShift (for ingest and flush):** When using ingest and flush tasks, the control host must have `oc` CLI access, and the Cloudkitty Loki stack (route, certificates, ingester) must be deployed. The role sets Loki push/query URLs and extracts certificates via `setup_loki_env.yml`. Role Variables -------------- @@ -30,33 +31,115 @@ The role uses the following variables to control the testing environment and exe ### User-Configurable Variables (defaults/main.yml) +These variables can be overridden when importing the role or set at the play level. Users can customize these based on their deployment environment and test requirements. + | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. 
| +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for flush certs (removed by cleanup_ck.yml after the run). | +| `remote_cert_dir` | `osp-certs` | Directory inside the OpenStack pod for certs. | +| `cert_secret_name` | `cert-cloudkitty-client-internal` | OpenShift secret name for client certificates. | +| `client_secret` | `secret/cloudkitty-lokistack-gateway-client-http` | Secret for flush client certs. | +| `ca_configmap` | `cm/cloudkitty-lokistack-ca-bundle` | ConfigMap for CA bundle. | +| `logql_query` | `{service="cloudkitty"}` (overridable via `loki_query`) | LogQL query for Loki. | +| `cloudkitty_namespace` | `openstack` | OpenShift namespace for Cloudkitty/Loki resources. | +| `openstackpod` | `openstackclient` | OpenStack client pod name for exec/cp. | +| `lookback` | `6` | Days lookback for Loki query time range. | +| `limit` | `50` | Limit for Loki query results. | + +**Example: Overriding variables when importing the role** +```yaml +- name: "Run chargeback tests" + ansible.builtin.import_role: + name: telemetry_chargeback + vars: + cloudkitty_namespace: "my-custom-namespace" + lookback: 10 + cloudkitty_debug: true +``` ### Internal Variables (vars/main.yml) -These variables are used internally by the role and typically do not need to be modified. +These variables are used internally by the role and should not be modified. They use `role_path` for internal file/script references and define internal file naming conventions. | Variable | Default Value | Description | |----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. 
| -| `ck_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `ck_data_template` | `{{ role_path }}/template/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +| `cloudkitty_scenario_dir` | `{{ role_path }}/files` | Directory containing scenario files (`test_*.yml`). | +| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | +| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | +| `cloudkitty_summary_script` | `{{ role_path }}/files/gen_db_summary.py` | Path to the summary script (gen_db_summary.py). | +| `cloudkitty_synth_data_suffix` | `-synth_data.json` | Suffix for generated synthetic data files. | +| `cloudkitty_loki_data_suffix` | `-loki_data.json` | Suffix for Loki query result JSON files. | +| `cloudkitty_synth_totals_metrics_suffix` | `-synth_metrics_summary.yml` | Suffix for generated metric totals files (from synthetic data). | +| `cloudkitty_loki_totals_metrics_suffix` | `-loki_metrics_summary.yml` | Suffix for metric totals computed from Loki-retrieved JSON (retrieve_loki_data task). | +| `cloudkitty_loki_totals_suffix` | `-rating.yml` | Suffix for CloudKitty rating summary output files (from loki_rate task). | + +**Note:** Loki push/query URLs are set dynamically in `setup_loki_env.yml` from the Cloudkitty Loki route. + +### Synthetic Data Scripts + +**gen_synth_loki_data.py** — Generates Loki-format JSON from a scenario YAML and template. 
The role invokes it with `-r` so that timestamps in the output are in **reverse** order (youngest first, oldest last). When run manually you can omit `-r` for chronological order (oldest first, youngest last). + +| Option | Description | +|--------|--------------| +| `--tmpl` | Path to the Jinja2 template (e.g. `loki_data_templ.j2`). | +| `-t`, `--test` | Path to the scenario YAML (e.g. `test_dyn_basic.yml`). | +| `-o`, `--output` | Path to the output JSON file. | +| `-p`, `--project-id` | Optional; overrides `groupby.project` in every log entry. | +| `-u`, `--user-id` | Optional; overrides `groupby.user` in every log entry. | +| `-r`, `--reverse` | Reverse timestamp order in JSON output (youngest first, oldest last). | +| `--debug` | Enable debug logging. | + +**gen_db_summary.py** (`cloudkitty_summary_script`) — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. + +| Option | Description | +|--------|--------------| +| `-j`, `--json` | Path to the input JSON file (required). | +| `-o`, `--output` | Path to the output YAML file (default: the input file's stem plus `_total.yml`, written to the input file's directory). | +| `--debug` | Directory to write debug output (the input file's stem plus `_diff.txt`, with one `[ts,log]` JSON per line). | + +Output YAML structure: + +* **time** — `begin_step` / `end_step`, each with `nanosec` (nanosecond timestamp), `begin`, `end` (ISO window strings from the log payload). The `nanosec` values are used for Loki query time range in `retrieve_loki_data.yml`. +* **data_log** — `total_timesteps`, `metrics_per_step`, `log_count`. +* **rate** — `by_types` (per-type `Rate` calculated as `Σ((qty_mutated * factor + offset) * price)`) and `total.Rating` (sum of all rates). 
+ +### Dynamically Set Variables + +Set in **main.yml** from the OpenStack CLI (`openstack project show admin` / `openstack user show admin`): + +| Variable | Description | +|----------|-------------| +| `cloudkitty_project_id` | ID of the OpenStack project named `admin` (empty string if not found). Passed as `-p` to the synthetic data generator when non-empty. | +| `cloudkitty_user_id` | ID of the OpenStack user named `admin` (empty string if not found). Passed as `-u` to the synthetic data generator when non-empty. | + +Set in **gen_synth_loki_data.yml** for each scenario file during the loop: + +| Variable | Description | +|----------|-------------| +| `cloudkitty_data_file` | Local path for generated JSON data (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_data.json`) | +| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}`) | +| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml`) | Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching `test_*.yml` will be automatically discovered and processed. Files whose names start with an underscore (e.g. `_test_*.yml`) are **not** discovered by the role; they can be used as reference or for manual runs. + +Each scenario file defines: + +* **generation** — Time range configuration (days, step_seconds). +* **log_types** — List of log type definitions. Each entry has **type** (identifier and value in output), unit, description, qty, price, groupby, and metadata. The **groupby** dict typically includes dimension keys (e.g. resource, user, project, tenant); the generator merges **date_fields** into groupby at run time. 
+* **required_fields** — Top-level keys required for each log type (e.g. type, unit, qty, price, groupby, metadata). +* **date_fields** — Date field names to merge into groupby (week_of_the_year, day_of_the_year, month, year). +* **loki_stream** — Loki stream configuration (service name). + +**groupby.id** should be consistent by metric type across scenario files so that the same type always uses the same id. -* **generation** - Time range configuration (days, step_seconds) -* **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata -* **required_fields** - Fields required for validation -* **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) -* **loki_stream** - Loki stream configuration (service name) +Scenario files matching `test_*.yml` in the `files/` directory are automatically discovered and processed. Files whose names start with an underscore are not auto-discovered. Dependencies ------------ diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..9cc04c8c7 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,29 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Debug mode +cloudkitty_debug: false + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings 
+cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py new file mode 100644 index 000000000..9234a64df --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +""" +Parse Loki JSON (or text) into [timestep, log_entry] pairs, then emit a YAML +summary: time, data_log, and rate (per-type Σ(qty×price) and total Rating). + +Same CLI as gen_synth_loki_metrics_totals.py (-j, -o, --debug). +""" +from __future__ import annotations + +import argparse +import json +import math +import sys +from collections import Counter, defaultdict +from pathlib import Path +from typing import Any, Optional + +import yaml + +REQUIRED_KEYS = frozenset( + {"start", "end", "type", "unit", "qty", "price", "groupby"} +) + + +def _valid_ts(s: str) -> bool: + return isinstance(s, str) and s.isdigit() and len(s) >= 19 + + +def _valid_entry(obj: dict) -> bool: + return REQUIRED_KEYS.issubset(obj.keys()) + + +def _try_pair(ts_str: str, log_str: str) -> Optional[tuple[str, str]]: + if not _valid_ts(ts_str) or not isinstance(log_str, str): + return None + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + return None + if isinstance(entry, dict) and _valid_entry(entry): + return (ts_str, log_str) + return None + + +def _extract_from_loki_json(data: dict) -> list[tuple[str, str]]: + streams = data.get("streams") + if streams is None: + streams = data.get("data", {}).get("result", []) + if not isinstance(streams, list): + return [] + pairs: list[tuple[str, str]] = [] + for stream in streams: + for val in stream.get("values", []): + if not isinstance(val, (list, tuple)) or len(val) < 2: + continue + p = _try_pair(val[0], val[1]) + if p: + pairs.append(p) + return pairs + + +def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: + """ + Load JSON 
from json_path, extract [timestep, log_entry] pairs, + and return them sorted by timestep (ascending). + """ + raw = json_path.read_text(encoding="utf-8", errors="replace") + + # Parse as JSON (fail if invalid) + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"Error: Invalid JSON in {json_path}: {e}", + file=sys.stderr + ) + sys.exit(1) + + # Extract from known Loki JSON structures + if not isinstance(data, dict): + print( + f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + file=sys.stderr + ) + sys.exit(1) + + pairs = _extract_from_loki_json(data) + + if not pairs: + print( + f"Error: No valid log entries found in {json_path}. " + "Expected structure: {{'streams': [...]}} or " + "{{'data': {{'result': [...]}}}}'", + file=sys.stderr + ) + sys.exit(1) + + pairs.sort(key=lambda p: int(p[0])) + return pairs + + +def _apply_mutate(qty: float, mutate: str) -> float: + """ + Apply mutate transformation to qty value. + + Args: + qty: The quantity value to transform. + mutate: The mutation type (NONE, CEIL, FLOOR, NUMBOOL, NOTNUMBOOL). + + Returns: + The transformed quantity. + """ + mutate_upper = mutate.upper() if isinstance(mutate, str) else "NONE" + + if mutate_upper == "CEIL": + return math.ceil(qty) + elif mutate_upper == "FLOOR": + return math.floor(qty) + elif mutate_upper == "NUMBOOL": + # If qty equals 0, leave it at 0. Else, set it to 1. + return 0.0 if qty == 0 else 1.0 + elif mutate_upper == "NOTNUMBOOL": + # If qty equals 0, set it to 1. Else, set it to 0. + return 1.0 if qty == 0 else 0.0 + else: # NONE or any unrecognized value + return qty + + +def _parse_numeric(value: Any, default: float = 0) -> float: + """ + Parse a numeric value, supporting fractions like '1/1048576'. + + This function handles the 'factor' field in scenario YAML files which uses + fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match + CloudKitty/chargeback documentation standards. 
Without this parser, fraction + strings would cause ValueError when passed to float(), silently dropping + metrics from the output summary. + + Args: + value: The value to parse (can be number, string, or fraction string) + default: Default value if parsing fails + + Returns: + Parsed float value + """ + if value is None: + return default + + # If it's already a number, convert directly + if isinstance(value, (int, float)): + return float(value) + + # If it's a string, check for fraction notation (e.g., "1/1048576") + if isinstance(value, str): + value = value.strip() + if '/' in value: + try: + parts = value.split('/') + if len(parts) == 2: + numerator = float(parts[0].strip()) + denominator = float(parts[1].strip()) + if denominator != 0: + return numerator / denominator + except (ValueError, ZeroDivisionError): + pass + # Try direct conversion + try: + return float(value) + except ValueError: + pass + + return default + + +def aggregate_rates_by_type( + pairs: list[tuple[str, str]], +) -> tuple[dict, float]: + sums: defaultdict[str, float] = defaultdict(float) + for _, log_str in pairs: + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + mtype = entry.get("type") + if not isinstance(mtype, str) or not mtype: + mtype = "unknown" + try: + qty = _parse_numeric(entry.get("qty"), 0) + price = _parse_numeric(entry.get("price"), 0) + factor = _parse_numeric(entry.get("factor"), 1) + offset = _parse_numeric(entry.get("offset"), 0) + mutate = entry.get("mutate", "NONE") + except (TypeError, ValueError): + continue + + # Apply mutate transformation + qty_mutated = _apply_mutate(qty, mutate) + + # Apply factor and offset + qty_rate = qty_mutated * factor + offset + + # Calculate rate + sums[mtype] += qty_rate * price + by_types = {k: {"Rate": round(v, 4)} for k, v in sorted(sums.items())} + total = sum(sums.values()) + return by_types, total + + +def build_summary(pairs: list[tuple[str, str]]) -> 
dict[str, Any]: + log_count = len(pairs) + per_ts = Counter(ts for ts, _ in pairs) + n_ts = len(per_ts) + counts = list(per_ts.values()) + mps: Any = counts[0] if counts else 0 + if counts and len(set(counts)) > 1: + mps = "ERROR" + + if pairs: + first = json.loads(pairs[0][1]) + last = json.loads(pairs[-1][1]) + time_block = { + "begin_step": { + "nanosec": int(pairs[0][0]), + "begin": first.get("start"), + "end": first.get("end"), + }, + "end_step": { + "nanosec": int(pairs[-1][0]), + "begin": last.get("start"), + "end": last.get("end"), + }, + } + else: + empty = {"nanosec": None, "begin": None, "end": None} + time_block = {"begin_step": empty.copy(), "end_step": empty.copy()} + + by_types, total_r = aggregate_rates_by_type(pairs) + return { + "time": time_block, + "data_log": { + "total_timesteps": n_ts, + "metrics_per_step": mps, + "log_count": log_count, + }, + "rate": { + "by_types": by_types, + "total": {"Rating": round(total_r, 4)}, + }, + } + + +def write_yaml(path: Path, doc: dict[str, Any]) -> None: + with path.open("w", encoding="utf-8") as f: + f.write("---\n") + yaml.dump( + doc, + f, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Summarize Loki JSON log entries to YAML (time, data_log, rate)." + ), + ) + parser.add_argument( + "-j", "--json", required=True, type=Path, help="Input JSON.", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="Output YAML (default: _total.yml).", + ) + parser.add_argument( + "--debug", + type=Path, + default=None, + metavar="DIR", + help=( + "If set, write _diff.txt with one [ts,log] JSON per line." 
+ ), + ) + args = parser.parse_args() + + if not args.json.exists(): + print(f"Error: input file not found: {args.json}", file=sys.stderr) + sys.exit(1) + + stem = args.json.stem + out_path = args.output or (args.json.parent / f"{stem}_total.yml") + pairs = extract_and_sort(args.json) + + dbg = str(args.debug).strip() if args.debug is not None else "" + if dbg and dbg != ".": + args.debug.mkdir(parents=True, exist_ok=True) + dbg_file = args.debug / f"{args.json.stem}_diff.txt" + with dbg_file.open("w", encoding="utf-8") as f: + for ts, log_str in pairs: + print(json.dumps([ts, log_str], ensure_ascii=False), file=f) + + doc = build_summary(pairs) + write_yaml(out_path, doc) + + if doc["data_log"]["metrics_per_step"] == "ERROR": + per_ts = Counter(ts for ts, _ in pairs) + exp = next(iter(per_ts.values()), 0) + for ts in sorted(per_ts, key=int): + if per_ts[ts] != exp: + print(ts, per_ts[ts], file=sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..263554dc6 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,13 +2,48 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. 
+ step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, @@ -73,7 +108,10 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + project: Union[str, int, None] = None, + user: Union[str, int, None] = None, + reverse_timestamps: bool = False, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +123,12 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. + project: Optional value to inject as groupby.project in every + log entry in the output (overrides test_* file value when set). + user: Optional value to inject as groupby.user in every + log entry in the output (overrides test_* file value when set). + reverse_timestamps (bool): If True, reverse the order of timestamps + in the JSON output (newest first, oldest last). 
""" # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,37 +219,49 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else 
[price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), "metadata": log_type_config.get("metadata", {}) } @@ -231,15 +287,21 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Reversed timestamp order (newest first, oldest last)." + ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +329,17 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + if project is not None: + log_type_with_dates["groupby"]["project"] = project + if user is not None: + log_type_with_dates["groupby"]["user"] = user + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = 
log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +369,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." + ) + sys.exit(1) def main(): @@ -324,8 +408,30 @@ def main(): required=True, help="Path to the output file." ) + parser.add_argument( + "-p", "--project-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.project in " + "every log entry in the output (overrides value from test file)." + ) + parser.add_argument( + "-u", "--user-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.user in " + "every log entry in the output (overrides value from test file)." + ) # --- Optional Utility Arguments --- + parser.add_argument( + "-r", "--reverse", + action="store_true", + help="Reverse timestamp order in JSON output: newest first, " + "oldest last (default is oldest first, newest last)." 
+ ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +468,10 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + project=args.project_id, + user=args.user_id, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml new file mode 100644 index 000000000..cfe7adb18 --- /dev/null +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -0,0 +1,154 @@ +--- +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 14400 + +# Log type definitions (single "type" = identifier and value pushed to output) +log_types: + - type: ceilometer_image_size + description: "Size of ceilometer image" + unit: MiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_image_test + description: "Size of ceilometer test" + unit: B +# factor: 1 + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_cpu + description: "max number of cpus used in time step" + unit: instance + alt_name: instance + qty: + - 1 + price: + - 5.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + flavor_name: null + flavor_id: null + mutate: NUMBOOL + + - type: ceilometer_ip_floating + description: null + unit: ip + qty: + - 5 + price: + - 1.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + state: null + mutate: NUMBOOL + + - type: ceilometer_disk_ephemeral_size + description: "Max at each timestep" + unit: GiB + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: 
null + user: null + project: null + tenant: tenant-01 + metadata: + type: null + + - type: ceilometer_disk_root_size + description: null + unit: GiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + type: null + + - type: ceilometer_network_outgoing_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + vm_instance: null + + - type: ceilometer_network_incoming_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + vm_instance: null + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml deleted file mode 100644 index f94a3c1d2..000000000 --- a/roles/telemetry_chargeback/files/test_static.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Scenario configuration for synthetic Loki log data generation - -# Time range configuration -generation: - days: 1 - step_seconds: 7200 - -# Log type definitions -log_types: - - name: ceilometer_image_size - type: ceilometer_image_size - unit: MiB - description: null - qty: 20.6 - price: 0.02 - groupby: - id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 - project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 - user_id: null - metadata: - container_format: bare - disk_format: qcow2 - - - name: instance - type: instance - unit: instance - description: null - qty: 1.0 - price: 0.3 - groupby: - id: de168c31-ed44-4a1a-a079-51bd238a91d6 - project_id: 9cf5bcfc61a24682acc448af2d062ad2 - user_id: 
c29ab6e886354bbd88ee9899e62d1d40 - metadata: - flavor_name: m1.tiny - flavor_id: "1" - vcpus: "" - -# Required fields for validation (top-level fields only, not nested in groupby) -required_fields: - - type - - unit - - qty - - price - - groupby - - metadata - -# Date field names to add to groupby -date_fields: - - week_of_the_year - - day_of_the_year - - month - - year - -# Loki stream configuration -loki_stream: - service: cloudkitty diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index df07fb503..99ddcc44e 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -1,40 +1,42 @@ --- -- name: Enable Cloudkitty Module (hashmap) +- name: "Enable CloudKitty module (hashmap)" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module enable hashmap" register: enable_hashmap - changed_when: True + changed_when: true failed_when: enable_hashmap.rc != 0 -- name: Find the current value of hashmap +- name: "Find the current value of hashmap" ansible.builtin.shell: - cmd: "{{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + cmd: "set -o pipefail && {{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + args: + executable: /bin/bash register: get_hashmap_priority changed_when: false -- name: Change priority for CloudKitty hashmap module +- name: "Change priority for CloudKitty hashmap module" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' failed_when: (set_hashmap_priority.rc | default(42)) >= 1 or get_hashmap_priority.stdout == "" - changed_when: True + changed_when: true -- name: Get status of all CloudKitty rating modules +- name: "Get status of all CloudKitty rating modules" ansible.builtin.command: cmd: "{{ openstack_cmd }} rating module list" 
changed_when: false register: module_list -- name: TEST Validate CloudKitty module states +- name: "TEST Validate CloudKitty module states" ansible.builtin.assert: that: - "'hashmap' in module_list.stdout" - "'True' in (module_list.stdout_lines | select('search', 'hashmap') | first)" - fail_msg: "FAILED: CloudKitty module validation failed . Module states are not as expected." + fail_msg: "FAILED: CloudKitty module validation failed. Module states are not as expected." success_msg: "SUCCESS: CloudKitty modules (hashmap=True) are configured correctly." -- name: TEST Set priority for CloudKitty hashmap module +- name: "TEST Set priority for CloudKitty hashmap module" ansible.builtin.assert: that: - "(get_hashmap_priority.stdout | trim == '100') or (set_hashmap_priority.rc | default(-1) == 0)" diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + 
ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index e37b54c6b..ec80ca3cc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,40 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ item }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + +- name: "Check for preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ item }}" ansible.builtin.command: cmd: > 
- python3 "{{ ck_synth_script }}" - --tmpl "{{ ck_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + python3 "{{ cloudkitty_synth_script }}" + -r + --tmpl "{{ cloudkitty_data_template }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" + {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." 
- -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..a53751f3f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" 
+ + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..a2a1e129f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,12 @@ +--- +- name: "Ingest CloudKitty data log for {{ item }}" + ansible.builtin.include_tasks: + file: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ item }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: + file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..b9cbd9843 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,29 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: | + "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 
+ +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..e2f264834 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,57 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "CloudKitty debug ON/OFF" + ansible.builtin.set_fact: + cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" + +- name: "Get admin project ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} project show admin -f value -c id" + register: get_admin_project_id + changed_when: false + failed_when: false + +- name: "Set admin project ID for CI" + ansible.builtin.set_fact: + cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" + +- name: "Get admin user ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} user show admin -f value -c id" + register: get_admin_user_id + changed_when: false + failed_when: false + +- name: "Set admin user ID for CI" + ansible.builtin.set_fact: + cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + 
- name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files }}" + when: found_files | length > 0 + + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..2f130e711 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - 
loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..5addb4a22 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,53 @@ +--- +- name: "Generate Synthetic Data for each file: {{ item }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ item }}" + ansible.builtin.include_tasks: "load_loki_data.yml" + +- name: "Get total rate from Loki: {{ item }}" + ansible.builtin.include_tasks: "loki_rate.yml" + +#### diff uploaded data totals vs download data totals +- name: "Check synthetic totals file exists" + ansible.builtin.stat: + path: "{{ 
artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + register: synth_totals_stat + +- name: "Check Loki totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + register: loki_totals_stat + +- name: "TEST Totals files exist {{ item }}" + ansible.builtin.assert: + that: + - synth_totals_stat.stat.exists | default(false) + - loki_totals_stat.stat.exists | default(false) + fail_msg: | + FAILED! Required file(s) missing for scenario {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + success_msg: | + PASSED! Required file(s) exist {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + +- name: "Diff synthetic totals vs Loki totals {{ item }}" + ansible.builtin.command: + cmd: > + diff + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + register: yaml_diff + failed_when: false + changed_when: false + +- name: "TEST Compare synthetic data vs Loki data results {{ item }}" + ansible.builtin.assert: + that: + - yaml_diff.rc == 0 + fail_msg: | + FAILED! {{ item }} - Files differ: + {{ yaml_diff.stdout }} + success_msg: "PASSED! {{ item }} - Data totals are identical." 
diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..d0388913c --- /dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,63 @@ +--- +# Setup Loki Environment + +# Dynamic URL's +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://..svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +# Certs to Flush data to Loki +# - name: Create a directory to extract certificates +# ansible.builtin.file: +# path: "{{ local_cert_dir }}" +# state: directory +# mode: '0755' + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{ 
cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true diff --git a/roles/telemetry_chargeback/template/loki_data_templ.j2 b/roles/telemetry_chargeback/templates/loki_data_templ.j2 similarity index 100% rename from roles/telemetry_chargeback/template/loki_data_templ.j2 rename to roles/telemetry_chargeback/templates/loki_data_templ.j2 diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 1014a6a9e..5815cc92d 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,9 +1,15 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" +# Internal role variables - these use role_path and should not be overridden -ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" -ck_data_template: "{{ role_path }}/template/loki_data_templ.j2" -ck_data_config: "{{ role_path }}/files/test_static.yml" -ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json" -ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" +# Scenario and script paths (using role_path) +cloudkitty_scenario_dir: "{{ role_path }}/files" +cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" +cloudkitty_summary_script: "{{ role_path }}/files/gen_db_summary.py" + +# File naming conventions (internal standardization) +cloudkitty_synth_data_suffix: "-synth_data.json" +cloudkitty_loki_data_suffix: "-loki_data.json" +cloudkitty_synth_totals_metrics_suffix: "-synth_metrics_summary.yml" +cloudkitty_loki_totals_metrics_suffix: "-loki_metrics_summary.yml" +cloudkitty_loki_totals_suffix: "-rating.yml" From 296f765177698d5fa1e752e1971fa064db4b0ab8 Mon Sep 17 00:00:00 
2001 From: ayefimov Date: Thu, 16 Apr 2026 12:20:36 -0400 Subject: [PATCH 03/15] Review Changes --- roles/telemetry_chargeback/README.md | 3 +-- roles/telemetry_chargeback/files/gen_db_summary.py | 9 +++++---- roles/telemetry_chargeback/tasks/loki_rate.yml | 3 +-- roles/telemetry_chargeback/tasks/setup_loki_env.yml | 7 ------- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index a721ffc93..dfdfa9052 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -1,5 +1,5 @@ telemetry_chargeback -========= + The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** feature. These tests are specific to the Cloudkitty feature. Tests that are not specific to this feature (e.g., standard OpenStack deployment validation, basic networking) should be added to a common role. The role performs two main functions: @@ -36,7 +36,6 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | -| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. 
| diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py index 9234a64df..e14ab98d8 100644 --- a/roles/telemetry_chargeback/files/gen_db_summary.py +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -79,7 +79,8 @@ def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: # Extract from known Loki JSON structures if not isinstance(data, dict): print( - f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + f"Error: Expected JSON object, got {type(data).__name__} " + f"in {json_path}", file=sys.stderr ) sys.exit(1) @@ -132,9 +133,9 @@ def _parse_numeric(value: Any, default: float = 0) -> float: This function handles the 'factor' field in scenario YAML files which uses fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match - CloudKitty/chargeback documentation standards. Without this parser, fraction - strings would cause ValueError when passed to float(), silently dropping - metrics from the output summary. + CloudKitty/chargeback documentation standards. Without this parser, + fraction strings would cause ValueError when passed to float(), silently + dropping metrics from the output summary. 
Args: value: The value to parse (can be number, string, or fraction string) diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml index b9cbd9843..822585336 100644 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -12,8 +12,7 @@ - name: "Output saved as yaml {{ item }}" ansible.builtin.copy: - content: | - "{{ cost_totals_by_type.stdout }}" + content: "{{ cost_totals_by_type.stdout }}" dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml index d0388913c..e4a80250f 100644 --- a/roles/telemetry_chargeback/tasks/setup_loki_env.yml +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -43,13 +43,6 @@ oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} changed_when: true -# Certs to Flush data to Loki -# - name: Create a directory to extract certificates -# ansible.builtin.file: -# path: "{{ local_cert_dir }}" -# state: directory -# mode: '0755' - - name: "Extract Client Certificates" ansible.builtin.command: cmd: | From 581ddcf61712ea44fb5f5db936b755af4a80c74f Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 14 Apr 2026 15:44:24 -0400 Subject: [PATCH 04/15] Add rating validation tests for telemetry_chargeback role - calculate rating for each scenario - compare calculated rating to CK rating summary Assisted by Claude --- roles/telemetry_chargeback/README.md | 1 + .../files/gen_synth_loki_data.py | 6 +- .../files/test_dyn_basic.yml | 101 ++++++++++-------- .../telemetry_chargeback/tasks/loki_rate.yml | 2 +- .../tasks/retrieve_loki_data.yml | 4 +- .../templates/loki_data_templ.j2 | 5 +- roles/telemetry_chargeback/vars/main.yml | 2 - 7 files changed, 71 insertions(+), 50 deletions(-) diff --git a/roles/telemetry_chargeback/README.md
b/roles/telemetry_chargeback/README.md index a721ffc93..99f658f59 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -5,6 +5,7 @@ The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** The role performs two main functions: 1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. +<<<<<<< HEAD 2. **Synthetic Data Generation & Analysis** - Generates synthetic Loki log data for testing chargeback scenarios and calculates metric totals. The role automatically discovers and processes all scenario files matching `test_*.yml` in the `files/` directory. For each scenario it runs: generate synthetic data, compute syn-totals, ingest to Loki, flush Loki ingester memory, and get cost via CloudKitty rating summary (using begin/end from syn-totals). Retrieve-from-Loki is included in the load_loki_data flow. After all scenarios, the role runs cleanup (`cleanup_ck.yml`) to remove the local flush cert directory. 
Requirements diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index 263554dc6..e3e0f5e12 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -1,3 +1,4 @@ +### Generated by Cursor """Generate synthetic Loki log data from a Jinja2 template.""" import logging import argparse @@ -263,7 +264,10 @@ def generate_loki_data( "qty": qty_list, "price": price_list, "groupby": groupby.copy(), - "metadata": log_type_config.get("metadata", {}) + "metadata": log_type_config.get("metadata", {}), + "factor": log_type_config.get("factor", 1), + "offset": log_type_config.get("offset", 0), + "mutate": log_type_config.get("mutate", "NONE") } # --- Step 3: Load template and render --- diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml index 791335fbc..683eec3c7 100644 --- a/roles/telemetry_chargeback/files/test_dyn_basic.yml +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -24,10 +24,9 @@ log_types: container_format: bare disk_format: qcow2 - - type: ceilometer_image_test + - type: ceilometer_image_size_2 description: "Size of ceilometer test" unit: B -# factor: 1 qty: - 10000 price: @@ -41,98 +40,114 @@ log_types: container_format: bare disk_format: qcow2 - - type: ceilometer_cpu - description: "max number of cpus used in time step" - unit: instance - alt_name: instance + - type: ceilometer_image_size_3 + description: "Size of ceilometer test" + unit: B qty: - - 1 + - 10000 price: - - 5.00 + - 0.10 groupby: resource: null user: null project: null - tenant: tenant-02 - flavor_name: null - flavor_id: null - mutate: NUMBOOL + tenant: tenant-01 + metadata: + container_format: bare - - type: ceilometer_ip_floating - description: null - unit: ip + - type: ceilometer_image_size_4 + description: "Size of ceilometer test" + unit: B qty: - - 5 + - 10000 price: 
- - 1.00 + - 0.10 groupby: resource: null user: null project: null tenant: tenant-01 metadata: - state: null - mutate: NUMBOOL + disk_format: qcow2 - - type: ceilometer_disk_ephemeral_size - description: "Max at each timestep" - unit: GiB + - type: ceilometer_image_size_5 + description: "Size of ceilometer test" + unit: B qty: - - 0.0 + - 10000 price: - - 0.0 + - 0.10 groupby: resource: null user: null project: null tenant: tenant-01 metadata: - type: null + container_format: null + disk_format: null - - type: ceilometer_disk_root_size - description: null - unit: GiB + - type: ceilometer_image_size_6 + description: "Size of ceilometer test" + unit: B qty: - - 0.0 + - 10000 price: - - 0.0 + - 0.10 groupby: resource: null user: null project: null - tenant: tenant-02 + tenant: tenant-01 metadata: type: null - - type: ceilometer_network_outgoing_bytes - description: null + - type: ceilometer_image_size_7 + description: "Size of ceilometer test" unit: B qty: - - 0.0 + - 10000 price: - - 0.0 + - 0.10 groupby: resource: null user: null project: null tenant: tenant-01 metadata: - vm_instance: null + type: null - - type: ceilometer_network_incoming_bytes - description: null - unit: B + - type: ceilometer_image_size_8 + description: "max number of cpus used in time step" + unit: instance + alt_name: instance qty: - - 0.0 + - 5 price: - - 0.0 + - 5.00 groupby: resource: null user: null project: null tenant: tenant-02 - metadata: - vm_instance: null + flavor_name: null + flavor_id: null + + - type: ceilometer_cpu + description: "max number of cpus used in time step" + unit: instance + alt_name: instance + qty: + - 1 + price: + - 5.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + flavor_name: null + flavor_id: null + mutate: NUMBOOL # Required fields for validation (top-level fields only, not nested in groupby) required_fields: diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml index 
b9cbd9843..cde26990b 100644 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -19,7 +19,7 @@ - name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" ansible.builtin.command: - cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -b \"{{ synth_data_rates.time.begin_step.begin }}\" -e \"{{ synth_data_rates.time.end_step.end }}\"" register: cost_totals_summary changed_when: false failed_when: cost_totals_summary.rc != 0 diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml index 2f130e711..adaa2b34a 100644 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -67,5 +67,5 @@ -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" --debug "{{ cloudkitty_debug_dir }}" - register: synth_rating_info - changed_when: synth_rating_info.rc == 0 + register: loki_retrieved_summary_info + changed_when: loki_retrieved_summary_info.rc == 0 diff --git a/roles/telemetry_chargeback/templates/loki_data_templ.j2 b/roles/telemetry_chargeback/templates/loki_data_templ.j2 index b676f3013..c7f96f0c0 100644 --- a/roles/telemetry_chargeback/templates/loki_data_templ.j2 +++ b/roles/telemetry_chargeback/templates/loki_data_templ.j2 @@ -13,7 +13,10 @@ "qty": entry_data.qty, "price": entry_data.price, "groupby": entry_data.groupby, - "metadata": entry_data.metadata + "metadata": entry_data.metadata, + "factor": entry_data.factor, + "offset": entry_data.offset, + "mutate": entry_data.mutate } -%} [ "{{ item.nanoseconds }}", diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 5815cc92d..27010b5dc 100644 --- 
a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,6 +1,4 @@ --- -# Internal role variables - these use role_path and should not be overridden - # Scenario and script paths (using role_path) cloudkitty_scenario_dir: "{{ role_path }}/files" cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" From 80741b9033596adc83911994175067128b6c30da Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 05/15] Validates chargeback data is generated and then pushed and retrieved from loki - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI --- .gitignore | 1 + roles/telemetry_chargeback/.gitignore | 2 + roles/telemetry_chargeback/README.md | 115 ++++++- roles/telemetry_chargeback/defaults/main.yml | 27 ++ .../files/gen_db_summary.py | 321 ++++++++++++++++++ .../files/gen_synth_loki_data.py | 157 +++++++-- .../files/test_dyn_basic.yml | 154 +++++++++ .../files/test_static.yml | 57 ---- .../tasks/chargeback_tests.yml | 4 +- .../telemetry_chargeback/tasks/cleanup_ck.yml | 5 + .../tasks/flush_loki_data.yml | 52 +++ .../tasks/gen_synth_loki_data.yml | 59 ++-- .../tasks/ingest_loki_data.yml | 42 +++ .../tasks/load_loki_data.yml | 12 + .../telemetry_chargeback/tasks/loki_rate.yml | 29 ++ roles/telemetry_chargeback/tasks/main.yml | 57 +++- .../tasks/retrieve_loki_data.yml | 71 ++++ .../tasks/run_test_scenarios.yml | 53 +++ .../tasks/setup_loki_env.yml | 63 ++++ .../loki_data_templ.j2 | 0 roles/telemetry_chargeback/vars/main.yml | 20 +- 21 files changed, 1164 insertions(+), 137 deletions(-) create mode 100644 roles/telemetry_chargeback/.gitignore create mode 100644 roles/telemetry_chargeback/files/gen_db_summary.py create mode 100644 roles/telemetry_chargeback/files/test_dyn_basic.yml delete mode 100644
roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/cleanup_ck.yml create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/run_test_scenarios.yml create mode 100644 roles/telemetry_chargeback/tasks/setup_loki_env.yml rename roles/telemetry_chargeback/{template => templates}/loki_data_templ.j2 (100%) diff --git a/.gitignore b/.gitignore index 44dbcd64d..53e77bcaf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc .idea/ +.ansible/ diff --git a/roles/telemetry_chargeback/.gitignore b/roles/telemetry_chargeback/.gitignore new file mode 100644 index 000000000..7684dfb64 --- /dev/null +++ b/roles/telemetry_chargeback/.gitignore @@ -0,0 +1,2 @@ +files/_gen_synth_loki_metrics_totals.py +.ansible/ diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 192b72a3d..a721ffc93 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -5,7 +5,7 @@ The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** The role performs two main functions: 1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. -2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. +2. **Synthetic Data Generation & Analysis** - Generates synthetic Loki log data for testing chargeback scenarios and calculates metric totals. The role automatically discovers and processes all scenario files matching `test_*.yml` in the `files/` directory. 
For each scenario it runs: generate synthetic data, compute syn-totals, ingest to Loki, flush Loki ingester memory, and get cost via CloudKitty rating summary (using begin/end from syn-totals). Retrieve-from-Loki is included in the load_loki_data flow. After all scenarios, the role runs cleanup (`cleanup_ck.yml`) to remove the local flush cert directory. Requirements ------------ @@ -15,7 +15,7 @@ It relies on the following being available on the target or control host: * The **OpenStack CLI client** must be installed and configured with administrative credentials. * Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). * Connectivity to the OpenStack API endpoint. -* **Python 3** with the following libraries for synthetic data generation: +* **Python 3** with the following libraries for synthetic data generation and analysis: * `PyYAML` * `Jinja2` @@ -23,6 +23,7 @@ It is expected to be run **after** a successful deployment and configuration of * **OpenStack:** A functional OpenStack cloud (RHOSO) environment. * **Cloudkitty:** The Cloudkitty service must be installed, configured, and running. +* **Loki / OpenShift (for ingest and flush):** When using ingest and flush tasks, the control host must have `oc` CLI access, and the Cloudkitty Loki stack (route, certificates, ingester) must be deployed. The role sets Loki push/query URLs and extracts certificates via `setup_loki_env.yml`. Role Variables -------------- @@ -30,33 +31,115 @@ The role uses the following variables to control the testing environment and exe ### User-Configurable Variables (defaults/main.yml) +These variables can be overridden when importing the role or set at the play level. Users can customize these based on their deployment environment and test requirements. + | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. 
This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for flush certs (removed by cleanup_ck.yml after the run). | +| `remote_cert_dir` | `osp-certs` | Directory inside the OpenStack pod for certs. | +| `cert_secret_name` | `cert-cloudkitty-client-internal` | OpenShift secret name for client certificates. | +| `client_secret` | `secret/cloudkitty-lokistack-gateway-client-http` | Secret for flush client certs. | +| `ca_configmap` | `cm/cloudkitty-lokistack-ca-bundle` | ConfigMap for CA bundle. | +| `logql_query` | `{service="cloudkitty"}` (overridable via `loki_query`) | LogQL query for Loki. | +| `cloudkitty_namespace` | `openstack` | OpenShift namespace for Cloudkitty/Loki resources. | +| `openstackpod` | `openstackclient` | OpenStack client pod name for exec/cp. | +| `lookback` | `6` | Days lookback for Loki query time range. | +| `limit` | `50` | Limit for Loki query results. | + +**Example: Overriding variables when importing the role** +```yaml +- name: "Run chargeback tests" + ansible.builtin.import_role: + name: telemetry_chargeback + vars: + cloudkitty_namespace: "my-custom-namespace" + lookback: 10 + cloudkitty_debug: true +``` ### Internal Variables (vars/main.yml) -These variables are used internally by the role and typically do not need to be modified. +These variables are used internally by the role and should not be modified. They use `role_path` for internal file/script references and define internal file naming conventions. 
| Variable | Default Value | Description | |----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. | -| `ck_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `ck_data_template` | `{{ role_path }}/template/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +| `cloudkitty_scenario_dir` | `{{ role_path }}/files` | Directory containing scenario files (`test_*.yml`). | +| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | +| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | +| `cloudkitty_summary_script` | `{{ role_path }}/files/gen_db_summary.py` | Path to the summary script (gen_db_summary.py). | +| `cloudkitty_synth_data_suffix` | `-synth_data.json` | Suffix for generated synthetic data files. | +| `cloudkitty_loki_data_suffix` | `-loki_data.json` | Suffix for Loki query result JSON files. | +| `cloudkitty_synth_totals_metrics_suffix` | `-synth_metrics_summary.yml` | Suffix for generated metric totals files (from synthetic data). | +| `cloudkitty_loki_totals_metrics_suffix` | `-loki_metrics_summary.yml` | Suffix for metric totals computed from Loki-retrieved JSON (retrieve_loki_data task). 
| +| `cloudkitty_loki_totals_suffix` | `-rating.yml` | Suffix for CloudKitty rating summary output files (from loki_rate task). | + +**Note:** Loki push/query URLs are set dynamically in `setup_loki_env.yml` from the Cloudkitty Loki route. + +### Synthetic Data Scripts + +**gen_synth_loki_data.py** — Generates Loki-format JSON from a scenario YAML and template. The role invokes it with `-r` so that timestamps in the output are in **reverse** order (newest first, oldest last). When run manually you can omit `-r` for chronological order (oldest first, newest last). + +| Option | Description | +|--------|--------------| +| `--tmpl` | Path to the Jinja2 template (e.g. `loki_data_templ.j2`). | +| `-t`, `--test` | Path to the scenario YAML (e.g. `test_dyn_basic.yml`). | +| `-o`, `--output` | Path to the output JSON file. | +| `-p`, `--project-id` | Optional; overrides `groupby.project` in every log entry. | +| `-u`, `--user-id` | Optional; overrides `groupby.user` in every log entry. | +| `-r`, `--reverse` | Reverse timestamp order in JSON output (newest first, oldest last). | +| `--debug` | Enable debug logging. | + +**gen_db_summary.py** (`cloudkitty_summary_script`) — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. + +| Option | Description | +|--------|--------------| +| `-j`, `--json` | Path to the input JSON file (required). | +| `-o`, `--output` | Path to the output YAML file (default: `STEM_total.yml` next to the input file, where `STEM` is the input file name without its extension). | +| `--debug` | Directory to write debug output (`STEM_diff.txt` with one `[ts,log]` JSON per line).
| + +Output YAML structure: + +* **time** — `begin_step` / `end_step`, each with `nanosec` (nanosecond timestamp), `begin`, `end` (ISO window strings from the log payload). The `nanosec` values are used for the Loki query time range in `retrieve_loki_data.yml`. +* **data_log** — `total_timesteps`, `metrics_per_step`, `log_count`. +* **rate** — `by_types` (per-type `Rate` calculated as `Σ((qty_mutated * factor + offset) * price)`) and `total.Rating` (sum of all rates). + +### Dynamically Set Variables + +Set in **main.yml** from the OpenStack CLI (`openstack project show admin` / `openstack user show admin`): + +| Variable | Description | +|----------|-------------| +| `cloudkitty_project_id` | ID of the OpenStack project named `admin` (empty string if not found). Passed as `-p` to the synthetic data generator when non-empty. | +| `cloudkitty_user_id` | ID of the OpenStack user named `admin` (empty string if not found). Passed as `-u` to the synthetic data generator when non-empty. | + +Set in **gen_synth_loki_data.yml** for each scenario file during the loop: + +| Variable | Description | +|----------|-------------| +| `cloudkitty_data_file` | Local path for generated JSON data (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_data.json`) | +| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}`) | +| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml`) | Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching `test_*.yml` will be automatically discovered and processed. Files whose names start with an underscore (e.g.
`_test_*.yml`) are **not** discovered by the role; they can be used as reference or for manual runs. + +Each scenario file defines: + +* **generation** — Time range configuration (days, step_seconds). +* **log_types** — List of log type definitions. Each entry has **type** (identifier and value in output), unit, description, qty, price, groupby, and metadata. The **groupby** dict typically includes dimension keys (e.g. id, user_id, project_id, tenant_id); the generator merges **date_fields** into groupby at run time. +* **required_fields** — Top-level keys required for each log type (e.g. type, unit, qty, price, groupby, metadata). +* **date_fields** — Date field names to merge into groupby (week_of_the_year, day_of_the_year, month, year). +* **loki_stream** — Loki stream configuration (service name). + +**groupby.id** should be consistent by metric type across scenario files so that the same type always uses the same id. -* **generation** - Time range configuration (days, step_seconds) -* **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata -* **required_fields** - Fields required for validation -* **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) -* **loki_stream** - Loki stream configuration (service name) +Scenario files matching `test_*.yml` in the `files/` directory are automatically discovered and processed. Files whose names start with an underscore are not auto-discovered. 
Dependencies ------------ diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..9cc04c8c7 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,29 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Debug mode +cloudkitty_debug: false + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings +cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py new file mode 100644 index 000000000..9234a64df --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +""" +Parse Loki JSON (or text) into [timestep, log_entry] pairs, then emit a YAML +summary: time, data_log, and rate (per-type Σ(qty×price) and total Rating). + +Same CLI as gen_synth_loki_metrics_totals.py (-j, -o, --debug). 
+""" +from __future__ import annotations + +import argparse +import json +import math +import sys +from collections import Counter, defaultdict +from pathlib import Path +from typing import Any, Optional + +import yaml + +REQUIRED_KEYS = frozenset( + {"start", "end", "type", "unit", "qty", "price", "groupby"} +) + + +def _valid_ts(s: str) -> bool: + return isinstance(s, str) and s.isdigit() and len(s) >= 19 + + +def _valid_entry(obj: dict) -> bool: + return REQUIRED_KEYS.issubset(obj.keys()) + + +def _try_pair(ts_str: str, log_str: str) -> Optional[tuple[str, str]]: + if not _valid_ts(ts_str) or not isinstance(log_str, str): + return None + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + return None + if isinstance(entry, dict) and _valid_entry(entry): + return (ts_str, log_str) + return None + + +def _extract_from_loki_json(data: dict) -> list[tuple[str, str]]: + streams = data.get("streams") + if streams is None: + streams = data.get("data", {}).get("result", []) + if not isinstance(streams, list): + return [] + pairs: list[tuple[str, str]] = [] + for stream in streams: + for val in stream.get("values", []): + if not isinstance(val, (list, tuple)) or len(val) < 2: + continue + p = _try_pair(val[0], val[1]) + if p: + pairs.append(p) + return pairs + + +def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: + """ + Load JSON from json_path, extract [timestep, log_entry] pairs, + and return them sorted by timestep (ascending). 
+ """ + raw = json_path.read_text(encoding="utf-8", errors="replace") + + # Parse as JSON (fail if invalid) + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"Error: Invalid JSON in {json_path}: {e}", + file=sys.stderr + ) + sys.exit(1) + + # Extract from known Loki JSON structures + if not isinstance(data, dict): + print( + f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + file=sys.stderr + ) + sys.exit(1) + + pairs = _extract_from_loki_json(data) + + if not pairs: + print( + f"Error: No valid log entries found in {json_path}. " + "Expected structure: {{'streams': [...]}} or " + "{{'data': {{'result': [...]}}}}'", + file=sys.stderr + ) + sys.exit(1) + + pairs.sort(key=lambda p: int(p[0])) + return pairs + + +def _apply_mutate(qty: float, mutate: str) -> float: + """ + Apply mutate transformation to qty value. + + Args: + qty: The quantity value to transform. + mutate: The mutation type (NONE, CEIL, FLOOR, NUMBOOL, NOTNUMBOOL). + + Returns: + The transformed quantity. + """ + mutate_upper = mutate.upper() if isinstance(mutate, str) else "NONE" + + if mutate_upper == "CEIL": + return math.ceil(qty) + elif mutate_upper == "FLOOR": + return math.floor(qty) + elif mutate_upper == "NUMBOOL": + # If qty equals 0, leave it at 0. Else, set it to 1. + return 0.0 if qty == 0 else 1.0 + elif mutate_upper == "NOTNUMBOOL": + # If qty equals 0, set it to 1. Else, set it to 0. + return 1.0 if qty == 0 else 0.0 + else: # NONE or any unrecognized value + return qty + + +def _parse_numeric(value: Any, default: float = 0) -> float: + """ + Parse a numeric value, supporting fractions like '1/1048576'. + + This function handles the 'factor' field in scenario YAML files which uses + fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match + CloudKitty/chargeback documentation standards. 
Without this parser, fraction + strings would cause ValueError when passed to float(), silently dropping + metrics from the output summary. + + Args: + value: The value to parse (can be number, string, or fraction string) + default: Default value if parsing fails + + Returns: + Parsed float value + """ + if value is None: + return default + + # If it's already a number, convert directly + if isinstance(value, (int, float)): + return float(value) + + # If it's a string, check for fraction notation (e.g., "1/1048576") + if isinstance(value, str): + value = value.strip() + if '/' in value: + try: + parts = value.split('/') + if len(parts) == 2: + numerator = float(parts[0].strip()) + denominator = float(parts[1].strip()) + if denominator != 0: + return numerator / denominator + except (ValueError, ZeroDivisionError): + pass + # Try direct conversion + try: + return float(value) + except ValueError: + pass + + return default + + +def aggregate_rates_by_type( + pairs: list[tuple[str, str]], +) -> tuple[dict, float]: + sums: defaultdict[str, float] = defaultdict(float) + for _, log_str in pairs: + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + mtype = entry.get("type") + if not isinstance(mtype, str) or not mtype: + mtype = "unknown" + try: + qty = _parse_numeric(entry.get("qty"), 0) + price = _parse_numeric(entry.get("price"), 0) + factor = _parse_numeric(entry.get("factor"), 1) + offset = _parse_numeric(entry.get("offset"), 0) + mutate = entry.get("mutate", "NONE") + except (TypeError, ValueError): + continue + + # Apply mutate transformation + qty_mutated = _apply_mutate(qty, mutate) + + # Apply factor and offset + qty_rate = qty_mutated * factor + offset + + # Calculate rate + sums[mtype] += qty_rate * price + by_types = {k: {"Rate": round(v, 4)} for k, v in sorted(sums.items())} + total = sum(sums.values()) + return by_types, total + + +def build_summary(pairs: list[tuple[str, str]]) -> 
dict[str, Any]: + log_count = len(pairs) + per_ts = Counter(ts for ts, _ in pairs) + n_ts = len(per_ts) + counts = list(per_ts.values()) + mps: Any = counts[0] if counts else 0 + if counts and len(set(counts)) > 1: + mps = "ERROR" + + if pairs: + first = json.loads(pairs[0][1]) + last = json.loads(pairs[-1][1]) + time_block = { + "begin_step": { + "nanosec": int(pairs[0][0]), + "begin": first.get("start"), + "end": first.get("end"), + }, + "end_step": { + "nanosec": int(pairs[-1][0]), + "begin": last.get("start"), + "end": last.get("end"), + }, + } + else: + empty = {"nanosec": None, "begin": None, "end": None} + time_block = {"begin_step": empty.copy(), "end_step": empty.copy()} + + by_types, total_r = aggregate_rates_by_type(pairs) + return { + "time": time_block, + "data_log": { + "total_timesteps": n_ts, + "metrics_per_step": mps, + "log_count": log_count, + }, + "rate": { + "by_types": by_types, + "total": {"Rating": round(total_r, 4)}, + }, + } + + +def write_yaml(path: Path, doc: dict[str, Any]) -> None: + with path.open("w", encoding="utf-8") as f: + f.write("---\n") + yaml.dump( + doc, + f, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Summarize Loki JSON log entries to YAML (time, data_log, rate)." + ), + ) + parser.add_argument( + "-j", "--json", required=True, type=Path, help="Input JSON.", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="Output YAML (default: _total.yml).", + ) + parser.add_argument( + "--debug", + type=Path, + default=None, + metavar="DIR", + help=( + "If set, write _diff.txt with one [ts,log] JSON per line." 
+ ), + ) + args = parser.parse_args() + + if not args.json.exists(): + print(f"Error: input file not found: {args.json}", file=sys.stderr) + sys.exit(1) + + stem = args.json.stem + out_path = args.output or (args.json.parent / f"{stem}_total.yml") + pairs = extract_and_sort(args.json) + + dbg = str(args.debug).strip() if args.debug is not None else "" + if dbg and dbg != ".": + args.debug.mkdir(parents=True, exist_ok=True) + dbg_file = args.debug / f"{args.json.stem}_diff.txt" + with dbg_file.open("w", encoding="utf-8") as f: + for ts, log_str in pairs: + print(json.dumps([ts, log_str], ensure_ascii=False), file=f) + + doc = build_summary(pairs) + write_yaml(out_path, doc) + + if doc["data_log"]["metrics_per_step"] == "ERROR": + per_ts = Counter(ts for ts, _ in pairs) + exp = next(iter(per_ts.values()), 0) + for ts in sorted(per_ts, key=int): + if per_ts[ts] != exp: + print(ts, per_ts[ts], file=sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..263554dc6 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,13 +2,48 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. 
+ step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, @@ -73,7 +108,10 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + project: Union[str, int, None] = None, + user: Union[str, int, None] = None, + reverse_timestamps: bool = False, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +123,12 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. + project: Optional value to inject as groupby.project in every + log entry in the output (overrides test_* file value when set). + user: Optional value to inject as groupby.user in every + log entry in the output (overrides test_* file value when set). + reverse_timestamps (bool): If True, reverse the order of timestamps + in the JSON output (newest first, oldest last). 
""" # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,37 +219,49 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else 
[price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), "metadata": log_type_config.get("metadata", {}) } @@ -231,15 +287,21 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Reversed timestamp order (newest first, oldest last)." + ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +329,17 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + if project is not None: + log_type_with_dates["groupby"]["project"] = project + if user is not None: + log_type_with_dates["groupby"]["user"] = user + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = 
log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +369,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." + ) + sys.exit(1) def main(): @@ -324,8 +408,30 @@ def main(): required=True, help="Path to the output file." ) + parser.add_argument( + "-p", "--project-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.project in " + "every log entry in the output (overrides value from test file)." + ) + parser.add_argument( + "-u", "--user-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.user in " + "every log entry in the output (overrides value from test file)." + ) # --- Optional Utility Arguments --- + parser.add_argument( + "-r", "--reverse", + action="store_true", + help="Reverse timestamp order in JSON output: newest first, " + "oldest last (default is oldest first, newest last)." 
+ ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +468,10 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + project=args.project_id, + user=args.user_id, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml new file mode 100644 index 000000000..cfe7adb18 --- /dev/null +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -0,0 +1,154 @@ +--- +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 14400 + +# Log type definitions (single "type" = identifier and value pushed to output) +log_types: + - type: ceilometer_image_size + description: "Size of ceilometer image" + unit: MiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_image_test + description: "Size of ceilometer test" + unit: B +# factor: 1 + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_cpu + description: "max number of cpus used in time step" + unit: instance + alt_name: instance + qty: + - 1 + price: + - 5.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + flavor_name: null + flavor_id: null + mutate: NUMBOOL + + - type: ceilometer_ip_floating + description: null + unit: ip + qty: + - 5 + price: + - 1.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + state: null + mutate: NUMBOOL + + - type: ceilometer_disk_ephemeral_size + description: "Max at each timestep" + unit: GiB + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: 
null + user: null + project: null + tenant: tenant-01 + metadata: + type: null + + - type: ceilometer_disk_root_size + description: null + unit: GiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + type: null + + - type: ceilometer_network_outgoing_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + vm_instance: null + + - type: ceilometer_network_incoming_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + vm_instance: null + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml deleted file mode 100644 index f94a3c1d2..000000000 --- a/roles/telemetry_chargeback/files/test_static.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Scenario configuration for synthetic Loki log data generation - -# Time range configuration -generation: - days: 1 - step_seconds: 7200 - -# Log type definitions -log_types: - - name: ceilometer_image_size - type: ceilometer_image_size - unit: MiB - description: null - qty: 20.6 - price: 0.02 - groupby: - id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 - project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 - user_id: null - metadata: - container_format: bare - disk_format: qcow2 - - - name: instance - type: instance - unit: instance - description: null - qty: 1.0 - price: 0.3 - groupby: - id: de168c31-ed44-4a1a-a079-51bd238a91d6 - project_id: 9cf5bcfc61a24682acc448af2d062ad2 - user_id: 
c29ab6e886354bbd88ee9899e62d1d40 - metadata: - flavor_name: m1.tiny - flavor_id: "1" - vcpus: "" - -# Required fields for validation (top-level fields only, not nested in groupby) -required_fields: - - type - - unit - - qty - - price - - groupby - - metadata - -# Date field names to add to groupby -date_fields: - - week_of_the_year - - day_of_the_year - - month - - year - -# Loki stream configuration -loki_stream: - service: cloudkitty diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index 8519d7891..99ddcc44e 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -8,7 +8,9 @@ - name: "Find the current value of hashmap" ansible.builtin.shell: - cmd: "{{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + cmd: "set -o pipefail && {{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + args: + executable: /bin/bash register: get_hashmap_priority changed_when: false diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + 
changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index e37b54c6b..ec80ca3cc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,40 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ item }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + +- name: "Check for 
preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ item }}" ansible.builtin.command: cmd: > - python3 "{{ ck_synth_script }}" - --tmpl "{{ ck_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + python3 "{{ cloudkitty_synth_script }}" + -r + --tmpl "{{ cloudkitty_data_template }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" + {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." 
- -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..a53751f3f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" 
+ + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..a2a1e129f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,12 @@ +--- +- name: "Ingest CloudKitty data log for {{ item }}" + ansible.builtin.include_tasks: + file: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ item }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: + file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..b9cbd9843 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,29 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: | + "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 
+ +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..e2f264834 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,57 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "CloudKitty debug ON/OFF" + ansible.builtin.set_fact: + cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" + +- name: "Get admin project ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} project show admin -f value -c id" + register: get_admin_project_id + changed_when: false + failed_when: false + +- name: "Set admin project ID for CI" + ansible.builtin.set_fact: + cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" + +- name: "Get admin user ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} user show admin -f value -c id" + register: get_admin_user_id + changed_when: false + failed_when: false + +- name: "Set admin user ID for CI" + ansible.builtin.set_fact: + cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + 
- name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files }}" + when: found_files | length > 0 + + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..2f130e711 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - 
loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..5addb4a22 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,53 @@ +--- +- name: "Generate Synthetic Data for each file: {{ item }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ item }}" + ansible.builtin.include_tasks: "load_loki_data.yml" + +- name: "Get total rate from Loki: {{ item }}" + ansible.builtin.include_tasks: "loki_rate.yml" + +#### diff uploaded data totals vs download data totals +- name: "Check synthetic totals file exists" + ansible.builtin.stat: + path: "{{ 
artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + register: synth_totals_stat + +- name: "Check Loki totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + register: loki_totals_stat + +- name: "TEST Totals files exist {{ item }}" + ansible.builtin.assert: + that: + - synth_totals_stat.stat.exists | default(false) + - loki_totals_stat.stat.exists | default(false) + fail_msg: | + FAILED! Required file(s) missing for scenario {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + success_msg: | + PASSED! Required file(s) exist {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + +- name: "Diff synthetic totals vs Loki totals {{ item }}" + ansible.builtin.command: + cmd: > + diff + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + register: yaml_diff + failed_when: false + changed_when: false + +- name: "TEST Compare synthetic data vs Loki data results {{ item }}" + ansible.builtin.assert: + that: + - yaml_diff.rc == 0 + fail_msg: | + FAILED! {{ item }} - Files differ: + {{ yaml_diff.stdout }} + success_msg: "PASSED! {{ item }} - Data totals are identical." 
diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..d0388913c --- /dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,63 @@ +--- +# Setup Loki Environment + +# Dynamic URL's +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://..svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +# Certs to Flush data to Loki +# - name: Create a directory to extract certificates +# ansible.builtin.file: +# path: "{{ local_cert_dir }}" +# state: directory +# mode: '0755' + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{ 
cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true diff --git a/roles/telemetry_chargeback/template/loki_data_templ.j2 b/roles/telemetry_chargeback/templates/loki_data_templ.j2 similarity index 100% rename from roles/telemetry_chargeback/template/loki_data_templ.j2 rename to roles/telemetry_chargeback/templates/loki_data_templ.j2 diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 1014a6a9e..5815cc92d 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,9 +1,15 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" +# Internal role variables - these use role_path and should not be overridden -ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" -ck_data_template: "{{ role_path }}/template/loki_data_templ.j2" -ck_data_config: "{{ role_path }}/files/test_static.yml" -ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json" -ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" +# Scenario and script paths (using role_path) +cloudkitty_scenario_dir: "{{ role_path }}/files" +cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" +cloudkitty_summary_script: "{{ role_path }}/files/gen_db_summary.py" + +# File naming conventions (internal standardization) +cloudkitty_synth_data_suffix: "-synth_data.json" +cloudkitty_loki_data_suffix: "-loki_data.json" +cloudkitty_synth_totals_metrics_suffix: "-synth_metrics_summary.yml" +cloudkitty_loki_totals_metrics_suffix: "-loki_metrics_summary.yml" +cloudkitty_loki_totals_suffix: "-rating.yml" From 24409c5f0a67c9aefefb0486da21ef456a06009f Mon Sep 17 00:00:00 
2001 From: ayefimov Date: Thu, 16 Apr 2026 12:20:36 -0400 Subject: [PATCH 06/15] Review Changes --- roles/telemetry_chargeback/README.md | 3 +-- roles/telemetry_chargeback/files/gen_db_summary.py | 9 +++++---- roles/telemetry_chargeback/tasks/loki_rate.yml | 3 +-- roles/telemetry_chargeback/tasks/setup_loki_env.yml | 7 ------- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index a721ffc93..dfdfa9052 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -1,5 +1,5 @@ telemetry_chargeback -========= + The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** feature. These tests are specific to the Cloudkitty feature. Tests that are not specific to this feature (e.g., standard OpenStack deployment validation, basic networking) should be added to a common role. The role performs two main functions: @@ -36,7 +36,6 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | -| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. 
| diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py index 9234a64df..e14ab98d8 100644 --- a/roles/telemetry_chargeback/files/gen_db_summary.py +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -79,7 +79,8 @@ def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: # Extract from known Loki JSON structures if not isinstance(data, dict): print( - f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + f"Error: Expected JSON object, got {type(data).__name__} " + f"in {json_path}", file=sys.stderr ) sys.exit(1) @@ -132,9 +133,9 @@ def _parse_numeric(value: Any, default: float = 0) -> float: This function handles the 'factor' field in scenario YAML files which uses fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match - CloudKitty/chargeback documentation standards. Without this parser, fraction - strings would cause ValueError when passed to float(), silently dropping - metrics from the output summary. + CloudKitty/chargeback documentation standards. Without this parser, + fraction strings would cause ValueError when passed to float(), silently + dropping metrics from the output summary. 
Args: value: The value to parse (can be number, string, or fraction string) diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml index b9cbd9843..822585336 100644 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -12,8 +12,7 @@ - name: "Output saved as yaml {{ item }}" ansible.builtin.copy: - content: | - "{{ cost_totals_by_type.stdout }}" + content: "{{ cost_totals_by_type.stdout }}" dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml index d0388913c..e4a80250f 100644 --- a/roles/telemetry_chargeback/tasks/setup_loki_env.yml +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -43,13 +43,6 @@ oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} changed_when: true -# Certs to Flush data to Loki -# - name: Create a directory to extract certificates -# ansible.builtin.file: -# path: "{{ local_cert_dir }}" -# state: directory -# mode: '0755' - - name: "Extract Client Certificates" ansible.builtin.command: cmd: | From 9ddc1441d526b093ccb1ba5eeec371d8a5eb25e9 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 07/15] Updating README file and rebasing - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI --- .gitignore | 1 + roles/telemetry_chargeback/.gitignore | 1 + roles/telemetry_chargeback/README.md | 117 ++++++- roles/telemetry_chargeback/defaults/main.yml | 27 ++ .../files/gen_db_summary.py | 321 ++++++++++++++++++ .../files/gen_synth_loki_data.py | 157 +++++++-- .../files/test_dyn_basic.yml | 154 +++++++++ .../files/test_static.yml | 57 ---- 
.../tasks/chargeback_tests.yml | 4 +- .../telemetry_chargeback/tasks/cleanup_ck.yml | 5 + .../tasks/flush_loki_data.yml | 52 +++ .../tasks/gen_synth_loki_data.yml | 59 ++-- .../tasks/ingest_loki_data.yml | 42 +++ .../tasks/load_loki_data.yml | 12 + .../telemetry_chargeback/tasks/loki_rate.yml | 29 ++ roles/telemetry_chargeback/tasks/main.yml | 57 +++- .../tasks/retrieve_loki_data.yml | 71 ++++ .../tasks/run_test_scenarios.yml | 53 +++ .../tasks/setup_loki_env.yml | 63 ++++ roles/telemetry_chargeback/vars/main.yml | 18 +- 20 files changed, 1162 insertions(+), 138 deletions(-) create mode 100644 roles/telemetry_chargeback/.gitignore create mode 100644 roles/telemetry_chargeback/files/gen_db_summary.py create mode 100644 roles/telemetry_chargeback/files/test_dyn_basic.yml delete mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/cleanup_ck.yml create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/run_test_scenarios.yml create mode 100644 roles/telemetry_chargeback/tasks/setup_loki_env.yml diff --git a/.gitignore b/.gitignore index 44dbcd64d..53e77bcaf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc .idea/ +.ansible/ diff --git a/roles/telemetry_chargeback/.gitignore b/roles/telemetry_chargeback/.gitignore new file mode 100644 index 000000000..424bd2624 --- /dev/null +++ b/roles/telemetry_chargeback/.gitignore @@ -0,0 +1 @@ +.ansible/ diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 192b72a3d..80aad6a61 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ 
-1,11 +1,11 @@ telemetry_chargeback -========= + The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** feature. These tests are specific to the Cloudkitty feature. Tests that are not specific to this feature (e.g., standard OpenStack deployment validation, basic networking) should be added to a common role. The role performs two main functions: 1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. -2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. +2. **Synthetic Data Generation & Analysis** - Generates synthetic Loki log data for testing chargeback scenarios and calculates metric totals. The role automatically discovers and processes all scenario files matching `test_*.yml` in the `files/` directory. For each scenario it runs: generate synthetic data, compute syn-totals, ingest to Loki, flush Loki ingester memory, and get cost via CloudKitty rating summary (using begin/end from syn-totals). Retrieve-from-Loki is included in the load_loki_data flow. After all scenarios, the role runs cleanup (`cleanup_ck.yml`) to remove the local flush cert directory. Requirements ------------ @@ -15,7 +15,7 @@ It relies on the following being available on the target or control host: * The **OpenStack CLI client** must be installed and configured with administrative credentials. * Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). * Connectivity to the OpenStack API endpoint. -* **Python 3** with the following libraries for synthetic data generation: +* **Python 3** with the following libraries for synthetic data generation and analysis: * `PyYAML` * `Jinja2` @@ -23,6 +23,7 @@ It is expected to be run **after** a successful deployment and configuration of * **OpenStack:** A functional OpenStack cloud (RHOSO) environment. 
* **Cloudkitty:** The Cloudkitty service must be installed, configured, and running. +* **Loki / OpenShift (for ingest and flush):** When using ingest and flush tasks, the control host must have `oc` CLI access, and the Cloudkitty Loki stack (route, certificates, ingester) must be deployed. The role sets Loki push/query URLs and extracts certificates via `setup_loki_env.yml`. Role Variables -------------- @@ -30,33 +31,115 @@ The role uses the following variables to control the testing environment and exe ### User-Configurable Variables (defaults/main.yml) +These variables can be overridden when importing the role or set at the play level. Users can customize these based on their deployment environment and test requirements. + | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for flush certs (removed by cleanup_ck.yml after the run). | +| `remote_cert_dir` | `osp-certs` | Directory inside the OpenStack pod for certs. | +| `cert_secret_name` | `cert-cloudkitty-client-internal` | OpenShift secret name for client certificates. | +| `client_secret` | `secret/cloudkitty-lokistack-gateway-client-http` | Secret for flush client certs. | +| `ca_configmap` | `cm/cloudkitty-lokistack-ca-bundle` | ConfigMap for CA bundle. 
| +| `logql_query` | `{service="cloudkitty"}` (overridable via `loki_query`) | LogQL query for Loki. | +| `cloudkitty_namespace` | `openstack` | OpenShift namespace for Cloudkitty/Loki resources. | +| `openstackpod` | `openstackclient` | OpenStack client pod name for exec/cp. | +| `lookback` | `6` | Days lookback for Loki query time range. | +| `limit` | `50` | Limit for Loki query results. | + +**Example: Overriding variables when importing the role** +```yaml +- name: "Run chargeback tests" + ansible.builtin.import_role: + name: telemetry_chargeback + vars: + cloudkitty_namespace: "my-custom-namespace" + lookback: 10 + cloudkitty_debug: true +``` ### Internal Variables (vars/main.yml) -These variables are used internally by the role and typically do not need to be modified. +These variables are used internally by the role and should not be modified. They use `role_path` for internal file/script references and define internal file naming conventions. | Variable | Default Value | Description | |----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. | -| `ck_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `ck_data_template` | `{{ role_path }}/template/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +| `cloudkitty_scenario_dir` | `{{ role_path }}/files` | Directory containing scenario files (`test_*.yml`). 
| +| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | +| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | +| `cloudkitty_summary_script` | `{{ role_path }}/files/gen_db_summary.py` | Path to the summary script (gen_db_summary.py). | +| `cloudkitty_synth_data_suffix` | `-synth_data.json` | Suffix for generated synthetic data files. | +| `cloudkitty_loki_data_suffix` | `-loki_data.json` | Suffix for Loki query result JSON files. | +| `cloudkitty_synth_totals_metrics_suffix` | `-synth_metrics_summary.yml` | Suffix for generated metric totals files (from synthetic data). | +| `cloudkitty_loki_totals_metrics_suffix` | `-loki_metrics_summary.yml` | Suffix for metric totals computed from Loki-retrieved JSON (retrieve_loki_data task). | +| `cloudkitty_loki_totals_suffix` | `-rating.yml` | Suffix for CloudKitty rating summary output files (from loki_rate task). | + +**Note:** Loki push/query URLs are set dynamically in `setup_loki_env.yml` from the Cloudkitty Loki route. + +### Synthetic Data Scripts + +**gen_synth_loki_data.py** — Generates Loki-format JSON from a scenario YAML and template. The role invokes it with `-r` so that timestamps in the output are in **reverse** order (youngest first, oldest last). When run manually you can omit `-r` for chronological order (oldest first, youngest last). + +| Option | Description | +|--------|--------------| +| `--tmpl` | Path to the Jinja2 template (e.g. `loki_data_templ.j2`). | +| `-t`, `--test` | Path to the scenario YAML (e.g. `test_dyn_basic.yml`). | +| `-o`, `--output` | Path to the output JSON file. | +| `-p`, `--project-id` | Optional; overrides `groupby.project` in every log entry. | +| `-u`, `--user-id` | Optional; overrides `groupby.user` in every log entry. | +| `-r`, `--reverse` | Reverse timestamp order in JSON output (youngest first, oldest last).
| +| `--debug` | Enable debug logging. | + +**gen_db_summary.py** (`cloudkitty_summary_script`) — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. + +| Option | Description | +|--------|--------------| +| `-j`, `--json` | Path to the input JSON file (required). | +| `-o`, `--output` | Path to the output YAML file (default: `<input-stem>_total.yml` next to the input file). | +| `--debug` | Directory to write debug output (`<input-stem>_diff.txt` with one `[ts,log]` JSON per line). | + +Output YAML structure: + +* **time** — `begin_step` / `end_step`, each with `nanosec` (nanosecond timestamp), `begin`, `end` (ISO window strings from the log payload). The `nanosec` values are used for Loki query time range in `retrieve_loki_data.yml`. +* **data_log** — `total_timesteps`, `metrics_per_step`, `log_count`. +* **rate** — `by_types` (per-type `Rate` calculated as `Σ((qty_mutated * factor + offset) * price)`) and `total.Rating` (sum of all rates). + +### Dynamically Set Variables + +Set in **main.yml** from the OpenStack CLI (`openstack project show admin` / `openstack user show admin`): + +| Variable | Description | +|----------|-------------| +| `cloudkitty_project_id` | ID of the OpenStack project named `admin` (empty string if not found). Passed as `-p` to the synthetic data generator when non-empty. | +| `cloudkitty_user_id` | ID of the OpenStack user named `admin` (empty string if not found). Passed as `-u` to the synthetic data generator when non-empty.
| + +Set in **gen_synth_loki_data.yml** for each scenario file during the loop: + +| Variable | Description | +|----------|-------------| +| `cloudkitty_data_file` | Local path for generated JSON data (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_data.json`) | +| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}`) | +| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml`) | Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching `test_*.yml` will be automatically discovered and processed. Files whose names start with an underscore (e.g. `_test_*.yml`) are **not** discovered by the role; they can be used as reference or for manual runs. + +Each scenario file defines: + +* **generation** — Time range configuration (days, step_seconds). +* **log_types** — List of log type definitions. Each entry has **type** (identifier and value in output), unit, description, qty, price, groupby, and metadata. The **groupby** dict typically includes dimension keys (e.g. id, user_id, project_id, tenant_id); the generator merges **date_fields** into groupby at run time. +* **required_fields** — Top-level keys required for each log type (e.g. type, unit, qty, price, groupby, metadata). +* **date_fields** — Date field names to merge into groupby (week_of_the_year, day_of_the_year, month, year). +* **loki_stream** — Loki stream configuration (service name). + +**groupby.id** should be consistent by metric type across scenario files so that the same type always uses the same id.
-* **generation** - Time range configuration (days, step_seconds) -* **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata -* **required_fields** - Fields required for validation -* **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) -* **loki_stream** - Loki stream configuration (service name) +Scenario files matching `test_*.yml` in the `files/` directory are automatically discovered and processed. Files whose names start with an underscore are not auto-discovered. Dependencies ------------ diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..9cc04c8c7 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,29 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Debug mode +cloudkitty_debug: false + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings +cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py new file mode 100644 index 000000000..9234a64df --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +""" +Parse Loki 
JSON (or text) into [timestep, log_entry] pairs, then emit a YAML +summary: time, data_log, and rate (per-type Σ(qty×price) and total Rating). + +Same CLI as gen_synth_loki_metrics_totals.py (-j, -o, --debug). +""" +from __future__ import annotations + +import argparse +import json +import math +import sys +from collections import Counter, defaultdict +from pathlib import Path +from typing import Any, Optional + +import yaml + +REQUIRED_KEYS = frozenset( + {"start", "end", "type", "unit", "qty", "price", "groupby"} +) + + +def _valid_ts(s: str) -> bool: + return isinstance(s, str) and s.isdigit() and len(s) >= 19 + + +def _valid_entry(obj: dict) -> bool: + return REQUIRED_KEYS.issubset(obj.keys()) + + +def _try_pair(ts_str: str, log_str: str) -> Optional[tuple[str, str]]: + if not _valid_ts(ts_str) or not isinstance(log_str, str): + return None + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + return None + if isinstance(entry, dict) and _valid_entry(entry): + return (ts_str, log_str) + return None + + +def _extract_from_loki_json(data: dict) -> list[tuple[str, str]]: + streams = data.get("streams") + if streams is None: + streams = data.get("data", {}).get("result", []) + if not isinstance(streams, list): + return [] + pairs: list[tuple[str, str]] = [] + for stream in streams: + for val in stream.get("values", []): + if not isinstance(val, (list, tuple)) or len(val) < 2: + continue + p = _try_pair(val[0], val[1]) + if p: + pairs.append(p) + return pairs + + +def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: + """ + Load JSON from json_path, extract [timestep, log_entry] pairs, + and return them sorted by timestep (ascending). 
+ """ + raw = json_path.read_text(encoding="utf-8", errors="replace") + + # Parse as JSON (fail if invalid) + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"Error: Invalid JSON in {json_path}: {e}", + file=sys.stderr + ) + sys.exit(1) + + # Extract from known Loki JSON structures + if not isinstance(data, dict): + print( + f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + file=sys.stderr + ) + sys.exit(1) + + pairs = _extract_from_loki_json(data) + + if not pairs: + print( + f"Error: No valid log entries found in {json_path}. " + "Expected structure: {{'streams': [...]}} or " + "{{'data': {{'result': [...]}}}}'", + file=sys.stderr + ) + sys.exit(1) + + pairs.sort(key=lambda p: int(p[0])) + return pairs + + +def _apply_mutate(qty: float, mutate: str) -> float: + """ + Apply mutate transformation to qty value. + + Args: + qty: The quantity value to transform. + mutate: The mutation type (NONE, CEIL, FLOOR, NUMBOOL, NOTNUMBOOL). + + Returns: + The transformed quantity. + """ + mutate_upper = mutate.upper() if isinstance(mutate, str) else "NONE" + + if mutate_upper == "CEIL": + return math.ceil(qty) + elif mutate_upper == "FLOOR": + return math.floor(qty) + elif mutate_upper == "NUMBOOL": + # If qty equals 0, leave it at 0. Else, set it to 1. + return 0.0 if qty == 0 else 1.0 + elif mutate_upper == "NOTNUMBOOL": + # If qty equals 0, set it to 1. Else, set it to 0. + return 1.0 if qty == 0 else 0.0 + else: # NONE or any unrecognized value + return qty + + +def _parse_numeric(value: Any, default: float = 0) -> float: + """ + Parse a numeric value, supporting fractions like '1/1048576'. + + This function handles the 'factor' field in scenario YAML files which uses + fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match + CloudKitty/chargeback documentation standards. 
Without this parser, fraction + strings would cause ValueError when passed to float(), silently dropping + metrics from the output summary. + + Args: + value: The value to parse (can be number, string, or fraction string) + default: Default value if parsing fails + + Returns: + Parsed float value + """ + if value is None: + return default + + # If it's already a number, convert directly + if isinstance(value, (int, float)): + return float(value) + + # If it's a string, check for fraction notation (e.g., "1/1048576") + if isinstance(value, str): + value = value.strip() + if '/' in value: + try: + parts = value.split('/') + if len(parts) == 2: + numerator = float(parts[0].strip()) + denominator = float(parts[1].strip()) + if denominator != 0: + return numerator / denominator + except (ValueError, ZeroDivisionError): + pass + # Try direct conversion + try: + return float(value) + except ValueError: + pass + + return default + + +def aggregate_rates_by_type( + pairs: list[tuple[str, str]], +) -> tuple[dict, float]: + sums: defaultdict[str, float] = defaultdict(float) + for _, log_str in pairs: + try: + entry = json.loads(log_str) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + mtype = entry.get("type") + if not isinstance(mtype, str) or not mtype: + mtype = "unknown" + try: + qty = _parse_numeric(entry.get("qty"), 0) + price = _parse_numeric(entry.get("price"), 0) + factor = _parse_numeric(entry.get("factor"), 1) + offset = _parse_numeric(entry.get("offset"), 0) + mutate = entry.get("mutate", "NONE") + except (TypeError, ValueError): + continue + + # Apply mutate transformation + qty_mutated = _apply_mutate(qty, mutate) + + # Apply factor and offset + qty_rate = qty_mutated * factor + offset + + # Calculate rate + sums[mtype] += qty_rate * price + by_types = {k: {"Rate": round(v, 4)} for k, v in sorted(sums.items())} + total = sum(sums.values()) + return by_types, total + + +def build_summary(pairs: list[tuple[str, str]]) -> 
dict[str, Any]: + log_count = len(pairs) + per_ts = Counter(ts for ts, _ in pairs) + n_ts = len(per_ts) + counts = list(per_ts.values()) + mps: Any = counts[0] if counts else 0 + if counts and len(set(counts)) > 1: + mps = "ERROR" + + if pairs: + first = json.loads(pairs[0][1]) + last = json.loads(pairs[-1][1]) + time_block = { + "begin_step": { + "nanosec": int(pairs[0][0]), + "begin": first.get("start"), + "end": first.get("end"), + }, + "end_step": { + "nanosec": int(pairs[-1][0]), + "begin": last.get("start"), + "end": last.get("end"), + }, + } + else: + empty = {"nanosec": None, "begin": None, "end": None} + time_block = {"begin_step": empty.copy(), "end_step": empty.copy()} + + by_types, total_r = aggregate_rates_by_type(pairs) + return { + "time": time_block, + "data_log": { + "total_timesteps": n_ts, + "metrics_per_step": mps, + "log_count": log_count, + }, + "rate": { + "by_types": by_types, + "total": {"Rating": round(total_r, 4)}, + }, + } + + +def write_yaml(path: Path, doc: dict[str, Any]) -> None: + with path.open("w", encoding="utf-8") as f: + f.write("---\n") + yaml.dump( + doc, + f, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Summarize Loki JSON log entries to YAML (time, data_log, rate)." + ), + ) + parser.add_argument( + "-j", "--json", required=True, type=Path, help="Input JSON.", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="Output YAML (default: _total.yml).", + ) + parser.add_argument( + "--debug", + type=Path, + default=None, + metavar="DIR", + help=( + "If set, write _diff.txt with one [ts,log] JSON per line." 
+ ), + ) + args = parser.parse_args() + + if not args.json.exists(): + print(f"Error: input file not found: {args.json}", file=sys.stderr) + sys.exit(1) + + stem = args.json.stem + out_path = args.output or (args.json.parent / f"{stem}_total.yml") + pairs = extract_and_sort(args.json) + + dbg = str(args.debug).strip() if args.debug is not None else "" + if dbg and dbg != ".": + args.debug.mkdir(parents=True, exist_ok=True) + dbg_file = args.debug / f"{args.json.stem}_diff.txt" + with dbg_file.open("w", encoding="utf-8") as f: + for ts, log_str in pairs: + print(json.dumps([ts, log_str], ensure_ascii=False), file=f) + + doc = build_summary(pairs) + write_yaml(out_path, doc) + + if doc["data_log"]["metrics_per_step"] == "ERROR": + per_ts = Counter(ts for ts, _ in pairs) + exp = next(iter(per_ts.values()), 0) + for ts in sorted(per_ts, key=int): + if per_ts[ts] != exp: + print(ts, per_ts[ts], file=sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..263554dc6 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,13 +2,48 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. 
+ step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, @@ -73,7 +108,10 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + project: Union[str, int, None] = None, + user: Union[str, int, None] = None, + reverse_timestamps: bool = False, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +123,12 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. + project: Optional value to inject as groupby.project in every + log entry in the output (overrides test_* file value when set). + user: Optional value to inject as groupby.user in every + log entry in the output (overrides test_* file value when set). + reverse_timestamps (bool): If True, reverse the order of timestamps + in the JSON output (newest first, oldest last). 
""" # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,37 +219,49 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else 
[price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), "metadata": log_type_config.get("metadata", {}) } @@ -231,15 +287,21 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Reversed timestamp order (newest first, oldest last)." + ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +329,17 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + if project is not None: + log_type_with_dates["groupby"]["project"] = project + if user is not None: + log_type_with_dates["groupby"]["user"] = user + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = 
log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +369,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." + ) + sys.exit(1) def main(): @@ -324,8 +408,30 @@ def main(): required=True, help="Path to the output file." ) + parser.add_argument( + "-p", "--project-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.project in " + "every log entry in the output (overrides value from test file)." + ) + parser.add_argument( + "-u", "--user-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.user in " + "every log entry in the output (overrides value from test file)." + ) # --- Optional Utility Arguments --- + parser.add_argument( + "-r", "--reverse", + action="store_true", + help="Reverse timestamp order in JSON output: newest first, " + "oldest last (default is oldest first, newest last)." 
+ ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +468,10 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + project=args.project_id, + user=args.user_id, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml new file mode 100644 index 000000000..cfe7adb18 --- /dev/null +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -0,0 +1,154 @@ +--- +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 14400 + +# Log type definitions (single "type" = identifier and value pushed to output) +log_types: + - type: ceilometer_image_size + description: "Size of ceilometer image" + unit: MiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_image_test + description: "Size of ceilometer test" + unit: B +# factor: 1 + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + container_format: bare + disk_format: qcow2 + + - type: ceilometer_cpu + description: "max number of cpus used in time step" + unit: instance + alt_name: instance + qty: + - 1 + price: + - 5.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + flavor_name: null + flavor_id: null + mutate: NUMBOOL + + - type: ceilometer_ip_floating + description: null + unit: ip + qty: + - 5 + price: + - 1.00 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + state: null + mutate: NUMBOOL + + - type: ceilometer_disk_ephemeral_size + description: "Max at each timestep" + unit: GiB + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: 
null + user: null + project: null + tenant: tenant-01 + metadata: + type: null + + - type: ceilometer_disk_root_size + description: null + unit: GiB + qty: + - 10000 + price: + - 0.10 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + type: null + + - type: ceilometer_network_outgoing_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-01 + metadata: + vm_instance: null + + - type: ceilometer_network_incoming_bytes + description: null + unit: B + qty: + - 0.0 + price: + - 0.0 + groupby: + resource: null + user: null + project: null + tenant: tenant-02 + metadata: + vm_instance: null + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml deleted file mode 100644 index f94a3c1d2..000000000 --- a/roles/telemetry_chargeback/files/test_static.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Scenario configuration for synthetic Loki log data generation - -# Time range configuration -generation: - days: 1 - step_seconds: 7200 - -# Log type definitions -log_types: - - name: ceilometer_image_size - type: ceilometer_image_size - unit: MiB - description: null - qty: 20.6 - price: 0.02 - groupby: - id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 - project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 - user_id: null - metadata: - container_format: bare - disk_format: qcow2 - - - name: instance - type: instance - unit: instance - description: null - qty: 1.0 - price: 0.3 - groupby: - id: de168c31-ed44-4a1a-a079-51bd238a91d6 - project_id: 9cf5bcfc61a24682acc448af2d062ad2 - user_id: 
c29ab6e886354bbd88ee9899e62d1d40 - metadata: - flavor_name: m1.tiny - flavor_id: "1" - vcpus: "" - -# Required fields for validation (top-level fields only, not nested in groupby) -required_fields: - - type - - unit - - qty - - price - - groupby - - metadata - -# Date field names to add to groupby -date_fields: - - week_of_the_year - - day_of_the_year - - month - - year - -# Loki stream configuration -loki_stream: - service: cloudkitty diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index 8519d7891..99ddcc44e 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -8,7 +8,9 @@ - name: "Find the current value of hashmap" ansible.builtin.shell: - cmd: "{{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + cmd: "set -o pipefail && {{ openstack_cmd }} rating module get hashmap -c Priority -f csv | tail -n +2" + args: + executable: /bin/bash register: get_hashmap_priority changed_when: false diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + 
changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index e37b54c6b..ec80ca3cc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,40 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ item }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + +- name: "Check for 
preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ item }}" ansible.builtin.command: cmd: > - python3 "{{ ck_synth_script }}" - --tmpl "{{ ck_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + python3 "{{ cloudkitty_synth_script }}" + -r + --tmpl "{{ cloudkitty_data_template }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" + {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." 
- -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..a53751f3f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" 
+ + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..a2a1e129f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,12 @@ +--- +- name: "Ingest CloudKitty data log for {{ item }}" + ansible.builtin.include_tasks: + file: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ item }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: + file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..b9cbd9843 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,29 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: | + "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 
+ +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..e2f264834 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,57 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "CloudKitty debug ON/OFF" + ansible.builtin.set_fact: + cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" + +- name: "Get admin project ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} project show admin -f value -c id" + register: get_admin_project_id + changed_when: false + failed_when: false + +- name: "Set admin project ID for CI" + ansible.builtin.set_fact: + cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" + +- name: "Get admin user ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} user show admin -f value -c id" + register: get_admin_user_id + changed_when: false + failed_when: false + +- name: "Set admin user ID for CI" + ansible.builtin.set_fact: + cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + 
- name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files }}" + when: found_files | length > 0 + + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..2f130e711 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - 
loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..5addb4a22 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,53 @@ +--- +- name: "Generate Synthetic Data for each file: {{ item }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ item }}" + ansible.builtin.include_tasks: "load_loki_data.yml" + +- name: "Get total rate from Loki: {{ item }}" + ansible.builtin.include_tasks: "loki_rate.yml" + +#### diff uploaded data totals vs download data totals +- name: "Check synthetic totals file exists" + ansible.builtin.stat: + path: "{{ 
artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + register: synth_totals_stat + +- name: "Check Loki totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + register: loki_totals_stat + +- name: "TEST Totals files exist {{ item }}" + ansible.builtin.assert: + that: + - synth_totals_stat.stat.exists | default(false) + - loki_totals_stat.stat.exists | default(false) + fail_msg: | + FAILED! Required file(s) missing for scenario {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + success_msg: | + PASSED! Required file(s) exist {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + +- name: "Diff synthetic totals vs Loki totals {{ item }}" + ansible.builtin.command: + cmd: > + diff + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + register: yaml_diff + failed_when: false + changed_when: false + +- name: "TEST Compare synthetic data vs Loki data results {{ item }}" + ansible.builtin.assert: + that: + - yaml_diff.rc == 0 + fail_msg: | + FAILED! {{ item }} - Files differ: + {{ yaml_diff.stdout }} + success_msg: "PASSED! {{ item }} - Data totals are identical." 
diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..d0388913c --- /dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,63 @@ +--- +# Setup Loki Environment + +# Dynamic URL's +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://..svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +# Certs to Flush data to Loki +# - name: Create a directory to extract certificates +# ansible.builtin.file: +# path: "{{ local_cert_dir }}" +# state: directory +# mode: '0755' + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{ 
cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 178154d89..27010b5dc 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,9 +1,13 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" +# Scenario and script paths (using role_path) +cloudkitty_scenario_dir: "{{ role_path }}/files" +cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" +cloudkitty_summary_script: "{{ role_path }}/files/gen_db_summary.py" -ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" -ck_data_template: "{{ role_path }}/templates/loki_data_templ.j2" -ck_data_config: "{{ role_path }}/files/test_static.yml" -ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json" -ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" +# File naming conventions (internal standardization) +cloudkitty_synth_data_suffix: "-synth_data.json" +cloudkitty_loki_data_suffix: "-loki_data.json" +cloudkitty_synth_totals_metrics_suffix: "-synth_metrics_summary.yml" +cloudkitty_loki_totals_metrics_suffix: "-loki_metrics_summary.yml" +cloudkitty_loki_totals_suffix: "-rating.yml" From 99b320f121c4156164ae973158232b3bc1e910ea Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 16 Apr 2026 12:20:36 -0400 Subject: [PATCH 08/15] Review Changes --- roles/telemetry_chargeback/README.md | 1 - roles/telemetry_chargeback/files/gen_db_summary.py | 9 +++++---- roles/telemetry_chargeback/tasks/loki_rate.yml | 3 +-- roles/telemetry_chargeback/tasks/setup_loki_env.yml | 7 ------- 4 files changed, 6 insertions(+), 14 
deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 80aad6a61..dfdfa9052 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -36,7 +36,6 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | -| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py index 9234a64df..e14ab98d8 100644 --- a/roles/telemetry_chargeback/files/gen_db_summary.py +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -79,7 +79,8 @@ def extract_and_sort(json_path: Path) -> list[tuple[str, str]]: # Extract from known Loki JSON structures if not isinstance(data, dict): print( - f"Error: Expected JSON object, got {type(data).__name__} in {json_path}", + f"Error: Expected JSON object, got {type(data).__name__} " + f"in {json_path}", file=sys.stderr ) sys.exit(1) @@ -132,9 +133,9 @@ def _parse_numeric(value: Any, default: float = 0) -> float: This function handles the 'factor' field in scenario YAML files which uses fraction notation (e.g., '1/1048576' to convert bytes to MiB) to match - CloudKitty/chargeback documentation standards. Without this parser, fraction - strings would cause ValueError when passed to float(), silently dropping - metrics from the output summary. 
+ CloudKitty/chargeback documentation standards. Without this parser, + fraction strings would cause ValueError when passed to float(), silently + dropping metrics from the output summary. Args: value: The value to parse (can be number, string, or fraction string) diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml index b9cbd9843..822585336 100644 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -12,8 +12,7 @@ - name: "Output saved as yaml {{ item }}" ansible.builtin.copy: - content: | - "{{ cost_totals_by_type.stdout }}" + content: "{{ cost_totals_by_type.stdout }}" dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml index d0388913c..e4a80250f 100644 --- a/roles/telemetry_chargeback/tasks/setup_loki_env.yml +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -43,13 +43,6 @@ oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} changed_when: true -# Certs to Flush data to Loki -# - name: Create a directory to extract certificates -# ansible.builtin.file: -# path: "{{ local_cert_dir }}" -# state: directory -# mode: '0755' - - name: "Extract Client Certificates" ansible.builtin.command: cmd: | From 6c10d0456445f169884c6bf1ec4de19a3f627a4e Mon Sep 17 00:00:00 2001 From: ayefimov Date: Mon, 20 Apr 2026 09:23:26 -0400 Subject: [PATCH 09/15] Removing internal variables from README --- roles/telemetry_chargeback/README.md | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index dfdfa9052..954ea622e 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -61,24 +61,6 @@ These variables can be 
overridden when importing the role or set at the play lev cloudkitty_debug: true ``` -### Internal Variables (vars/main.yml) - -These variables are used internally by the role and should not be modified. They use `role_path` for internal file/script references and define internal file naming conventions. - -| Variable | Default Value | Description | -|----------|---------------|-------------| -| `cloudkitty_scenario_dir` | `{{ role_path }}/files` | Directory containing scenario files (`test_*.yml`). | -| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `cloudkitty_summary_script` | `{{ role_path }}/files/gen_db_summary.py` | Path to the summary script (gen_db_summary.py). | -| `cloudkitty_synth_data_suffix` | `-synth_data.json` | Suffix for generated synthetic data files. | -| `cloudkitty_loki_data_suffix` | `-loki_data.json` | Suffix for Loki query result JSON files. | -| `cloudkitty_synth_totals_metrics_suffix` | `-synth_metrics_summary.yml` | Suffix for generated metric totals files (from synthetic data). | -| `cloudkitty_loki_totals_metrics_suffix` | `-loki_metrics_summary.yml` | Suffix for metric totals computed from Loki-retrieved JSON (retrieve_loki_data task). | -| `cloudkitty_loki_totals_suffix` | `-rating.yml` | Suffix for CloudKitty rating summary output files (from loki_rate task). | - -**Note:** Loki push/query URLs are set dynamically in `setup_loki_env.yml` from the Cloudkitty Loki route. - ### Synthetic Data Scripts **gen_synth_loki_data.py** — Generates Loki-format JSON from a scenario YAML and template. The role invokes it with `-r` so that timestamps in the output are in **reverse** order (youngest first, oldest last). When run manually you can omit `-r` for chronological order (oldest first, youngest last). 
@@ -93,7 +75,7 @@ These variables are used internally by the role and should not be modified. They | `-r`, `--reverse` | Reverse timestamp order in JSON output (youngest first, oldest last). | | `--debug` | Enable debug logging. | -**gen_db_summary.py** (`cloudkitty_summary_script`) — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. +**gen_db_summary.py** — Parses Loki-style JSON (streams or `data.result`), sorts entries by timestamp, and writes a YAML summary. This script is invoked by the role for **both** synthetic totals (in `gen_synth_loki_data.yml`) and Loki-retrieved totals (in `retrieve_loki_data.yml`). It applies rate calculations with support for `factor`, `offset`, and `mutate` transformations. 
| Option | Description | |--------|--------------| @@ -121,8 +103,8 @@ Set in **gen_synth_loki_data.yml** for each scenario file during the loop: | Variable | Description | |----------|-------------| | `cloudkitty_data_file` | Local path for generated JSON data (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_data.json`) | -| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_suffix }}`) | -| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml`) | +| `cloudkitty_synth_totals_file` | Local path for calculated metric totals (`{{ artifacts_dir_zuul }}/{{ scenario_name }}-synth_metrics_summary.yml`) | +| `cloudkitty_test_file` | Path to the scenario configuration file (`{{ role_path }}/files/{{ scenario_name }}.yml`) | Scenario Configuration ---------------------- From d5a3ec6c48822abb4efafb3b02586d2cf9d11c63 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 10/15] Validates chargeback data is generated and then push and retrieved from loki - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI --- roles/telemetry_chargeback/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 954ea622e..fde62542e 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -36,6 +36,7 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. 
| +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | From da2f3df76eaa2beee8543cdd7f27105672c323d4 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 16 Apr 2026 12:20:36 -0400 Subject: [PATCH 11/15] Review Changes --- roles/telemetry_chargeback/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index fde62542e..954ea622e 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -36,7 +36,6 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | -| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. | | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. 
| From 1d35938a4c30eff952d27cf614dd097820fc62d1 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Fri, 17 Apr 2026 16:29:11 +0100 Subject: [PATCH 12/15] Delete roles/telemetry_chargeback/.gitignore (#353) * Validates chargeback data is generated and then push and retrieved from loki - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI * Review Changes * Delete roles/telemetry_chargeback/.gitignore --------- Co-authored-by: ayefimov From 8ed2835059f6ca866a378918b8eda6bbfc2e6cf7 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 3 Feb 2026 15:18:40 -0500 Subject: [PATCH 13/15] Validates chargeback data is generated and then push and retrieved from loki - uses synth data to calculate total cost via script - run "openstack rating summary get" to get total cost from loki - compares script_totals and Loki_Totals if same then job passes - Used Gemini and Cursor AI --- roles/telemetry_chargeback/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 954ea622e..fde62542e 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -36,6 +36,7 @@ These variables can be overridden when importing the role or set at the play lev | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for the role. | | `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | | `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts. 
| | `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Local directory for extracted ingest/query certs. | From d2bbe9c5cf04f50c9346e35bbc704a4939b0ead7 Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Wed, 15 Apr 2026 23:13:51 -0400 Subject: [PATCH 14/15] Method to select telemetry_chargeback scenarios to run that do not require changing the role --- roles/telemetry_chargeback/defaults/main.yml | 3 +++ roles/telemetry_chargeback/tasks/main.yml | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 9cc04c8c7..1e64535ce 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -27,3 +27,6 @@ openstackpod: "openstackclient" # Time window settings lookback: 6 limit: 50 + +# List of test scenario files to run +cloudkitty_test_scenarios: [] diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index e2f264834..3b07d7bf0 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -36,10 +36,17 @@ paths: "{{ cloudkitty_scenario_dir }}" patterns: "test_*.yml" register: found_files_raw + when: cloudkitty_test_scenarios | length == 0 -- name: "Extract only the filenames into a clean list" +- name: "Set scenario list from discovered files" ansible.builtin.set_fact: found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + when: cloudkitty_test_scenarios | length == 0 + +- name: "Set scenario list from user-provided variable" + ansible.builtin.set_fact: + found_files: "{{ cloudkitty_test_scenarios }}" + when: cloudkitty_test_scenarios | length > 0 - name: "Run scenario file through workflow" block: From 1b3e3349e359829b7355fd373f7149baa440379d Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Fri, 17 Apr 2026 19:52:35 -0400 Subject: [PATCH 
15/15] Use Ansible blocks to group conditional scenario selection logic Address review feedback: consolidate repeated when conditions into blocks and skip the entire workflow when no scenarios are found. --- roles/telemetry_chargeback/tasks/main.yml | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 3b07d7bf0..c8963bb4b 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -31,29 +31,30 @@ ansible.builtin.set_fact: cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" -- name: "Find test files" - ansible.builtin.find: - paths: "{{ cloudkitty_scenario_dir }}" - patterns: "test_*.yml" - register: found_files_raw - when: cloudkitty_test_scenarios | length == 0 - -- name: "Set scenario list from discovered files" - ansible.builtin.set_fact: - found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" +- name: "Auto-discover test scenarios" when: cloudkitty_test_scenarios | length == 0 + block: + - name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + + - name: "Set scenario list from discovered files" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" - name: "Set scenario list from user-provided variable" ansible.builtin.set_fact: found_files: "{{ cloudkitty_test_scenarios }}" - when: cloudkitty_test_scenarios | length > 0 + when: cloudkitty_test_scenarios | length > 0 - name: "Run scenario file through workflow" + when: found_files | length > 0 block: - name: "Process and Loop if files exist" ansible.builtin.include_tasks: run_test_scenarios.yml loop: "{{ found_files }}" - when: found_files | length 
> 0 - name: "Cleanup after job run" ansible.builtin.include_tasks: cleanup_ck.yml