diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 352b58d2f..c5d582a4f 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -33,24 +33,39 @@ The role uses the following variables to control the testing environment and exe | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for CloudKitty database dumps. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts and test output. | +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Directory for CloudKitty client certificates. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for certificate extraction. | +| `cloudkitty_namespace` | `openstack` | Kubernetes namespace where CloudKitty is deployed. | + +How It Works +------------ -### Internal Variables (vars/main.yml) +The role executes the following workflow: -These variables are used internally by the role and typically do not need to be modified. +1. **CloudKitty Validation** - Enables the hashmap rating module and sets its priority to 100. +2. **Loki Environment Setup** - Extracts Loki route information and certificates from the OpenShift cluster. +3. **Admin Credentials** - Retrieves admin project ID and user ID for test data generation. +4. **Scenario Discovery** - Finds all `test_*.yml` scenario files in the scenario directory. +5. 
**Scenario Loop** - For each scenario file found (exposed as `{{ scenario_name }}`): + - Generates synthetic Loki log data based on the scenario configuration + - Calculates expected chargeback metrics from the generated data + - Loads the metrics for validation, then ingests the data into Loki, flushes the ingester, and retrieves the logs to verify the entry count +6. **Cleanup** - Removes the temporary local certificate directory (`local_cert_dir`). -| Variable | Default Value | Description | -|----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. | -| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +The role uses `{{ scenario_name }}` as the loop variable when processing multiple test scenarios, making it easy to track which scenario is currently being executed. Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching the pattern `test_*.yml` will be automatically discovered and executed. 
+ +**Available scenarios:** +- `test_static.yml` - Static test scenario with predefined values +- `test_dyn_basic.yml` - Dynamic test scenario with variable values over time + +Each scenario file defines: * **generation** - Time range configuration (days, step_seconds) * **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata @@ -58,6 +73,21 @@ The synthetic data generation is controlled by a YAML configuration file (`files * **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) * **loki_stream** - Loki stream configuration (service name) +### Data Generation Script Options + +The `gen_synth_loki_data.py` script supports the following options: + +* `--tmpl` - Path to the Jinja2 template file (required) +* `-t, --test` - Path to the scenario YAML file (required) +* `-o, --output` - Path for the output JSON file (required) +* `-p, --project-id` - Optional project ID to override the scenario file value +* `-u, --user-id` - Optional user ID to override the scenario file value +* `--ascending` - Sort timestamps in ascending order (oldest first, newest last) +* `--descending` - Sort timestamps in descending order (newest first, oldest last) - **default** +* `--debug` - Enable debug logging + +By default, the script generates data in descending order (newest timestamps first), which is the expected format for Loki ingestion. + Dependencies ------------ This role has no direct hard dependencies on other Ansible roles. 
diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..8ea562f6c 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,30 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Debug mode set +cloudkitty_debug: false +cloudkitty_debug_dir: "{{ artifacts_dir_zuul + '/debug_ck_db' }}" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings +cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/files/gen_db_summary.py b/roles/telemetry_chargeback/files/gen_db_summary.py index e14ab98d8..07882f184 100644 --- a/roles/telemetry_chargeback/files/gen_db_summary.py +++ b/roles/telemetry_chargeback/files/gen_db_summary.py @@ -119,7 +119,7 @@ def _apply_mutate(qty: float, mutate: str) -> float: return math.floor(qty) elif mutate_upper == "NUMBOOL": # If qty equals 0, leave it at 0. Else, set it to 1. - return 0.0 if qty == 0 else 1.0 + return 0.0 if abs(qty) < 1e-9 else 1.0 elif mutate_upper == "NOTNUMBOOL": # If qty equals 0, set it to 1. Else, set it to 0. 
return 1.0 if qty == 0 else 0.0 @@ -175,8 +175,9 @@ def _parse_numeric(value: Any, default: float = 0) -> float: def aggregate_rates_by_type( pairs: list[tuple[str, str]], -) -> tuple[dict, float]: - sums: defaultdict[str, float] = defaultdict(float) +) -> tuple[dict, float, dict]: + rate_sums: defaultdict[str, float] = defaultdict(float) + qty_sums: defaultdict[str, float] = defaultdict(float) for _, log_str in pairs: try: entry = json.loads(log_str) @@ -196,17 +197,26 @@ def aggregate_rates_by_type( except (TypeError, ValueError): continue - # Apply mutate transformation + # Track raw qty sum (before any transformation) + qty_sums[mtype] += qty + + # Apply mutate transformation for rating calculation qty_mutated = _apply_mutate(qty, mutate) # Apply factor and offset qty_rate = qty_mutated * factor + offset # Calculate rate - sums[mtype] += qty_rate * price - by_types = {k: {"Rate": round(v, 4)} for k, v in sorted(sums.items())} - total = sum(sums.values()) - return by_types, total + rate_sums[mtype] += qty_rate * price + + by_types = { + k: {"Rate": round(v, 4)} for k, v in sorted(rate_sums.items()) + } + qty_by_types = { + k: {"qty_sum": round(v, 4)} for k, v in sorted(qty_sums.items()) + } + total = sum(rate_sums.values()) + return by_types, total, qty_by_types def build_summary(pairs: list[tuple[str, str]]) -> dict[str, Any]: @@ -237,17 +247,35 @@ def build_summary(pairs: list[tuple[str, str]]) -> dict[str, Any]: empty = {"nanosec": None, "begin": None, "end": None} time_block = {"begin_step": empty.copy(), "end_step": empty.copy()} - by_types, total_r = aggregate_rates_by_type(pairs) + # Get aggregated data by type + by_types, total_r, qty_by_types = aggregate_rates_by_type(pairs) + + # Get overall time range for by_type entries + begin_time = first.get("start") if pairs else None + end_time = last.get("end") if pairs else None + + # Build flat list of entries + rate_list = [] + for type_name in sorted(by_types.keys()): + entry = { + "Begin": begin_time, + 
"End": end_time, + "Qty": qty_by_types.get(type_name, {}).get("qty_sum", 0.0), + "Rate": by_types[type_name]["Rate"], + "Type": type_name, + } + rate_list.append(entry) + return { "time": time_block, - "data_log": { + "data_summary": { "total_timesteps": n_ts, "metrics_per_step": mps, "log_count": log_count, + "total_rating": round(total_r, 4), }, - "rate": { - "by_types": by_types, - "total": {"Rating": round(total_r, 4)}, + "by_type": { + "rate": rate_list, }, } @@ -267,7 +295,8 @@ def write_yaml(path: Path, doc: dict[str, Any]) -> None: def main() -> None: parser = argparse.ArgumentParser( description=( - "Summarize Loki JSON log entries to YAML (time, data_log, rate)." + "Summarize Loki JSON log entries to YAML " + "(time, data_summary, by_type)." ), ) parser.add_argument( @@ -282,11 +311,20 @@ def main() -> None: ) parser.add_argument( "--debug", + action="store_true", + help=( + "Enable debug mode: write _diff.txt with one " + "[ts,log] JSON per line." + ), + ) + parser.add_argument( + "--debug_dir", type=Path, default=None, metavar="DIR", help=( - "If set, write _diff.txt with one [ts,log] JSON per line." + "Directory for debug output. If not specified, uses the " + "directory from -o output path." 
), ) args = parser.parse_args() @@ -299,10 +337,12 @@ def main() -> None: out_path = args.output or (args.json.parent / f"{stem}_total.yml") pairs = extract_and_sort(args.json) - dbg = str(args.debug).strip() if args.debug is not None else "" - if dbg and dbg != ".": - args.debug.mkdir(parents=True, exist_ok=True) - dbg_file = args.debug / f"{args.json.stem}_diff.txt" + if args.debug: + # Determine debug directory: use --debug_dir if provided, + # otherwise use output directory + debug_dir = args.debug_dir if args.debug_dir else out_path.parent + debug_dir.mkdir(parents=True, exist_ok=True) + dbg_file = debug_dir / f"{args.json.stem}_diff.txt" with dbg_file.open("w", encoding="utf-8") as f: for ts, log_str in pairs: print(json.dumps([ts, log_str], ensure_ascii=False), file=f) @@ -310,13 +350,6 @@ def main() -> None: doc = build_summary(pairs) write_yaml(out_path, doc) - if doc["data_log"]["metrics_per_step"] == "ERROR": - per_ts = Counter(ts for ts, _ in pairs) - exp = next(iter(per_ts.values()), 0) - for ts in sorted(per_ts, key=int): - if per_ts[ts] != exp: - print(ts, per_ts[ts], file=sys.stdout) - if __name__ == "__main__": main() diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..359947c7e 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,18 +2,53 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. 
For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. + step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%dT%H:%M:%S+00:00' + datefmt='%Y-%m-%dT%H:%M:%SZ' ) logger = logging.getLogger() @@ -27,11 +62,11 @@ def _format_timestamp(epoch_seconds: float, invalid_timestamp: str) -> str: invalid_timestamp (str): String to return for invalid timestamps. Returns: - str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). + str: The formatted datetime string (e.g., "2023-10-26T14:30:00Z"). """ try: dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) - return dt_object.isoformat() + return dt_object.strftime('%Y-%m-%dT%H:%M:%SZ') except (ValueError, TypeError): logger.warning(f"Invalid epoch value provided: {epoch_seconds}") return invalid_timestamp @@ -73,7 +108,8 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + reverse_timestamps: bool = True, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +121,9 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. 
config (Dict[str, Any]): Configuration dictionary loaded from file. + reverse_timestamps (bool): If True, sort timestamps in descending order + (newest first, oldest last). If False, sort in ascending order + (oldest first, newest last). Default is True (descending). """ # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,39 +214,54 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": 
log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else [price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), - "metadata": log_type_config.get("metadata", {}) + "metadata": log_type_config.get("metadata", {}), + "mutate": log_type_config.get("mutate"), + "factor": log_type_config.get("factor"), + "offset": log_type_config.get("offset") } # --- Step 3: Load template and render --- @@ -231,15 +285,22 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Sorted timestamps in descending order " + "(newest first, oldest last)." 
+ ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +328,13 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +364,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." 
+ ) + sys.exit(1) def main(): @@ -326,6 +405,20 @@ def main(): ) # --- Optional Utility Arguments --- + parser.add_argument( + "--ascending", + action="store_false", + dest="reverse", + help="Sort timestamps in ascending order: oldest first, newest last." + ) + parser.add_argument( + "--descending", + action="store_true", + dest="reverse", + default=True, + help="Sort timestamps in descending order: newest first, oldest last " + "(default behavior)." + ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +455,8 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/files/test_dyn_basic.yml b/roles/telemetry_chargeback/files/test_dyn_basic.yml index cfe7adb18..d3f4a4342 100644 --- a/roles/telemetry_chargeback/files/test_dyn_basic.yml +++ b/roles/telemetry_chargeback/files/test_dyn_basic.yml @@ -16,10 +16,9 @@ log_types: price: - 0.10 groupby: - resource: null + resource: tenant-01 user: null project: null - tenant: tenant-01 metadata: container_format: bare disk_format: qcow2 @@ -33,10 +32,9 @@ log_types: price: - 0.10 groupby: - resource: null + resource: tenant-01 user: null project: null - tenant: tenant-01 metadata: container_format: bare disk_format: qcow2 @@ -50,10 +48,9 @@ log_types: price: - 5.00 groupby: - resource: null + resource: tenant-02 user: null project: null - tenant: tenant-02 flavor_name: null flavor_id: null mutate: NUMBOOL @@ -66,10 +63,9 @@ log_types: price: - 1.00 groupby: - resource: null + resource: tenant-01 user: null project: null - tenant: tenant-01 metadata: state: null mutate: NUMBOOL @@ -82,10 +78,9 @@ log_types: price: - 0.0 groupby: - resource: null + resource: tenant-01 user: null project: null - tenant: tenant-01 metadata: type: null @@ -97,10 +92,9 @@ log_types: price: - 0.10 groupby: - resource: null + resource: tenant-02 user: 
null project: null - tenant: tenant-02 metadata: type: null @@ -112,10 +106,9 @@ log_types: price: - 0.0 groupby: - resource: null + resource: tenant-01 user: null project: null - tenant: tenant-01 metadata: vm_instance: null @@ -127,10 +120,9 @@ log_types: price: - 0.0 groupby: - resource: null + resource: tenant-02 user: null project: null - tenant: tenant-02 metadata: vm_instance: null diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml index f94a3c1d2..23f253025 100644 --- a/roles/telemetry_chargeback/files/test_static.yml +++ b/roles/telemetry_chargeback/files/test_static.yml @@ -14,9 +14,9 @@ log_types: qty: 20.6 price: 0.02 groupby: - id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 - project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 - user_id: null + resource: tenant-01 + project: null + user: null metadata: container_format: bare disk_format: qcow2 @@ -28,9 +28,9 @@ log_types: qty: 1.0 price: 0.3 groupby: - id: de168c31-ed44-4a1a-a079-51bd238a91d6 - project_id: 9cf5bcfc61a24682acc448af2d062ad2 - user_id: c29ab6e886354bbd88ee9899e62d1d40 + resource: tenant-02 + project: null + user: null metadata: flavor_name: m1.tiny flavor_id: "1" diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + 
ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index 0b8d5880d..c82e89efc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,38 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ scenario_name }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ 
scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml" + +- name: "Check for preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ scenario_name }}" ansible.builtin.command: cmd: > python3 "{{ cloudkitty_synth_script }}" --tmpl "{{ cloudkitty_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." 
- -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + {% if cloudkitty_debug | bool %}--debug --debug_dir "{{ cloudkitty_debug_dir }}"{% endif %} + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..79e8896ee --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" 
+ + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..9376dd787 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,9 @@ +--- +- name: "Ingest CloudKitty data log for {{ scenario_name }}" + ansible.builtin.include_tasks: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ scenario_name }}" + ansible.builtin.include_tasks: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ scenario_name }}" + ansible.builtin.include_tasks: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..cba889981 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,34 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + - name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files 
}}" + loop_control: + loop_var: scenario_name + when: found_files | length > 0 + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." + + always: + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..700dd40be --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ scenario_name }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_summary.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ scenario_name }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_summary.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ scenario_name }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 
'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_summary.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_summary.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_summary.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" + {% if cloudkitty_debug | bool %}--debug --debug_dir "{{ cloudkitty_debug_dir }}"{% endif %} + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..0ce65ee09 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,6 @@ +--- +- name: "Generate Synthetic Data for each file: {{ scenario_name }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ scenario_name }}" + ansible.builtin.include_tasks: "load_loki_data.yml" diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..e4a80250f --- 
/dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,56 @@ +--- +# Setup Loki Environment + +# Dynamic URL's +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://..svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true diff --git a/roles/telemetry_chargeback/templates/loki_data_templ.j2 b/roles/telemetry_chargeback/templates/loki_data_templ.j2 index 
b676f3013..ef27ae48a 100644 --- a/roles/telemetry_chargeback/templates/loki_data_templ.j2 +++ b/roles/telemetry_chargeback/templates/loki_data_templ.j2 @@ -13,7 +13,10 @@ "qty": entry_data.qty, "price": entry_data.price, "groupby": entry_data.groupby, - "metadata": entry_data.metadata + "metadata": entry_data.metadata, + "mutate": entry_data.get("mutate"), + "factor": entry_data.get("factor"), + "offset": entry_data.get("offset") } -%} [ "{{ item.nanoseconds }}", diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 5d7a47804..2054c1b5b 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,7 +1,4 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" - cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" ck_data_config: "{{ role_path }}/files/test_static.yml"