From 0353315c3a4cf9af2ce38d649ff85a11e9dba37e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 4 May 2026 19:37:35 -0400 Subject: [PATCH 01/28] Add azure-cosmos-avad-test module for AVAD Change Feed Processor soak testing Adds a new internal soak test tool under sdk/cosmos/ for validating All Versions and Deletes (AVAD) Change Feed Processor correctness. Components: - Ingestor: generates create/replace/upsert/delete workload - AvadReader: CFP in AVAD mode with previous-image and CRTS validation - LatestVersionReader: CFP in latest-version mode for parity checks - Reconciler: gap detection, LSN/CRTS ordering, AVAD-superset-of-LV checks - HealthMonitor: online health checks via Cosmos reconciliation container - Helm chart: AKS deployment with StatefulSets for consumers - Chaos scenarios: pod kill, restart storm, lease throttle, partition split, network fault, node drain Key AVAD validations: - LSN extracted from ChangeFeedMetaData (correct for delete tombstones) - CRTS (conflict resolution timestamp) captured and ordering validated - previousImage presence checked on replace/delete events - AVAD superset of LV parity assertion Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/versioning/external_dependencies.txt | 1 + sdk/cosmos/azure-cosmos-avad-test/Dockerfile | 29 ++ .../azure-cosmos-avad-test/chaos/README.md | 53 +++ .../chaos/chaos-schedule.yaml | 52 +++ .../chaos/scenarios/lease-throttle.sh | 49 +++ .../chaos/scenarios/network-fault.sh | 35 ++ .../chaos/scenarios/node-drain.sh | 30 ++ .../chaos/scenarios/partition-split.sh | 57 +++ .../chaos/scenarios/pod-kill.sh | 22 + .../chaos/scenarios/restart-storm.sh | 16 + .../azure-cosmos-avad-test/infra/README.md | 52 +++ .../infra/chart/Chart.yaml | 6 + .../infra/chart/templates/_helpers.tpl | 68 ++++ .../infra/chart/templates/configmap.yaml | 18 + .../chart/templates/consumer-statefulset.yaml | 70 ++++ .../templates/health-monitor-cronjob.yaml | 47 +++ .../chart/templates/ingestor-deployment.yaml | 48 +++ .../infra/chart/values.yaml | 90 +++++ .../infra/scripts/setup-acr.sh | 44 ++ .../infra/scripts/setup-aks.sh | 49 +++ .../infra/scripts/setup-cosmos.sh | 66 +++ sdk/cosmos/azure-cosmos-avad-test/pom.xml | 202 +++++++++ .../azure-cosmos-avad-test/run-cutover.sh | 382 ++++++++++++++++++ sdk/cosmos/azure-cosmos-avad-test/run-soak.sh | 242 +++++++++++ .../java/com/azure/cosmos/avadtest/Main.java | 142 +++++++ .../cosmos/avadtest/config/TestConfig.java | 124 ++++++ .../cosmos/avadtest/health/HealthMonitor.java | 251 ++++++++++++ .../cosmos/avadtest/health/HealthServer.java | 89 ++++ .../cosmos/avadtest/ingestor/Ingestor.java | 306 ++++++++++++++ .../cosmos/avadtest/metrics/SoakMetrics.java | 95 +++++ .../cosmos/avadtest/reader/AvadReader.java | 203 ++++++++++ .../avadtest/reader/LatestVersionReader.java | 139 +++++++ .../avadtest/reconciliation/EventLog.java | 79 ++++ .../avadtest/reconciliation/Reconciler.java | 210 ++++++++++ .../reconciliation/ReconciliationWriter.java | 182 +++++++++ .../src/main/resources/application.properties | 24 ++ .../src/main/resources/logback.xml | 32 ++ sdk/cosmos/pom.xml | 1 + 38 files changed, 3605 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos-avad-test/Dockerfile create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/README.md create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh create mode 100644 
sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/README.md create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/pom.xml create mode 100644 sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/run-soak.sh create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties create mode 100644 sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index d23f3b1c80d3..a6c9ba0c2fe9 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -59,6 +59,7 @@ io.netty:netty-transport-native-unix-common;4.1.132.Final io.netty:netty-transport-native-kqueue;4.1.132.Final io.projectreactor.netty:reactor-netty-http;1.2.16 io.projectreactor:reactor-core;3.7.17 +info.picocli:picocli;4.7.6 io.vertx:vertx-codegen;4.5.26 
io.vertx:vertx-core;4.5.26 javax.websocket:javax.websocket-api;1.1 diff --git a/sdk/cosmos/azure-cosmos-avad-test/Dockerfile b/sdk/cosmos/azure-cosmos-avad-test/Dockerfile new file mode 100644 index 000000000000..9a05c24f4552 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/Dockerfile @@ -0,0 +1,29 @@ +# Multi-stage build for cosmos-avad-test soak runner +# JDK 21 for production, Maven for build stage + +# --- Build stage --- +FROM maven:3.9-eclipse-temurin-21 AS build +WORKDIR /build +COPY pom.xml . +RUN mvn dependency:go-offline -B +COPY src/ src/ +RUN mvn package -DskipTests -B + +# --- Runtime stage --- +FROM eclipse-temurin:21-jre-jammy +WORKDIR /app + +# Install curl for health probes and az CLI for chaos scripts +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /build/target/cosmos-avad-test-1.0-SNAPSHOT.jar /app/app.jar + +# Health endpoint port +EXPOSE 8080 + +# JVM tuning for container environments +ENV JAVA_OPTS="-XX:+UseContainerSupport -XX:MaxRAMPercentage=75.0 -XX:+ExitOnOutOfMemoryError" + +ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -jar /app/app.jar $0 $@"] diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/README.md b/sdk/cosmos/azure-cosmos-avad-test/chaos/README.md new file mode 100644 index 000000000000..449d77a8f5dc --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/README.md @@ -0,0 +1,53 @@ +# Cosmos DB Soak Test — Chaos Library + +Reusable chaos injection scenarios for AKS-hosted Cosmos DB +consumers. Works with any workload deployed via the soak +infra Helm chart. + +## Scenarios + +| Scenario | Script | What It Tests | +|----------|--------|---------------| +| Pod Kill | `pod-kill.sh` | Lease rebalancing after random pod loss | +| Restart Storm | `restart-storm.sh` | Mass lease handoff on rolling restart | +| Lease Throttle | `lease-throttle.sh` | CFP behavior under lease container RU starvation | +| Network Fault | `network-fault.sh` | Retry behavior, session consistency | +| Partition Split | `partition-split.sh` | Continuation token validity across splits | +| Node Drain | `node-drain.sh` | Graceful shutdown, lease release timing | + +## Usage + +### Manual — run one scenario + +```bash +export NAMESPACE=cosmos-soak +export COSMOS_ACCOUNT=abhm-cfp-region-test +export COSMOS_RG=abhm-rg + +# Kill a random AVAD CFP pod +bash chaos/scenarios/pod-kill.sh + +# Throttle lease container to 400 RU for 5 min +TARGET_RU=400 THROTTLE_DURATION=300 bash chaos/scenarios/lease-throttle.sh + +# Trigger partition split (2x throughput) +SCALE_FACTOR=2 bash chaos/scenarios/partition-split.sh +``` + +### Automated — via soak orchestrator + +The `run-soak.sh` orchestrator reads `chaos-schedule.yaml` +and fires scenarios on a phase-based schedule: + +``` +Warm-up → Steady → Chaos → Recovery → repeat +``` + +See `chaos-schedule.yaml` for interval/parameter config. + +## Adding a New Scenario + +1. Create `chaos/scenarios/my-scenario.sh` +2. Use env vars for all parameters (no hardcoded values) +3. Add an entry to `chaos-schedule.yaml` +4. The soak orchestrator will pick it up automatically diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml b/sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml new file mode 100644 index 000000000000..08ac8ef767c0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml @@ -0,0 +1,52 @@ +# Chaos schedule configuration +# The soak orchestrator reads this file to determine when to +# fire each chaos scenario. 
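+# (run-soak.sh's run_chaos_loop currently approximates these intervals with an
+# iteration-based cadence; the values here record the intended schedule and
+# parameters for each scenario.)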
+# +# interval_hours: time between invocations of this scenario +# recovery_minutes: time to wait after chaos before checking health +# enabled: set to false to skip this scenario + +schedule: + - scenario: pod-kill + interval_hours: 2 + recovery_minutes: 5 + enabled: true + params: + component: avad-cfp + + - scenario: restart-storm + interval_hours: 8 + recovery_minutes: 15 + enabled: true + params: + component: avad-cfp + + - scenario: lease-throttle + interval_hours: 4 + recovery_minutes: 10 + enabled: true + params: + target_ru: 400 + throttle_duration: 300 + + - scenario: network-fault + interval_hours: 6 + recovery_minutes: 5 + enabled: true + params: + block_duration: 30 + component: avad-cfp + + - scenario: partition-split + interval_hours: 12 + recovery_minutes: 30 + enabled: true + params: + scale_factor: 2 + + - scenario: node-drain + interval_hours: 24 + recovery_minutes: 15 + enabled: true + params: + component: avad-cfp diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh new file mode 100644 index 000000000000..ae67f9502a12 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Lease Throttle — scale lease container RU to simulate throttling +set -euo pipefail + +COSMOS_ACCOUNT="${COSMOS_ACCOUNT:?Set COSMOS_ACCOUNT}" +COSMOS_RG="${COSMOS_RG:?Set COSMOS_RG}" +COSMOS_DB="${COSMOS_DB:-graph_db}" +LEASE_CONTAINER="${LEASE_CONTAINER:-avad-test-leases}" +TARGET_RU="${TARGET_RU:-400}" +THROTTLE_DURATION="${THROTTLE_DURATION:-300}" # 5 minutes + +echo "[$(date '+%H:%M:%S')] Chaos: lease-throttle" + +# Save current throughput +CURRENT_RU=$(az cosmosdb sql container throughput show \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$LEASE_CONTAINER" \ + --query "resource.throughput" -o tsv 2>/dev/null || echo "autoscale") + +echo " Current lease RU: $CURRENT_RU" +echo " Scaling to: $TARGET_RU RU for ${THROTTLE_DURATION}s" + +# Scale down +az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$LEASE_CONTAINER" \ + --throughput "$TARGET_RU" \ + --output none + +echo " Lease container throttled to $TARGET_RU RU" +sleep "$THROTTLE_DURATION" + +# Restore +if [ "$CURRENT_RU" != "autoscale" ]; then + echo " Restoring lease RU to $CURRENT_RU" + az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$LEASE_CONTAINER" \ + --throughput "$CURRENT_RU" \ + --output none +fi + +echo " Lease throttle complete" diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh new file mode 100644 index 000000000000..79aad0d10e95 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Network Fault — block Cosmos endpoint temporarily +set -euo pipefail + +NAMESPACE="${NAMESPACE:-cosmos-soak}" +COMPONENT="${COMPONENT:-avad-cfp}" +BLOCK_DURATION="${BLOCK_DURATION:-30}" # seconds + +echo "[$(date '+%H:%M:%S')] Chaos: network-fault (${BLOCK_DURATION}s block)" + +# Get a random pod +POD=$(kubectl get pods -n "$NAMESPACE" \ + -l "app.kubernetes.io/component=${COMPONENT}" \ + --field-selector=status.phase=Running \ + -o 
jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | shuf -n 1) + +if [ -z "$POD" ]; then + echo " No running pods found" + exit 0 +fi + +echo " Target pod: $POD" +echo " Injecting network fault for ${BLOCK_DURATION}s" + +# Block outbound to Cosmos port 443 using iptables +kubectl exec -n "$NAMESPACE" "$POD" -- \ + sh -c "iptables -A OUTPUT -p tcp --dport 443 -j DROP 2>/dev/null || echo 'iptables not available (need NET_ADMIN)'" + +sleep "$BLOCK_DURATION" + +# Remove the block +kubectl exec -n "$NAMESPACE" "$POD" -- \ + sh -c "iptables -D OUTPUT -p tcp --dport 443 -j DROP 2>/dev/null || true" + +echo " Network fault removed from $POD" diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh new file mode 100644 index 000000000000..0d5d660a1603 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Node Drain — cordon and drain an AKS node +set -euo pipefail + +echo "[$(date '+%H:%M:%S')] Chaos: node-drain" + +# Find a node running CFP pods +NAMESPACE="${NAMESPACE:-cosmos-soak}" +COMPONENT="${COMPONENT:-avad-cfp}" + +NODE=$(kubectl get pods -n "$NAMESPACE" \ + -l "app.kubernetes.io/component=${COMPONENT}" \ + --field-selector=status.phase=Running \ + -o jsonpath='{.items[0].spec.nodeName}') + +if [ -z "$NODE" ]; then + echo " No nodes found running $COMPONENT pods" + exit 0 +fi + +echo " Draining node: $NODE" +kubectl cordon "$NODE" +kubectl drain "$NODE" --ignore-daemonsets --delete-emptydir-data \ + --timeout=300s --force || true + +echo " Node $NODE drained. Waiting 120s before uncordoning..." +sleep 120 + +kubectl uncordon "$NODE" +echo " Node $NODE uncordoned" diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh new file mode 100644 index 000000000000..b9589ddbee80 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Partition Split — scale feed container throughput to trigger split +set -euo pipefail + +COSMOS_ACCOUNT="${COSMOS_ACCOUNT:?Set COSMOS_ACCOUNT}" +COSMOS_RG="${COSMOS_RG:?Set COSMOS_RG}" +COSMOS_DB="${COSMOS_DB:-graph_db}" +FEED_CONTAINER="${FEED_CONTAINER:-avad-test}" +SCALE_FACTOR="${SCALE_FACTOR:-2}" + +echo "[$(date '+%H:%M:%S')] Chaos: partition-split" + +# Get current throughput +CURRENT_RU=$(az cosmosdb sql container throughput show \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$FEED_CONTAINER" \ + --query "resource.throughput" -o tsv) + +TARGET_RU=$((CURRENT_RU * SCALE_FACTOR)) + +echo " Current feed RU: $CURRENT_RU" +echo " Scaling to: $TARGET_RU RU (${SCALE_FACTOR}x) to trigger split" + +# Get pre-split partition count +PRE_SPLIT_PARTITIONS=$(az cosmosdb sql container show \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$FEED_CONTAINER" \ + --query "resource.statistics[0].partitionCount" -o tsv 2>/dev/null || echo "unknown") + +echo " Pre-split partition count: $PRE_SPLIT_PARTITIONS" + +# Scale up +az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "$FEED_CONTAINER" \ + --throughput "$TARGET_RU" \ + --output none + +echo " Feed container scaled to $TARGET_RU RU" +echo " Partition split may take several minutes to 
complete" + +# Poll for split completion (check partition count changes) +WAIT_TIME=0 +MAX_WAIT=1800 # 30 minutes +while [ $WAIT_TIME -lt $MAX_WAIT ]; do + sleep 60 + WAIT_TIME=$((WAIT_TIME + 60)) + echo " Waiting for split... (${WAIT_TIME}s elapsed)" +done + +echo " Partition split chaos event complete (waited ${WAIT_TIME}s)" diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh new file mode 100644 index 000000000000..3ceda4849f3d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Pod Kill — kill a random CFP pod to test lease rebalancing +set -euo pipefail + +NAMESPACE="${NAMESPACE:-cosmos-soak}" +COMPONENT="${COMPONENT:-avad-cfp}" +LABEL="app.kubernetes.io/component=${COMPONENT}" + +echo "[$(date '+%H:%M:%S')] Chaos: pod-kill targeting $COMPONENT" + +POD=$(kubectl get pods -n "$NAMESPACE" -l "$LABEL" \ + --field-selector=status.phase=Running \ + -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | shuf -n 1) + +if [ -z "$POD" ]; then + echo " No running pods found for $LABEL" + exit 0 +fi + +echo " Killing pod: $POD" +kubectl delete pod "$POD" -n "$NAMESPACE" --grace-period=0 --force +echo " Pod $POD killed" diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh new file mode 100644 index 000000000000..e934ffd0b686 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Restart Storm — rolling restart all CFP pods +set -euo pipefail + +NAMESPACE="${NAMESPACE:-cosmos-soak}" +RELEASE="${RELEASE:-cosmos-soak}" +COMPONENT="${COMPONENT:-avad-cfp}" + +echo "[$(date '+%H:%M:%S')] Chaos: restart-storm for ${RELEASE}-${COMPONENT}" +kubectl rollout restart statefulset "${RELEASE}-${COMPONENT}" -n "$NAMESPACE" +echo " Rolling restart initiated" + +# Wait for rollout to complete (with timeout) +kubectl rollout status statefulset "${RELEASE}-${COMPONENT}" \ + -n "$NAMESPACE" --timeout=600s || true +echo " Restart storm complete" diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/README.md b/sdk/cosmos/azure-cosmos-avad-test/infra/README.md new file mode 100644 index 000000000000..7d69356c35a8 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/README.md @@ -0,0 +1,52 @@ +# Cosmos DB Soak Test — Infrastructure + +Reusable Helm chart and setup scripts for running Cosmos DB +change feed processor soak tests on AKS. + +## Quick Start + +```bash +# 1. Create AKS cluster +./scripts/setup-aks.sh + +# 2. Create Cosmos containers +./scripts/setup-cosmos.sh + +# 3. Build + push image to ACR +./scripts/setup-acr.sh + +# 4. Deploy (from repo root) +cd ../.. +./run-soak.sh +``` + +## What This Provides + +| Component | Description | +|-----------|-------------| +| `chart/` | Helm chart with templated Deployments, StatefulSets, ConfigMaps, probes | +| `scripts/setup-aks.sh` | AKS cluster provisioning | +| `scripts/setup-cosmos.sh` | Cosmos containers (feed, lease, reconciliation, health) | +| `scripts/setup-acr.sh` | ACR creation + image build/push | + +## Reusing for Your Own Workload + +1. Build a container image with your workload logic +2. Implement HTTP endpoints: `/health` (liveness), `/ready` + (readiness), `/metrics` (optional) +3. Create a `values-myworkload.yaml` overriding: + - `image.repository` / `image.tag` + - `cosmos.*` (endpoint, containers, etc.) 
+ - `avadConsumer.replicas` / `lvConsumer.replicas` +4. Deploy: `helm upgrade --install my-soak ./infra/chart -f values-myworkload.yaml` + +## Azure Resources + +Default configuration targets: +- Subscription: `b31b6408-0fb5-4688-9a3c-33ffb3983297` +- Resource Group: `abhm-rg` +- AKS: `abhm-avad-soak-aks` (3x D4s_v5 nodes) +- ACR: `abhmavadsoakacr` +- Cosmos: `abhm-cfp-region-test` + +Override via environment variables in each script. diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml new file mode 100644 index 000000000000..35b187435015 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: cosmos-soak +description: Reusable Helm chart for Cosmos DB soak testing on AKS +version: 0.1.0 +appVersion: "1.0" +type: application diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl new file mode 100644 index 000000000000..aedf64eb14f3 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl @@ -0,0 +1,68 @@ +{{- define "cosmos-soak.labels" -}} +app.kubernetes.io/name: {{ .Chart.Name }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "cosmos-soak.selectorLabels" -}} +app.kubernetes.io/name: {{ .Chart.Name }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "cosmos-soak.cosmosEnv" -}} +- name: COSMOS_ENDPOINT + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: endpoint +- name: COSMOS_KEY + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: cosmos-key +- name: COSMOS_DATABASE + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: database +- name: COSMOS_FEED_CONTAINER + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: feedContainer +- name: COSMOS_LEASE_CONTAINER + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: leaseContainer +- name: COSMOS_PREFERRED_REGION + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: preferredRegion +- name: OPS_PER_SEC + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: opsPerSec +- name: DOC_SIZE_BYTES + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: docSizeBytes +- name: LOGICAL_PARTITION_COUNT + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: logicalPartitionCount +- name: DURATION_SECONDS + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: durationSeconds +- name: WORKER_COUNT + valueFrom: + configMapKeyRef: + name: {{ .Release.Name }}-config + key: workerCount +{{- end }} diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml new file mode 100644 index 000000000000..e40e405e01c7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-config + namespace: {{ .Values.namespace }} + labels: + {{- include "cosmos-soak.labels" . 
| nindent 4 }} +data: + endpoint: {{ .Values.cosmos.endpoint | quote }} + database: {{ .Values.cosmos.database | quote }} + feedContainer: {{ .Values.cosmos.feedContainer | quote }} + leaseContainer: {{ .Values.cosmos.leaseContainer | quote }} + preferredRegion: {{ .Values.cosmos.preferredRegion | quote }} + opsPerSec: {{ .Values.cosmos.opsPerSec | quote }} + docSizeBytes: {{ .Values.cosmos.docSizeBytes | quote }} + logicalPartitionCount: {{ .Values.cosmos.logicalPartitionCount | quote }} + durationSeconds: {{ .Values.cosmos.durationSeconds | quote }} + workerCount: {{ .Values.cosmos.workerCount | quote }} diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml new file mode 100644 index 000000000000..cd0b7752e623 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml @@ -0,0 +1,70 @@ +{{- range $consumerName, $consumer := dict "avad" .Values.avadConsumer "lv" .Values.lvConsumer }} +{{- if $consumer.enabled }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $.Release.Name }}-{{ $consumerName }}-cfp + namespace: {{ $.Values.namespace }} + labels: + {{- include "cosmos-soak.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $consumerName }}-cfp +spec: + serviceName: {{ $.Release.Name }}-{{ $consumerName }}-cfp + replicas: {{ $consumer.replicas }} + selector: + matchLabels: + {{- include "cosmos-soak.selectorLabels" $ | nindent 6 }} + app.kubernetes.io/component: {{ $consumerName }}-cfp + template: + metadata: + labels: + {{- include "cosmos-soak.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: {{ $consumerName }}-cfp + spec: + imagePullSecrets: + - name: acr-secret + containers: + - name: {{ $consumerName }}-cfp + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + args: ["--mode", "{{ $consumer.mode }}"] + ports: + - containerPort: 8080 + name: health + env: + {{- include "cosmos-soak.cosmosEnv" $ | nindent 12 }} + resources: + {{- toYaml $consumer.resources | nindent 12 }} + livenessProbe: + httpGet: + path: {{ $.Values.probes.liveness.path }} + port: {{ $.Values.probes.liveness.port }} + initialDelaySeconds: {{ $.Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ $.Values.probes.liveness.periodSeconds }} + readinessProbe: + httpGet: + path: {{ $.Values.probes.readiness.path }} + port: {{ $.Values.probes.readiness.port }} + initialDelaySeconds: {{ $.Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ $.Values.probes.readiness.periodSeconds }} +--- +# Headless Service for StatefulSet DNS +apiVersion: v1 +kind: Service +metadata: + name: {{ $.Release.Name }}-{{ $consumerName }}-cfp + namespace: {{ $.Values.namespace }} + labels: + {{- include "cosmos-soak.labels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $consumerName }}-cfp +spec: + clusterIP: None + selector: + {{- include "cosmos-soak.selectorLabels" $ | nindent 4 }} + app.kubernetes.io/component: {{ $consumerName }}-cfp + ports: + - port: 8080 + name: health +{{- end }} +{{- end }} diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml new file mode 100644 index 000000000000..ffab3a311c74 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml @@ -0,0 
+1,47 @@ +{{- if .Values.healthMonitor.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Release.Name }}-health-monitor + namespace: {{ .Values.namespace }} + labels: + {{- include "cosmos-soak.labels" . | nindent 4 }} + app.kubernetes.io/component: health-monitor +spec: + schedule: {{ .Values.healthMonitor.schedule | quote }} + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 5 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + metadata: + labels: + {{- include "cosmos-soak.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: health-monitor + spec: + restartPolicy: Never + imagePullSecrets: + - name: acr-secret + containers: + - name: health-monitor + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - "--mode" + - "health-monitor" + - "--run-id" + - "{{ .Release.Name }}" + - "--gap-sla-minutes" + - "{{ .Values.healthMonitor.gapSlaMinutes }}" + env: + {{- include "cosmos-soak.cosmosEnv" . | nindent 16 }} + resources: + requests: + cpu: "250m" + memory: "256Mi" + limits: + cpu: "500m" + memory: "512Mi" +{{- end }} diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml new file mode 100644 index 000000000000..f6aef0f2b4c1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml @@ -0,0 +1,48 @@ +{{- if .Values.ingestor.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-ingestor + namespace: {{ .Values.namespace }} + labels: + {{- include "cosmos-soak.labels" . | nindent 4 }} + app.kubernetes.io/component: ingestor +spec: + replicas: {{ .Values.ingestor.replicas }} + selector: + matchLabels: + {{- include "cosmos-soak.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ingestor + template: + metadata: + labels: + {{- include "cosmos-soak.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ingestor + spec: + imagePullSecrets: + - name: acr-secret + containers: + - name: ingestor + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: ["--mode", "{{ .Values.ingestor.mode }}"] + ports: + - containerPort: 8080 + name: health + env: + {{- include "cosmos-soak.cosmosEnv" . | nindent 12 }} + resources: + {{- toYaml .Values.ingestor.resources | nindent 12 }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.path }} + port: {{ .Values.probes.liveness.port }} + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.path }} + port: {{ .Values.probes.readiness.port }} + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} +{{- end }} diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml new file mode 100644 index 000000000000..6b201871c18e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml @@ -0,0 +1,90 @@ +# Default values for cosmos-soak Helm chart. +# Override with values-avad.yaml or your own values file. 
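+#
+# Example override sketch (hypothetical values-myworkload.yaml; the image
+# name, endpoint, and replica counts are illustrative, not defaults):
+#
+#   image:
+#     repository: myregistry.azurecr.io/my-workload
+#     tag: v1
+#   cosmos:
+#     endpoint: "https://my-account.documents.azure.com:443/"
+#   avadConsumer:
+#     replicas: 5
+#   lvConsumer:
+#     replicas: 5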
+ +namespace: cosmos-soak + +image: + repository: abhmavadsoakacr.azurecr.io/cosmos-avad-test + tag: latest + pullPolicy: Always + +# Ingestor deployment +ingestor: + enabled: true + replicas: 3 + mode: ingestor + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + +# AVAD CFP consumer (StatefulSet for stable hostnames) +avadConsumer: + enabled: true + replicas: 20 + mode: avad-reader + leasePrefix: "avad-" + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + +# LV CFP consumer (StatefulSet, parity baseline) +lvConsumer: + enabled: true + replicas: 20 + mode: lv-reader + leasePrefix: "lv-" + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + +# Health monitor CronJob +healthMonitor: + enabled: true + schedule: "*/5 * * * *" + gapSlaMinutes: 10 + +# Cosmos DB configuration +cosmos: + endpoint: "" + database: "graph_db" + feedContainer: "avad-test" + leaseContainer: "avad-test-leases" + preferredRegion: "West Central US" + opsPerSec: 5000 + docSizeBytes: 1024 + logicalPartitionCount: 100000 + durationSeconds: 0 # 0 = run forever + workerCount: 2 + +# Key Vault CSI +keyVault: + enabled: true + vaultName: "abhm-avad-soak-kv" + tenantId: "" + clientId: "" + secretName: "cosmos-key" + +# Probes +probes: + liveness: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readiness: + path: /ready + port: 8080 + initialDelaySeconds: 15 + periodSeconds: 5 diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh new file mode 100644 index 000000000000..a382d0cf863b --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# ============================================================================= +# ACR Setup + Image Build/Push +# ============================================================================= + +set -euo pipefail + +SUBSCRIPTION="${SUBSCRIPTION:-b31b6408-0fb5-4688-9a3c-33ffb3983297}" +RG="${RG:-abhm-rg}" +ACR_NAME="${ACR_NAME:-abhmavadsoakacr}" +IMAGE_NAME="${IMAGE_NAME:-cosmos-avad-test}" +IMAGE_TAG="${IMAGE_TAG:-latest}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$SCRIPT_DIR/../.." 
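+
+# Fail fast if the build context is incomplete (illustrative guard; assumes
+# the standard module layout with the Dockerfile at the project root).
+[ -f "$PROJECT_DIR/Dockerfile" ] || { echo "Dockerfile not found in $PROJECT_DIR" >&2; exit 1; }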
+ +az account set --subscription "$SUBSCRIPTION" + +echo "=== ACR Setup ===" + +# Create ACR (if not exists) +az acr create \ + --resource-group "$RG" \ + --name "$ACR_NAME" \ + --sku Basic \ + --output none 2>/dev/null || echo "ACR already exists" + +# Attach ACR to AKS (if AKS exists) +AKS_CLUSTER="${AKS_CLUSTER:-abhm-avad-soak-aks}" +az aks update \ + --resource-group "$RG" \ + --name "$AKS_CLUSTER" \ + --attach-acr "$ACR_NAME" \ + --output none 2>/dev/null || echo "AKS-ACR attachment skipped" + +echo "=== Building + pushing image ===" + +# Build and push using ACR Tasks (no local Docker needed) +az acr build \ + --registry "$ACR_NAME" \ + --image "${IMAGE_NAME}:${IMAGE_TAG}" \ + --file "$PROJECT_DIR/Dockerfile" \ + "$PROJECT_DIR" + +echo "Image pushed: ${ACR_NAME}.azurecr.io/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh new file mode 100644 index 000000000000..c0ebec583eec --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# ============================================================================= +# AKS Cluster Setup for Cosmos DB Soak Testing +# ============================================================================= +# Creates an AKS cluster in the abhm-rg resource group. +# Reusable: change variables for your own resource group/subscription. +# ============================================================================= + +set -euo pipefail + +SUBSCRIPTION="${SUBSCRIPTION:-b31b6408-0fb5-4688-9a3c-33ffb3983297}" +RG="${RG:-abhm-rg}" +LOCATION="${LOCATION:-eastus}" +CLUSTER_NAME="${CLUSTER_NAME:-abhm-avad-soak-aks}" +NODE_COUNT="${NODE_COUNT:-3}" +NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4s_v5}" +K8S_VERSION="${K8S_VERSION:-1.29}" + +echo "=== Setting up AKS cluster ===" +echo " Subscription: $SUBSCRIPTION" +echo " Resource Group: $RG" +echo " Cluster: $CLUSTER_NAME" +echo " Nodes: $NODE_COUNT x $NODE_VM_SIZE" + +az account set --subscription "$SUBSCRIPTION" + +# Create cluster +az aks create \ + --resource-group "$RG" \ + --name "$CLUSTER_NAME" \ + --location "$LOCATION" \ + --node-count "$NODE_COUNT" \ + --node-vm-size "$NODE_VM_SIZE" \ + --kubernetes-version "$K8S_VERSION" \ + --enable-managed-identity \ + --enable-addons monitoring \ + --generate-ssh-keys \ + --output none + +echo "AKS cluster created" + +# Get credentials +az aks get-credentials \ + --resource-group "$RG" \ + --name "$CLUSTER_NAME" \ + --overwrite-existing + +echo "kubectl context set to $CLUSTER_NAME" +kubectl get nodes diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh new file mode 100644 index 000000000000..7bd5e33e31de --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# ============================================================================= +# Cosmos DB Container Setup for Soak Testing +# ============================================================================= +# Creates required containers in an existing Cosmos account. +# Idempotent — safe to run multiple times. 
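+#
+# Containers created (partition key, TTL):
+#   avad-test        (/tenantId)       AVAD-enabled feed container
+#   avad-test-leases (/id)             CFP lease container
+#   reconciliation   (/correlationId)  TTL 24h
+#   soak-health      (/runId)          TTL 30d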
+# ============================================================================= + +set -euo pipefail + +COSMOS_ACCOUNT="${COSMOS_ACCOUNT:-abhm-cfp-region-test}" +COSMOS_RG="${COSMOS_RG:-abhm-rg}" +COSMOS_DB="${COSMOS_DB:-graph_db}" +SUBSCRIPTION="${SUBSCRIPTION:-b31b6408-0fb5-4688-9a3c-33ffb3983297}" + +az account set --subscription "$SUBSCRIPTION" + +echo "=== Setting up Cosmos DB containers ===" + +# Feed container (AVAD-enabled, /tenantId PK) +echo "Creating feed container: avad-test" +az cosmosdb sql container create \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "avad-test" \ + --partition-key-path "/tenantId" \ + --throughput 10000 \ + --output none 2>/dev/null || echo " already exists" + +# Lease container (/id PK) +echo "Creating lease container: avad-test-leases" +az cosmosdb sql container create \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "avad-test-leases" \ + --partition-key-path "/id" \ + --throughput 1000 \ + --output none 2>/dev/null || echo " already exists" + +# Reconciliation container (/correlationId PK, TTL 24h) +echo "Creating reconciliation container" +az cosmosdb sql container create \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "reconciliation" \ + --partition-key-path "/correlationId" \ + --throughput 5000 \ + --ttl 86400 \ + --output none 2>/dev/null || echo " already exists" + +# Soak health container (/runId PK, TTL 30 days) +echo "Creating soak-health container" +az cosmosdb sql container create \ + --account-name "$COSMOS_ACCOUNT" \ + --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DB" \ + --name "soak-health" \ + --partition-key-path "/runId" \ + --throughput 400 \ + --ttl 2592000 \ + --output none 2>/dev/null || echo " already exists" + +echo "=== All containers ready ===" diff --git a/sdk/cosmos/azure-cosmos-avad-test/pom.xml b/sdk/cosmos/azure-cosmos-avad-test/pom.xml new file mode 100644 index 000000000000..fdaa4ea4e62e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/pom.xml @@ -0,0 +1,202 @@ + + + 4.0.0 + + com.azure + azure-client-sdk-parent + 1.7.0 + ../../parents/azure-client-sdk-parent + + + com.azure + azure-cosmos-avad-test + 1.0.0-beta.1 + Microsoft Azure Cosmos DB - AVAD Change Feed Processor Soak Test + Soak test and correctness validation for All Versions and Deletes (AVAD) Change Feed Processor + https://github.com/Azure/azure-sdk-for-java + + + + azure-java-build-docs + ${site.url}/site/${project.artifactId} + + + + + https://github.com/Azure/azure-sdk-for-java + + + + + UTF-8 + 17 + 17 + 0.01 + 0.01 + true + true + true + true + - + true + + + + + + com.azure + azure-cosmos + 4.81.0-beta.1 + + + + + info.picocli + picocli + 4.7.6 + + + + + ch.qos.logback + logback-classic + 1.3.14 + + + + org.slf4j + slf4j-api + 1.7.36 + + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.6.0 + + + package + shade + + + + com.azure.cosmos.avadtest.Main + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + false + + + + + + + org.codehaus.mojo + exec-maven-plugin + 3.5.1 + + com.azure.cosmos.avadtest.Main + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.3 + + true + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.6.0 + + false + false + + + + + + com.github.spotbugs + spotbugs-maven-plugin + 4.8.3.1 + + false + + + + + org.apache.maven.plugins + maven-compiler-plugin + 
3.14.0 + + + default-compile + + 17 + 17 + false + + + + base-compile + compile + + 17 + 17 + 17 + false + + + + + 17 + 17 + false + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.6.1 + + + + + info.picocli:picocli:[4.7.6] + org.slf4j:slf4j-api:[1.7.36] + ch.qos.logback:logback-classic:[1.3.14] + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh b/sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh new file mode 100644 index 000000000000..44efcd677ec1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh @@ -0,0 +1,382 @@ +#!/bin/bash +# ============================================================================= +# AVAD Cut-Over Test Orchestrator +# ============================================================================= +# Runs from your dev box. Coordinates across 2 VMs via SSH. +# +# Architecture: +# Dev box (this script) ──SSH──► EUS VM (ingestor) +# ──SSH──► WCUS VM (lv-reader, avad-reader) +# +# Sequence: +# 1. Start ingestor on EUS VM +# 2. Wait for warm-up (configurable) +# 3. Start LV CFP reader on WCUS VM +# 4. Wait for LV warm-up (verify events flowing) +# 5. Start AVAD CFP reader on WCUS VM (staggered start) +# 6. Run all 3 concurrently for PARALLEL_DURATION +# 7. Stop LV reader → verify AVAD continues +# 8. Run AVAD-only for AVAD_ONLY_DURATION +# 9. Stop all +# 10. Collect logs from both VMs +# 11. Run reconciler locally +# ============================================================================= + +set -euo pipefail + +# ── Cleanup trap — stops all remote processes on error/exit ──────────────── +INGESTOR_PID="" +LV_PID="" +AVAD_PID="" +SPARK_LV_RUN_ID="" +SPARK_AVAD_RUN_ID="" + +cleanup() { + log "=== Cleanup triggered ===" + [ -n "$AVAD_PID" ] && stop_remote "$READER_VM" "$AVAD_PID" "avad-reader" 10 2>/dev/null || true + [ -n "$LV_PID" ] && stop_remote "$READER_VM" "$LV_PID" "lv-reader" 10 2>/dev/null || true + [ -n "$INGESTOR_PID" ] && stop_remote "$INGESTOR_VM" "$INGESTOR_PID" "ingestor" 10 2>/dev/null || true + [ -n "$SPARK_LV_RUN_ID" ] && stop_spark_job "$SPARK_LV_RUN_ID" "LV Spark" 2>/dev/null || true + [ -n "$SPARK_AVAD_RUN_ID" ] && stop_spark_job "$SPARK_AVAD_RUN_ID" "AVAD Spark" 2>/dev/null || true + log "=== Cleanup complete ===" +} +trap cleanup EXIT ERR INT TERM + +# ── Configuration ────────────────────────────────────────────────────────── +INGESTOR_VM="azureuser@" +READER_VM="azureuser@" + +# Databricks workspace +DATABRICKS_HOST="${DATABRICKS_HOST:?Set DATABRICKS_HOST (e.g. 
https://adb-xxx.azuredatabricks.net)}" +DATABRICKS_TOKEN="${DATABRICKS_TOKEN:?Set DATABRICKS_TOKEN}" +SPARK_LV_JOB_ID="${SPARK_LV_JOB_ID:?Set SPARK_LV_JOB_ID (Databricks job ID for LV Spark reader)}" +SPARK_AVAD_JOB_ID="${SPARK_AVAD_JOB_ID:?Set SPARK_AVAD_JOB_ID (Databricks job ID for AVAD Spark reader)}" + +JAR="cosmos-avad-test-1.0-SNAPSHOT.jar" +JAR_PATH="/home/azureuser/$JAR" + +# Cosmos config (set these or export before running) +export COSMOS_ENDPOINT="${COSMOS_ENDPOINT:?Set COSMOS_ENDPOINT}" +export COSMOS_KEY="${COSMOS_KEY:?Set COSMOS_KEY}" +export COSMOS_DATABASE="${COSMOS_DATABASE:-graph_db}" +export COSMOS_FEED_CONTAINER="${COSMOS_FEED_CONTAINER:-avad-test}" +export COSMOS_LEASE_CONTAINER="${COSMOS_LEASE_CONTAINER:-avad-test-leases}" +export COSMOS_PREFERRED_REGION="${COSMOS_PREFERRED_REGION:-West Central US}" +export OPS_PER_SEC="${OPS_PER_SEC:-10}" + +# Timing +INGESTOR_WARMUP_SEC=60 # Let ingestor run before starting readers +LV_WARMUP_SEC=120 # Let LV reader run before starting AVAD +PARALLEL_DURATION_SEC=1800 # All workloads running before split #1 (30 min) +SPLIT1_SETTLE_SEC=300 # Wait for split #1 to complete (5 min) +SPLIT2_SETTLE_SEC=300 # Wait for split #2 to complete (5 min) +POST_SPLIT_DURATION_SEC=1800 # Run after splits to verify no gaps (30 min) +AVAD_ONLY_DURATION_SEC=1800 # AVAD-only after LV shutdown (30 min) +SPARK_AVAD_DELAY_SEC=120 # Delay before starting AVAD Spark after LV Spark + +# Throughput levels for 2-level split test +# Start at 10K (~1 PP) → 50K (~5 PP, split #1) → 100K (~10 PP, split #2) +INITIAL_THROUGHPUT=10000 +SPLIT1_THROUGHPUT=50000 +SPLIT2_THROUGHPUT=100000 + +# Cosmos container config (for throughput scaling) +COSMOS_ACCOUNT="${COSMOS_ACCOUNT:-abhm-cfp-region-test}" +COSMOS_RG="${COSMOS_RG:-abhm-rg}" + +# Local output directory +OUTPUT_DIR="./cutover-results-$(date +%Y%m%d-%H%M%S)" +mkdir -p "$OUTPUT_DIR" + +# ── Helper functions ─────────────────────────────────────────────────────── + +log() { echo "[$(date '+%H:%M:%S')] $*"; } + +ssh_cmd() { + local vm="$1"; shift + ssh -o StrictHostKeyChecking=no "$vm" "$@" +} + +# Start a process on a remote VM, returns the remote PID +start_remote() { + local vm="$1" + local mode="$2" + local log_file="$3" + local extra_env="${4:-}" + + log "Starting --mode $mode on $vm" + local pid + pid=$(ssh_cmd "$vm" " + export COSMOS_ENDPOINT='$COSMOS_ENDPOINT' + export COSMOS_KEY='$COSMOS_KEY' + export COSMOS_DATABASE='$COSMOS_DATABASE' + export COSMOS_FEED_CONTAINER='$COSMOS_FEED_CONTAINER' + export COSMOS_LEASE_CONTAINER='$COSMOS_LEASE_CONTAINER' + export COSMOS_PREFERRED_REGION='$COSMOS_PREFERRED_REGION' + export OPS_PER_SEC='$OPS_PER_SEC' + $extra_env + nohup java -jar $JAR_PATH --mode $mode > $log_file 2>&1 & + echo \$! + ") + log " PID: $pid" + echo "$pid" +} + +# Stop a process on a remote VM by PID with graceful timeout +stop_remote() { + local vm="$1" + local pid="$2" + local label="$3" + local timeout="${4:-30}" # default 30s graceful shutdown + + log "Stopping $label (PID $pid) on $vm, timeout=${timeout}s" + ssh_cmd "$vm" "kill $pid 2>/dev/null || true" + + # Wait for graceful exit with timeout loop + local elapsed=0 + while [ $elapsed -lt $timeout ]; do + if ! 
ssh_cmd "$vm" "kill -0 $pid 2>/dev/null"; then + log " $label exited gracefully after ${elapsed}s" + return 0 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + + # Force kill only if still running + log " $label still running after ${timeout}s, sending SIGKILL" + ssh_cmd "$vm" "kill -9 $pid 2>/dev/null || true" + sleep 2 + log " $label force-killed" +} + +# Collect a file from a remote VM +collect_log() { + local vm="$1" + local remote_path="$2" + local local_name="$3" + + log "Collecting $remote_path from $vm" + scp -o StrictHostKeyChecking=no "$vm:$remote_path" "$OUTPUT_DIR/$local_name" 2>/dev/null || \ + log " WARNING: Could not collect $remote_path" +} + +# ── Databricks helpers ───────────────────────────────────────────────────── + +dbx_api() { + local method="$1" + local endpoint="$2" + shift 2 + curl -s -X "$method" \ + "$DATABRICKS_HOST/api/2.1/jobs$endpoint" \ + -H "Authorization: Bearer $DATABRICKS_TOKEN" \ + -H "Content-Type: application/json" \ + "$@" +} + +# Start a Databricks job, returns run_id +start_spark_job() { + local job_id="$1" + local label="$2" + + log "Starting Spark $label (job_id=$job_id)" + local response + response=$(dbx_api POST "/run-now" -d "{\"job_id\": $job_id}") + local run_id + run_id=$(echo "$response" | python3 -c "import sys,json; print(json.load(sys.stdin).get('run_id',''))" 2>/dev/null) + + if [ -z "$run_id" ]; then + log " ERROR: Failed to start Spark $label: $response" + echo "" + else + log " Spark $label run_id: $run_id" + echo "$run_id" + fi +} + +# Cancel a Databricks run +stop_spark_job() { + local run_id="$1" + local label="$2" + + if [ -n "$run_id" ]; then + log "Cancelling Spark $label (run_id=$run_id)" + dbx_api POST "/runs/cancel" -d "{\"run_id\": $run_id}" > /dev/null + log " Spark $label cancelled" + fi +} + +# Get Spark run status +spark_status() { + local run_id="$1" + dbx_api GET "/runs/get?run_id=$run_id" | \ + python3 -c "import sys,json; r=json.load(sys.stdin); print(r.get('state',{}).get('life_cycle_state','UNKNOWN'))" 2>/dev/null +} + +# ── Pre-flight checks ───────────────────────────────────────────────────── + +log "=== AVAD Cut-Over Test ===" +log "Ingestor VM: $INGESTOR_VM" +log "Reader VM: $READER_VM" +log "Databricks: $DATABRICKS_HOST" +log "Spark LV job: $SPARK_LV_JOB_ID" +log "Spark AVAD job: $SPARK_AVAD_JOB_ID" +log "Output dir: $OUTPUT_DIR" +log "" + +log "Checking JAR exists on both VMs..." +ssh_cmd "$INGESTOR_VM" "test -f $JAR_PATH" || { log "ERROR: JAR not found on ingestor VM"; exit 1; } +ssh_cmd "$READER_VM" "test -f $JAR_PATH" || { log "ERROR: JAR not found on reader VM"; exit 1; } +log " ✅ JAR found on both VMs" + +log "Checking Java on both VMs..." +ssh_cmd "$INGESTOR_VM" "java -version" 2>&1 | head -1 +ssh_cmd "$READER_VM" "java -version" 2>&1 | head -1 +log "" + +# ── Phase 0: Scale container to initial throughput ───────────────────────── + +log "=== Phase 0: Scaling container to ${INITIAL_THROUGHPUT} RU/s (~1 PP) ===" +az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ + --throughput "$INITIAL_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" +log " Container at ${INITIAL_THROUGHPUT} RU/s. Waiting 60s for propagation..." 
+sleep 60 + +# ── Phase 1: Start ingestor ─────────────────────────────────────────────── + +log "=== Phase 1: Ingestor warm-up ($INGESTOR_WARMUP_SEC sec) ===" +INGESTOR_PID=$(start_remote "$INGESTOR_VM" "ingestor" "/home/azureuser/ingestor.log" \ + "export PRODUCED_LOG=/home/azureuser/produced.log") +sleep "$INGESTOR_WARMUP_SEC" +log " Ingestor warm-up complete" + +# ── Phase 2: Start LV reader + LV Spark ─────────────────────────────────── + +log "=== Phase 2: LV Reader + LV Spark warm-up ($LV_WARMUP_SEC sec) ===" +LV_PID=$(start_remote "$READER_VM" "lv-reader" "/home/azureuser/lv-reader.log" \ + "export CONSUMED_LOG=/home/azureuser/consumed-lv.log") +SPARK_LV_RUN_ID=$(start_spark_job "$SPARK_LV_JOB_ID" "LV reader") +sleep "$LV_WARMUP_SEC" +log " LV reader + LV Spark warm-up complete" + +# ── Phase 3: Start AVAD reader + AVAD Spark (staggered) ──────────────────── + +log "=== Phase 3: AVAD Reader + AVAD Spark starting (staggered after LV) ===" +AVAD_PID=$(start_remote "$READER_VM" "avad-reader" "/home/azureuser/avad-reader.log" \ + "export CONSUMED_LOG=/home/azureuser/consumed-avad.log") +log " AVAD CFP started, waiting ${SPARK_AVAD_DELAY_SEC}s before starting AVAD Spark..." +sleep "$SPARK_AVAD_DELAY_SEC" +SPARK_AVAD_RUN_ID=$(start_spark_job "$SPARK_AVAD_JOB_ID" "AVAD reader") +log " AVAD reader + AVAD Spark started" + +# ── Phase 4: Parallel run before splits ──────────────────────────────────── + +log "=== Phase 4: Parallel run — all 5 workloads at ${INITIAL_THROUGHPUT} RU/s ($PARALLEL_DURATION_SEC sec) ===" +log " All 5 workloads running on ~1 physical partition. Waiting..." +sleep "$PARALLEL_DURATION_SEC" + +# ── Phase 4a: Split #1 — scale 10K → 50K (~1 PP → ~5 PP) ────────────────── + +log "=== Phase 4a: Split #1 — scaling ${INITIAL_THROUGHPUT} → ${SPLIT1_THROUGHPUT} RU/s ===" +az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ + --throughput "$SPLIT1_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" +log " Throughput update submitted. Waiting ${SPLIT1_SETTLE_SEC}s for split to complete..." +sleep "$SPLIT1_SETTLE_SEC" +log " Split #1 settle complete" + +# ── Phase 4b: Split #2 — scale 50K → 100K (~5 PP → ~10 PP) ──────────────── + +log "=== Phase 4b: Split #2 — scaling ${SPLIT1_THROUGHPUT} → ${SPLIT2_THROUGHPUT} RU/s ===" +az cosmosdb sql container throughput update \ + --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ + --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ + --throughput "$SPLIT2_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" +log " Throughput update submitted. Waiting ${SPLIT2_SETTLE_SEC}s for split to complete..." +sleep "$SPLIT2_SETTLE_SEC" +log " Split #2 settle complete" + +# ── Phase 4c: Post-split run — verify no events missed ───────────────────── + +log "=== Phase 4c: Post-split run — all workloads on ~10 PPs ($POST_SPLIT_DURATION_SEC sec) ===" +log " Verifying CFP handled both splits. Waiting..." 
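+# After a split, the change feed processor swaps the parent partition's lease
+# for child leases and resumes each child from the parent's continuation, so
+# no events should be dropped; Phase 9 reconciliation verifies this end to
+# end against the produced log.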
+sleep "$POST_SPLIT_DURATION_SEC" + +# ── Phase 5: Stop LV reader + LV Spark ──────────────────────────────────── + +log "=== Phase 5: Stopping LV Reader + LV Spark (AVAD continues) ===" +stop_remote "$READER_VM" "$LV_PID" "lv-reader" +stop_spark_job "$SPARK_LV_RUN_ID" "LV reader" + +# ── Phase 6: AVAD-only run ───────────────────────────────────────────────── + +log "=== Phase 6: AVAD-only run ($AVAD_ONLY_DURATION_SEC sec) ===" +log " Ingestor + AVAD CFP + AVAD Spark running. LV stopped. Waiting..." +sleep "$AVAD_ONLY_DURATION_SEC" + +# ── Phase 7: Stop all ────────────────────────────────────────────────────── + +log "=== Phase 7: Stopping all workloads ===" +stop_remote "$READER_VM" "$AVAD_PID" "avad-reader" +stop_spark_job "$SPARK_AVAD_RUN_ID" "AVAD reader" +stop_remote "$INGESTOR_VM" "$INGESTOR_PID" "ingestor" + +# ── Phase 8: Collect logs ────────────────────────────────────────────────── + +log "=== Phase 8: Collecting logs ===" +collect_log "$INGESTOR_VM" "/home/azureuser/produced.log" "produced.log" +collect_log "$INGESTOR_VM" "/home/azureuser/ingestor.log" "ingestor.log" +collect_log "$READER_VM" "/home/azureuser/consumed-lv.log" "consumed-lv.log" +collect_log "$READER_VM" "/home/azureuser/consumed-avad.log" "consumed-avad.log" +collect_log "$READER_VM" "/home/azureuser/lv-reader.log" "lv-reader.log" +collect_log "$READER_VM" "/home/azureuser/avad-reader.log" "avad-reader.log" + +# Collect Spark run details +log "Collecting Spark run status..." +if [ -n "$SPARK_LV_RUN_ID" ]; then + dbx_api GET "/runs/get?run_id=$SPARK_LV_RUN_ID" > "$OUTPUT_DIR/spark-lv-run.json" + log " Spark LV final status: $(spark_status "$SPARK_LV_RUN_ID")" +fi +if [ -n "$SPARK_AVAD_RUN_ID" ]; then + dbx_api GET "/runs/get?run_id=$SPARK_AVAD_RUN_ID" > "$OUTPUT_DIR/spark-avad-run.json" + log " Spark AVAD final status: $(spark_status "$SPARK_AVAD_RUN_ID")" +fi + +# ── Phase 9: Reconciliation ─────────────────────────────────────────────── + +log "=== Phase 9: Running reconciliation ===" + +log "Gap detection: produced vs AVAD consumed" +java -jar "target/$JAR" --mode reconcile \ + --produced "$OUTPUT_DIR/produced.log" \ + --consumed "$OUTPUT_DIR/consumed-avad.log" \ + | tee "$OUTPUT_DIR/reconcile-avad.txt" +AVAD_EXIT=$? + +log "Gap detection: produced vs LV consumed" +java -jar "target/$JAR" --mode reconcile \ + --produced "$OUTPUT_DIR/produced.log" \ + --consumed "$OUTPUT_DIR/consumed-lv.log" \ + | tee "$OUTPUT_DIR/reconcile-lv.txt" +LV_EXIT=$? + +log "Parity check: LV vs AVAD" +java -jar "target/$JAR" --mode reconcile \ + --lv "$OUTPUT_DIR/consumed-lv.log" \ + --avad "$OUTPUT_DIR/consumed-avad.log" \ + | tee "$OUTPUT_DIR/parity.txt" +PARITY_EXIT=$? 
+ +# ── Summary ──────────────────────────────────────────────────────────────── + +log "=== RESULTS ===" +log " AVAD gap check: $([ $AVAD_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" +log " LV gap check: $([ $LV_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" +log " LV↔AVAD parity: $([ $PARITY_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" +log " Results in: $OUTPUT_DIR" +log "=== DONE ===" + +exit $(( AVAD_EXIT + LV_EXIT + PARITY_EXIT )) diff --git a/sdk/cosmos/azure-cosmos-avad-test/run-soak.sh b/sdk/cosmos/azure-cosmos-avad-test/run-soak.sh new file mode 100644 index 000000000000..92e85c8a2e44 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/run-soak.sh @@ -0,0 +1,242 @@ +#!/bin/bash +# ============================================================================= +# AVAD Soak Test Orchestrator +# ============================================================================= +# Single script that manages the full soak test lifecycle: +# 1. Deploy workloads via Helm +# 2. Run phase-based chaos (warm-up → steady → chaos → recovery → repeat) +# 3. Continuous reconciliation (no missed changes, AVAD ⊇ LV) +# 4. Collect results on exit +# +# Prerequisites: +# - AKS cluster configured (kubectl context set) +# - Helm 3 installed +# - ACR image pushed (see infra/scripts/setup-acr.sh) +# - Cosmos containers created (see infra/scripts/setup-cosmos.sh) +# - az CLI logged in (for partition split + lease throttle) +# ============================================================================= + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# ── Configuration ───────────────────────────────────────────────────────── +NAMESPACE="${NAMESPACE:-cosmos-soak}" +RELEASE="${RELEASE:-cosmos-soak}" +VALUES_FILE="${VALUES_FILE:-$SCRIPT_DIR/infra/chart/values.yaml}" +VALUES_OVERRIDE="${VALUES_OVERRIDE:-}" +CHAOS_SCHEDULE="${CHAOS_SCHEDULE:-$SCRIPT_DIR/chaos/chaos-schedule.yaml}" + +# Timing (all in seconds) +SOAK_DURATION_HOURS="${SOAK_DURATION_HOURS:-24}" +WARMUP_SEC="${WARMUP_SEC:-1800}" # 30 min +STEADY_SEC="${STEADY_SEC:-3600}" # 60 min +RECOVERY_SEC="${RECOVERY_SEC:-1800}" # 30 min default +HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-300}" # 5 min + +# Behavior +CHAOS_ENABLED="${CHAOS_ENABLED:-true}" +ABORT_ON_GAP="${ABORT_ON_GAP:-false}" + +# Cosmos (for chaos scripts) +export COSMOS_ACCOUNT="${COSMOS_ACCOUNT:-abhm-cfp-region-test}" +export COSMOS_RG="${COSMOS_RG:-abhm-rg}" +export COSMOS_DB="${COSMOS_DB:-graph_db}" +export FEED_CONTAINER="${FEED_CONTAINER:-avad-test}" +export LEASE_CONTAINER="${LEASE_CONTAINER:-avad-test-leases}" + +# Output +OUTPUT_DIR="$SCRIPT_DIR/soak-results-$(date +%Y%m%d-%H%M%S)" +mkdir -p "$OUTPUT_DIR" + +SOAK_DURATION_SEC=$((SOAK_DURATION_HOURS * 3600)) +START_TIME=$(date +%s) +CHAOS_PID="" + +# ── Helper functions ────────────────────────────────────────────────────── + +log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$OUTPUT_DIR/soak.log"; } + +elapsed() { + local now=$(date +%s) + echo $(( now - START_TIME )) +} + +is_expired() { + [ "$SOAK_DURATION_SEC" -gt 0 ] && [ "$(elapsed)" -ge "$SOAK_DURATION_SEC" ] +} + +# ── Cleanup ─────────────────────────────────────────────────────────────── + +cleanup() { + log "=== Cleanup triggered ===" + [ -n "$CHAOS_PID" ] && kill "$CHAOS_PID" 2>/dev/null || true + + log "Collecting pod logs..." 
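+  # Pod logs are capped at the last 5000 lines each (--tail below); raise the
+  # cap if post-mortem analysis needs a longer window.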
+    for pod in $(kubectl get pods -n "$NAMESPACE" -o name 2>/dev/null); do
+        local name=$(basename "$pod")
+        kubectl logs "$pod" -n "$NAMESPACE" --tail=5000 \
+            > "$OUTPUT_DIR/${name}.log" 2>/dev/null || true
+    done
+
+    log "Collecting health metrics..."
+    kubectl get pods -n "$NAMESPACE" -o wide \
+        > "$OUTPUT_DIR/pods-final.txt" 2>/dev/null || true
+
+    log "Results saved to: $OUTPUT_DIR"
+    log "=== Soak test ended (elapsed: $(elapsed)s) ==="
+}
+# Run cleanup exactly once: signals route through exit, which fires the EXIT trap
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# ── Health check ────────────────────────────────────────────────────────────
+
+check_health() {
+    log "Running health check..."
+    local healthy=true
+
+    # Check all pods are running
+    local not_ready=$(kubectl get pods -n "$NAMESPACE" \
+        --field-selector=status.phase!=Running \
+        -o name 2>/dev/null | wc -l)
+
+    if [ "$not_ready" -gt 0 ]; then
+        log "  ⚠️  $not_ready pods not in Running state"
+    fi
+
+    # Check metrics from ingestor pods
+    for pod in $(kubectl get pods -n "$NAMESPACE" \
+        -l "app.kubernetes.io/component=ingestor" \
+        -o jsonpath='{.items[*].metadata.name}'); do
+        local metrics=$(kubectl exec -n "$NAMESPACE" "$pod" -- \
+            curl -s http://localhost:8080/metrics 2>/dev/null || echo "unreachable")
+        echo "$metrics" >> "$OUTPUT_DIR/metrics-$(date +%H%M%S).log"
+
+        # Check for missing previousImage (anchor to the sample line so the
+        # "# HELP" / "# TYPE" lines are not matched)
+        local missing=$(echo "$metrics" | grep "^cosmos_soak_cfp_previous_image_missing_total " | awk '{print $2}')
+        if [ -n "$missing" ] && [ "$missing" != "0" ]; then
+            log "  ❌ Missing previousImage count: $missing"
+            healthy=false
+        fi
+    done
+
+    if [ "$healthy" = true ]; then
+        log "  ✅ Health check passed"
+    else
+        log "  ❌ Health check FAILED"
+        if [ "$ABORT_ON_GAP" = "true" ]; then
+            log "ABORT_ON_GAP is set — stopping soak test"
+            exit 1
+        fi
+    fi
+}
+
+# ── Chaos runner (background) ──────────────────────────────────────────────
+
+run_chaos_loop() {
+    log "Chaos loop starting (schedule: $CHAOS_SCHEDULE)"
+
+    # Simple time-based chaos: iterate through scenarios
+    local iteration=0
+    while true; do
+        iteration=$((iteration + 1))
+        log "=== Chaos iteration $iteration ==="
+
+        # Pod kill (every iteration)
+        log "Firing: pod-kill"
+        bash "$SCRIPT_DIR/chaos/scenarios/pod-kill.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log"
+        sleep "$RECOVERY_SEC"
+        check_health
+
+        # Lease throttle (every other iteration)
+        if [ $((iteration % 2)) -eq 0 ]; then
+            log "Firing: lease-throttle"
+            bash "$SCRIPT_DIR/chaos/scenarios/lease-throttle.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log"
+            sleep "$RECOVERY_SEC"
+            check_health
+        fi
+
+        # Restart storm (every 4th iteration)
+        if [ $((iteration % 4)) -eq 0 ]; then
+            log "Firing: restart-storm"
+            bash "$SCRIPT_DIR/chaos/scenarios/restart-storm.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log"
+            sleep "$RECOVERY_SEC"
+            check_health
+        fi
+
+        # Partition split (every 6th iteration)
+        if [ $((iteration % 6)) -eq 0 ]; then
+            log "Firing: partition-split"
+            bash "$SCRIPT_DIR/chaos/scenarios/partition-split.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log"
+            sleep "$RECOVERY_SEC"
+            check_health
+        fi
+
+        # Steady state between chaos events
+        log "Steady state for ${STEADY_SEC}s..."
+        sleep "$STEADY_SEC"
+
+        if is_expired; then
+            log "Soak duration expired, stopping chaos loop"
+            break
+        fi
+    done
+}
+
+# ── Main ──────────────────────────────────────────────────────────────────
+
+log "=== AVAD Soak Test Starting ==="
+log "Duration: ${SOAK_DURATION_HOURS}h (${SOAK_DURATION_SEC}s)"
+log "Chaos: $CHAOS_ENABLED"
+log "Output: $OUTPUT_DIR"
+
+# 1. Create namespace
+kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
+
+# 2. Deploy via Helm
+log "Deploying workloads via Helm..."
+HELM_ARGS=(
+    upgrade --install "$RELEASE"
+    "$SCRIPT_DIR/infra/chart"
+    --namespace "$NAMESPACE"
+    --values "$VALUES_FILE"
+)
+[ -n "$VALUES_OVERRIDE" ] && HELM_ARGS+=(--values "$VALUES_OVERRIDE")
+
+helm "${HELM_ARGS[@]}"
+log "Helm deploy complete"
+
+# 3. Wait for warm-up
+log "Warm-up phase (${WARMUP_SEC}s)..."
+sleep 30  # let pods start scheduling
+
+# Wait for all pods to be ready
+kubectl wait --for=condition=ready pods \
+    --all -n "$NAMESPACE" \
+    --timeout="${WARMUP_SEC}s" || {
+    log "⚠️  Not all pods ready after warm-up, continuing anyway"
+}
+
+log "Warm-up complete"
+check_health
+
+# 4. Start chaos loop in background (if enabled)
+if [ "$CHAOS_ENABLED" = "true" ]; then
+    run_chaos_loop &
+    CHAOS_PID=$!
+    log "Chaos loop started (PID: $CHAOS_PID)"
+fi
+
+# 5. Main monitoring loop
+log "Entering main monitoring loop..."
+while ! is_expired; do
+    sleep "$HEALTH_CHECK_INTERVAL"
+    check_health
+done
+
+log "=== Soak duration reached (${SOAK_DURATION_HOURS}h) ==="
+log "Final health check..."
+check_health
+
+log "=== Soak Test Complete ==="
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java
new file mode 100644
index 000000000000..593141318d59
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java
@@ -0,0 +1,142 @@
+package com.azure.cosmos.avadtest;
+
+import com.azure.cosmos.avadtest.config.TestConfig;
+import com.azure.cosmos.avadtest.health.HealthMonitor;
+import com.azure.cosmos.avadtest.health.HealthServer;
+import com.azure.cosmos.avadtest.ingestor.Ingestor;
+import com.azure.cosmos.avadtest.metrics.SoakMetrics;
+import com.azure.cosmos.avadtest.reader.AvadReader;
+import com.azure.cosmos.avadtest.reader.LatestVersionReader;
+import com.azure.cosmos.avadtest.reconciliation.Reconciler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+import java.util.concurrent.Callable;
+
+@Command(name = "cosmos-avad-test",
+    mixinStandardHelpOptions = true,
+    version = "1.0",
+    description = "AVAD E2E test: ingestor, LV reader, AVAD reader, reconciler")
+public final class Main implements Callable<Integer> {
+
+    private static final Logger log = LoggerFactory.getLogger(Main.class);
+
+    @Option(names = "--mode", required = true,
+        description = "Mode: ingestor, lv-reader, avad-reader, reconcile, health-monitor")
+    private String mode;
+
+    @Option(names = "--produced", description = "Produced log file (for reconcile mode)")
+    private String producedFile;
+
+    @Option(names = "--consumed", description = "Consumed log file (for reconcile mode)")
+    private String consumedFile;
+
+    @Option(names = "--lv", description = "LV consumed log file (for parity check)")
+    private String lvFile;
+
+    @Option(names = "--avad", description = "AVAD consumed log file (for parity check)")
+    private String avadFile;
+
+    @Option(names = "--health-port", defaultValue = "8080",
+        description = "Health server port (default: 8080)")
+    private int healthPort;
+
+    @Option(names = "--run-id", defaultValue = "soak-default",
+        description = "Soak run identifier (for health monitor)")
+    private String runId;
+
+    @Option(names = "--gap-sla-minutes", defaultValue = "10",
+        description = "Minutes before an unconsumed event is flagged as a gap")
+    private int gapSlaMinutes;
+
+    @Override
+    public Integer call() throws Exception {
+        log.info("Starting cosmos-avad-test in mode: {}", mode);
+
+        return switch (mode) {
+            case "ingestor" -> runIngestor();
+            case "lv-reader" -> runLvReader();
+            case "avad-reader" -> runAvadReader();
+            case "reconcile" -> runReconcile();
+            case "health-monitor" -> runHealthMonitor();
+            default -> {
+                log.error("Unknown mode: {}. Use: ingestor, lv-reader, avad-reader, reconcile, health-monitor", mode);
+                yield 1;
+            }
+        };
+    }
+
+    private int runIngestor() throws Exception {
+        SoakMetrics metrics = new SoakMetrics();
+        HealthServer healthServer = new HealthServer(metrics, healthPort);
+        healthServer.start();
+
+        TestConfig config = TestConfig.fromEnv();
+        try (Ingestor ingestor = new Ingestor(config)) {
+            healthServer.setReady(true);
+            ingestor.run();
+            return 0;
+        } finally {
+            healthServer.stop();
+        }
+    }
+
+    private int runLvReader() throws Exception {
+        SoakMetrics metrics = new SoakMetrics();
+        HealthServer healthServer = new HealthServer(metrics, healthPort);
+        healthServer.start();
+
+        TestConfig config = TestConfig.fromEnv();
+        try (LatestVersionReader reader = new LatestVersionReader(config)) {
+            healthServer.setReady(true);
+            reader.run();
+            return 0;
+        } finally {
+            healthServer.stop();
+        }
+    }
+
+    private int runAvadReader() throws Exception {
+        SoakMetrics metrics = new SoakMetrics();
+        HealthServer healthServer = new HealthServer(metrics, healthPort);
+        healthServer.start();
+
+        TestConfig config = TestConfig.fromEnv();
+        try (AvadReader reader = new AvadReader(config)) {
+            healthServer.setReady(true);
+            reader.run();
+            return 0;
+        } finally {
+            healthServer.stop();
+        }
+    }
+
+    private int runHealthMonitor() {
+        TestConfig config = TestConfig.fromEnv();
+        HealthMonitor monitor = new HealthMonitor(config, runId, gapSlaMinutes);
+        try {
+            return monitor.runChecks();
+        } finally {
+            monitor.close();
+        }
+    }
+
+    private int runReconcile() throws Exception {
+        if (producedFile != null && consumedFile != null) {
+            return Reconciler.reconcile(producedFile, consumedFile);
+        } else if (lvFile != null && avadFile != null) {
+            return Reconciler.parity(lvFile, avadFile);
+        } else {
+            log.error("Reconcile mode requires either --produced + --consumed or --lv + --avad");
+            return 1;
+        }
+    }
+
+    public static void main(String[] args) {
+        int exitCode = new CommandLine(new Main()).execute(args);
+        System.exit(exitCode);
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java
new file mode 100644
index 000000000000..2aaa4a9b58c8
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java
@@ -0,0 +1,124 @@
+package com.azure.cosmos.avadtest.config;
+
+import java.util.List;
+
+/**
+ * Configuration loaded from environment variables or system properties.
+ * Env vars take precedence over system properties.
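+ *
+ * Example (illustrative values; only COSMOS_ENDPOINT and COSMOS_KEY are required,
+ * everything else falls back to the defaults below):
+ *
+ *   export COSMOS_ENDPOINT="https://myaccount.documents.azure.com:443/"
+ *   export COSMOS_KEY="..."
+ *   export OPS_PER_SEC=5000
+ *   export DURATION_SECONDS=0   # 0 = run until killed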
+ */ +public final class TestConfig { + + private final String endpoint; + private final String regionalEndpoint; + private final String key; + private final String database; + private final String feedContainer; + private final String leaseContainer; + private final String preferredRegion; + private final int opsPerSec; + private final int docSizeBytes; + private final int logicalPartitionCount; + private final String producedLogFile; + private final String consumedLogFile; + private final int durationSeconds; + private final int workerCount; + + private TestConfig(Builder builder) { + this.endpoint = builder.endpoint; + this.regionalEndpoint = builder.regionalEndpoint; + this.key = builder.key; + this.database = builder.database; + this.feedContainer = builder.feedContainer; + this.leaseContainer = builder.leaseContainer; + this.preferredRegion = builder.preferredRegion; + this.opsPerSec = builder.opsPerSec; + this.docSizeBytes = builder.docSizeBytes; + this.logicalPartitionCount = builder.logicalPartitionCount; + this.producedLogFile = builder.producedLogFile; + this.consumedLogFile = builder.consumedLogFile; + this.durationSeconds = builder.durationSeconds; + this.workerCount = builder.workerCount; + } + + public static TestConfig fromEnv() { + return new Builder() + .endpoint(env("COSMOS_ENDPOINT")) + .regionalEndpoint(envOrDefault("COSMOS_REGIONAL_ENDPOINT", "")) + .key(env("COSMOS_KEY")) + .database(envOrDefault("COSMOS_DATABASE", "graph_db")) + .feedContainer(envOrDefault("COSMOS_FEED_CONTAINER", "avad-test")) + .leaseContainer(envOrDefault("COSMOS_LEASE_CONTAINER", "avad-test-leases")) + .preferredRegion(envOrDefault("COSMOS_PREFERRED_REGION", "West Central US")) + .opsPerSec(Integer.parseInt(envOrDefault("OPS_PER_SEC", "5000"))) + .docSizeBytes(Integer.parseInt(envOrDefault("DOC_SIZE_BYTES", "1024"))) + .logicalPartitionCount(Integer.parseInt(envOrDefault("LOGICAL_PARTITION_COUNT", "100000"))) + .producedLogFile(envOrDefault("PRODUCED_LOG", "produced.log")) + .consumedLogFile(envOrDefault("CONSUMED_LOG", "consumed.log")) + .durationSeconds(Integer.parseInt(envOrDefault("DURATION_SECONDS", "3600"))) + .workerCount(Integer.parseInt(envOrDefault("WORKER_COUNT", "2"))) + .build(); + } + + private static String env(String name) { + String val = System.getenv(name); + if (val == null || val.isBlank()) { + val = System.getProperty(name); + } + if (val == null || val.isBlank()) { + throw new IllegalStateException("Required config missing: " + name); + } + return val; + } + + private static String envOrDefault(String name, String defaultVal) { + String val = System.getenv(name); + if (val == null || val.isBlank()) { + val = System.getProperty(name); + } + return (val != null && !val.isBlank()) ? val : defaultVal; + } + + public String endpoint() { return endpoint; } + public String regionalEndpoint() { return regionalEndpoint; } + /** Returns regional endpoint if set, otherwise global endpoint. For use by readers. */ + public String readerEndpoint() { + return (regionalEndpoint != null && !regionalEndpoint.isBlank()) ? 
regionalEndpoint : endpoint;
+    }
+    public String key() { return key; }
+    public String database() { return database; }
+    public String feedContainer() { return feedContainer; }
+    public String leaseContainer() { return leaseContainer; }
+    public String preferredRegion() { return preferredRegion; }
+    public List<String> preferredRegions() { return List.of(preferredRegion); }
+    public int opsPerSec() { return opsPerSec; }
+    public int docSizeBytes() { return docSizeBytes; }
+    public int logicalPartitionCount() { return logicalPartitionCount; }
+    public String producedLogFile() { return producedLogFile; }
+    public String consumedLogFile() { return consumedLogFile; }
+    /** Duration in seconds. 0 = run forever until killed. */
+    public int durationSeconds() { return durationSeconds; }
+    /** Number of CFP worker instances per reader mode. */
+    public int workerCount() { return workerCount; }
+
+    public static final class Builder {
+        private String endpoint, regionalEndpoint, key, database, feedContainer, leaseContainer;
+        private String preferredRegion, producedLogFile, consumedLogFile;
+        private int opsPerSec, docSizeBytes, logicalPartitionCount, durationSeconds, workerCount;
+
+        public Builder endpoint(String v) { this.endpoint = v; return this; }
+        public Builder regionalEndpoint(String v) { this.regionalEndpoint = v; return this; }
+        public Builder key(String v) { this.key = v; return this; }
+        public Builder database(String v) { this.database = v; return this; }
+        public Builder feedContainer(String v) { this.feedContainer = v; return this; }
+        public Builder leaseContainer(String v) { this.leaseContainer = v; return this; }
+        public Builder preferredRegion(String v) { this.preferredRegion = v; return this; }
+        public Builder opsPerSec(int v) { this.opsPerSec = v; return this; }
+        public Builder docSizeBytes(int v) { this.docSizeBytes = v; return this; }
+        public Builder logicalPartitionCount(int v) { this.logicalPartitionCount = v; return this; }
+        public Builder producedLogFile(String v) { this.producedLogFile = v; return this; }
+        public Builder consumedLogFile(String v) { this.consumedLogFile = v; return this; }
+        public Builder durationSeconds(int v) { this.durationSeconds = v; return this; }
+        public Builder workerCount(int v) { this.workerCount = v; return this; }
+        public TestConfig build() { return new TestConfig(this); }
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java
new file mode 100644
index 000000000000..1ba22846de78
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java
@@ -0,0 +1,251 @@
+package com.azure.cosmos.avadtest.health;
+
+import com.azure.cosmos.CosmosAsyncClient;
+import com.azure.cosmos.CosmosAsyncContainer;
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.avadtest.config.TestConfig;
+import com.azure.cosmos.models.CosmosQueryRequestOptions;
+import com.azure.cosmos.models.PartitionKey;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import reactor.core.publisher.Mono;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Online health monitor that queries the reconciliation
container for gap detection and correctness checks. + * + * Designed to run as a standalone mode ("health-monitor") + * or as a K8s CronJob. Queries once, reports, and exits. + * + * Checks: + * 1. Gap detection — produced events not consumed within + * SLA window (default 10 min) + * 2. previousImage correctness — AVAD replace/delete + * events missing previousImage + * 3. Parity — LV events not in AVAD (AVAD ⊇ LV) + * + * Writes a health snapshot to the "soak-health" container. + */ +public final class HealthMonitor { + + private static final Logger log = LoggerFactory.getLogger(HealthMonitor.class); + private static final String RECONCILIATION_CONTAINER = "reconciliation"; + private static final String HEALTH_CONTAINER = "soak-health"; + + private final CosmosAsyncClient client; + private final CosmosAsyncContainer reconContainer; + private final CosmosAsyncContainer healthContainer; + private final String runId; + private final int gapSlaMinutes; + + public HealthMonitor(TestConfig config, String runId, int gapSlaMinutes) { + this.runId = runId; + this.gapSlaMinutes = gapSlaMinutes; + + this.client = new CosmosClientBuilder() + .endpoint(config.endpoint()) + .key(config.key()) + .gatewayMode() + .preferredRegions(config.preferredRegions()) + .buildAsyncClient(); + + this.reconContainer = client + .getDatabase(config.database()) + .getContainer(RECONCILIATION_CONTAINER); + + this.healthContainer = client + .getDatabase(config.database()) + .getContainer(HEALTH_CONTAINER); + } + + /** + * Run all health checks once and write a snapshot. + * Returns 0 if healthy, 1 if any check failed. + */ + public int runChecks() { + log.info("=== Health Monitor Check (runId={}) ===", runId); + Instant now = Instant.now(); + boolean healthy = true; + + // 1. Count produced events + long producedCount = countBySource("ingestor"); + log.info(" Produced (ingestor): {}", producedCount); + + // 2. Count AVAD consumed events + long avadConsumed = countBySource("cfp-avad"); + log.info(" AVAD consumed: {}", avadConsumed); + + // 3. Count LV consumed events + long lvConsumed = countBySource("cfp-lv"); + log.info(" LV consumed: {}", lvConsumed); + + // 4. Gap detection — produced but not in AVAD + // (older than SLA window) + long gapCount = countGaps("ingestor", "cfp-avad"); + log.info(" Gaps (produced not in AVAD, >{} min): {}", + gapSlaMinutes, gapCount); + if (gapCount > 0) { + log.error(" ❌ {} missed changes detected", gapCount); + healthy = false; + } + + // 5. Parity — LV not in AVAD + long parityGaps = countGaps("cfp-lv", "cfp-avad"); + log.info(" Parity gaps (LV not in AVAD): {}", parityGaps); + if (parityGaps > 0) { + log.error(" ❌ {} LV events missing in AVAD", parityGaps); + healthy = false; + } + + // 6. Missing previousImage + long missingPrev = countMissingPreviousImage(); + log.info(" Missing previousImage: {}", missingPrev); + if (missingPrev > 0) { + log.error(" ❌ {} replace/delete events missing previousImage", + missingPrev); + healthy = false; + } + + // Write health snapshot + writeHealthSnapshot(now, producedCount, avadConsumed, + lvConsumed, gapCount, parityGaps, missingPrev, healthy); + + String status = healthy ? "✅ HEALTHY" : "❌ UNHEALTHY"; + log.info(" Status: {}", status); + return healthy ? 
0 : 1;
+    }
+
+    private long countBySource(String source) {
+        String query = "SELECT VALUE COUNT(1) FROM c WHERE c.source = '" + source + "'";
+        try {
+            return reconContainer.queryItems(query, new CosmosQueryRequestOptions(), JsonNode.class)
+                .byPage(1)
+                .flatMap(page -> {
+                    if (page.getResults().isEmpty()) return Mono.just(0L);
+                    return Mono.just(page.getResults().get(0).asLong());
+                })
+                .blockFirst(Duration.ofSeconds(30));
+        } catch (Exception e) {
+            log.warn("Failed to count source={}: {}", source, e.getMessage());
+            return -1;
+        }
+    }
+
+    private long countGaps(String producerSource, String consumerSource) {
+        // Count events recorded by the producer that never showed up for the
+        // consumer and are older than the SLA window. A cross-document
+        // anti-join is not expressible as a single Cosmos query, and a full
+        // cross-partition scan is expensive, so sample the most recent
+        // producer events and probe the consumer for each correlationId.
+        String sampleQuery = String.format(
+            "SELECT TOP 100 c.correlationId FROM c " +
+            "WHERE c.source = '%s' " +
+            "AND c.timestamp < '%s' " +
+            "ORDER BY c.timestamp DESC",
+            producerSource,
+            Instant.now().minus(Duration.ofMinutes(gapSlaMinutes)).toString()
+        );
+
+        try {
+            AtomicLong gaps = new AtomicLong(0);
+            reconContainer.queryItems(sampleQuery, new CosmosQueryRequestOptions(), JsonNode.class)
+                .byPage(100)
+                .flatMap(page -> {
+                    for (JsonNode item : page.getResults()) {
+                        String corrId = item.get("correlationId").asText();
+                        // Check if consumer has this event
+                        String checkQuery = String.format(
+                            "SELECT VALUE COUNT(1) FROM c " +
+                            "WHERE c.correlationId = '%s' " +
+                            "AND c.source = '%s'",
+                            corrId, consumerSource);
+                        Long count = reconContainer.queryItems(
+                                checkQuery,
+                                new CosmosQueryRequestOptions().setPartitionKey(new PartitionKey(corrId)),
+                                JsonNode.class)
+                            .byPage(1)
+                            .map(p -> p.getResults().isEmpty() ?
0L : p.getResults().get(0).asLong()) + .blockFirst(Duration.ofSeconds(5)); + if (count == null || count == 0) { + gaps.incrementAndGet(); + } + } + return Mono.empty(); + }) + .blockLast(Duration.ofSeconds(60)); + return gaps.get(); + } catch (Exception e) { + log.warn("Failed gap detection {}->{}: {}", producerSource, consumerSource, e.getMessage()); + return -1; + } + } + + private long countMissingPreviousImage() { + String query = + "SELECT VALUE COUNT(1) FROM c " + + "WHERE c.source = 'cfp-avad' " + + "AND c.opType IN ('replace', 'delete') " + + "AND c.hasPreviousImage = false"; + try { + return reconContainer.queryItems(query, new CosmosQueryRequestOptions(), JsonNode.class) + .byPage(1) + .flatMap(page -> { + if (page.getResults().isEmpty()) return Mono.just(0L); + return Mono.just(page.getResults().get(0).asLong()); + }) + .blockFirst(Duration.ofSeconds(30)); + } catch (Exception e) { + log.warn("Failed previousImage check: {}", e.getMessage()); + return -1; + } + } + + private void writeHealthSnapshot(Instant timestamp, + long produced, long avadConsumed, long lvConsumed, + long gapCount, long parityGaps, long missingPrev, + boolean healthy) { + ObjectNode doc = JsonNodeFactory.instance.objectNode(); + doc.put("id", "health-" + timestamp.toString()); + doc.put("runId", runId); + doc.put("timestamp", timestamp.toString()); + doc.put("producedCount", produced); + doc.put("avadConsumedCount", avadConsumed); + doc.put("lvConsumedCount", lvConsumed); + doc.put("gapCount", gapCount); + doc.put("parityGaps", parityGaps); + doc.put("missingPreviousImage", missingPrev); + doc.put("status", healthy ? "HEALTHY" : "UNHEALTHY"); + + try { + healthContainer.upsertItem(doc, new PartitionKey(runId), null) + .block(Duration.ofSeconds(10)); + log.info(" Health snapshot written"); + } catch (Exception e) { + log.warn("Failed to write health snapshot: {}", e.getMessage()); + } + } + + public void close() { + client.close(); + } +} diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java new file mode 100644 index 000000000000..418a6748ac79 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java @@ -0,0 +1,89 @@ +package com.azure.cosmos.avadtest.health; + +import com.azure.cosmos.avadtest.metrics.SoakMetrics; +import com.sun.net.httpserver.HttpServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Lightweight HTTP health server for Kubernetes probes. + * + * Endpoints: + * GET /health — liveness probe (always 200 if JVM is up) + * GET /ready — readiness probe (200 when workload is ready) + * GET /metrics — Micrometer-style plain text metrics + * + * Reusable contract: any workload image that implements these + * three endpoints can plug into the soak infra Helm chart. 
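+ *
+ * Example manual probe (illustrative; 8080 is the default --health-port):
+ *
+ *   curl -s http://localhost:8080/ready              returns {"ready":true} once the workload is up
+ *   curl -s http://localhost:8080/metrics | grep cosmos_soak_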
+ */ +public final class HealthServer { + + private static final Logger log = LoggerFactory.getLogger(HealthServer.class); + private static final int DEFAULT_PORT = 8080; + + private final HttpServer server; + private final AtomicBoolean ready = new AtomicBoolean(false); + private final SoakMetrics metrics; + + public HealthServer(SoakMetrics metrics) throws IOException { + this(metrics, DEFAULT_PORT); + } + + public HealthServer(SoakMetrics metrics, int port) throws IOException { + this.metrics = metrics; + this.server = HttpServer.create(new InetSocketAddress(port), 0); + this.server.setExecutor(Executors.newFixedThreadPool(2)); + + server.createContext("/health", exchange -> { + byte[] body = "{\"status\":\"UP\"}".getBytes(StandardCharsets.UTF_8); + exchange.getResponseHeaders().set("Content-Type", "application/json"); + exchange.sendResponseHeaders(200, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + }); + + server.createContext("/ready", exchange -> { + boolean isReady = ready.get(); + String json = "{\"ready\":" + isReady + "}"; + byte[] body = json.getBytes(StandardCharsets.UTF_8); + exchange.getResponseHeaders().set("Content-Type", "application/json"); + exchange.sendResponseHeaders(isReady ? 200 : 503, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + }); + + server.createContext("/metrics", exchange -> { + String metricsText = metrics.toPrometheusText(); + byte[] body = metricsText.getBytes(StandardCharsets.UTF_8); + exchange.getResponseHeaders().set("Content-Type", "text/plain"); + exchange.sendResponseHeaders(200, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + }); + } + + public void start() { + server.start(); + log.info("Health server started on port {}", server.getAddress().getPort()); + } + + public void setReady(boolean isReady) { + ready.set(isReady); + log.info("Readiness set to: {}", isReady); + } + + public void stop() { + server.stop(2); + log.info("Health server stopped"); + } +} diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java new file mode 100644 index 000000000000..224663d6041d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -0,0 +1,306 @@ +package com.azure.cosmos.avadtest.ingestor; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.avadtest.config.TestConfig; +import com.azure.cosmos.avadtest.reconciliation.EventLog; +import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; +import com.azure.cosmos.models.CosmosItemRequestOptions; +import com.azure.cosmos.models.PartitionKey; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; + +import java.time.Duration; +import java.time.Instant; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; + 
+/**
+ * Ingestion workload: creates (40%), replaces (25%), upserts (15%), deletes (20%).
+ * Every operation gets a unique eventId for per-event reconciliation.
+ * The target rate is approximated by capping the number of in-flight
+ * operations (see run()); opsPerTick is derived from opsPerSec for logging.
+ */
+public final class Ingestor implements AutoCloseable {
+
+    private static final Logger log = LoggerFactory.getLogger(Ingestor.class);
+    private static final int TICK_INTERVAL_MS = 10;
+    private static final double FAILURE_ABORT_THRESHOLD = 0.5;  // abort if >50% failures
+
+    private final TestConfig config;
+    private final CosmosAsyncClient client;
+    private final CosmosAsyncContainer container;
+    private final EventLog eventLog;
+    private final ReconciliationWriter reconWriter;
+    private final AtomicLong seqCounter = new AtomicLong(0);
+    private final AtomicBoolean running = new AtomicBoolean(true);
+
+    // Failure tracking
+    private final LongAdder successCount = new LongAdder();
+    private final LongAdder failureCount = new LongAdder();
+
+    // Track recently created doc IDs for replace/upsert/delete operations
+    private final String[] recentDocIds;
+    private final AtomicLong recentIndex = new AtomicLong(0);
+
+    // Ops per tick = opsPerSec * tickIntervalMs / 1000
+    private final int opsPerTick;
+
+    public Ingestor(TestConfig config) throws Exception {
+        this.config = config;
+        this.eventLog = new EventLog(config.producedLogFile());
+        this.reconWriter = new ReconciliationWriter(config, "ingestor");
+        this.recentDocIds = new String[10_000];  // ring buffer
+        this.opsPerTick = Math.max(1, config.opsPerSec() * TICK_INTERVAL_MS / 1000);
+
+        this.client = new CosmosClientBuilder()
+            .endpoint(config.endpoint())
+            .key(config.key())
+            .gatewayMode()
+            .preferredRegions(config.preferredRegions())
+            .buildAsyncClient();
+
+        this.container = client
+            .getDatabase(config.database())
+            .getContainer(config.feedContainer());
+
+        log.info("Ingestor initialized: endpoint={}, db={}, container={}, ops/sec={}, opsPerTick={}",
+            config.endpoint(), config.database(), config.feedContainer(),
+            config.opsPerSec(), opsPerTick);
+    }
+
+    public void run() throws InterruptedException {
+        int durationSec = config.durationSeconds();
+        log.info("Starting ingestion at {} ops/sec, duration={}",
+            config.opsPerSec(), durationSec > 0 ? durationSec + "s" : "unlimited");
+
+        CountDownLatch latch = new CountDownLatch(1);
+
+        // Approximate the target rate by bounding in-flight operations;
+        // backpressure from flatMap throttles the generator.
+        int concurrency = Math.min(config.opsPerSec(), 500);
+        Flux.generate(sink -> {
+            if (running.get()) { sink.next(seqCounter.get()); }
+            else { sink.complete(); }
+        })
+        .flatMap(tick -> executeOperation()
+            .subscribeOn(Schedulers.boundedElastic()), concurrency)
+        .doOnError(e -> log.error("Ingestion error", e))
+        .doOnComplete(latch::countDown)
+        .subscribe();
+
+        // Auto-stop after duration
+        if (durationSec > 0) {
+            Schedulers.single().schedule(() -> {
+                log.info("Duration {}s reached, stopping ingestor...", durationSec);
+                running.set(false);
+            }, durationSec, java.util.concurrent.TimeUnit.SECONDS);
+        }
+
+        // Periodic failure rate check
+        Flux.interval(Duration.ofSeconds(30))
+            .takeWhile(tick -> running.get())
+            .subscribe(tick -> {
+                long s = successCount.sum();
+                long f = failureCount.sum();
+                long total = s + f;
+                double failRate = total > 0 ? (double) f / total : 0;
+                log.info("Progress: success={}, failures={}, failRate={}%, seq={}",
+                    s, f, String.format("%.1f", failRate * 100), seqCounter.get());
+                if (total > 100 && failRate > FAILURE_ABORT_THRESHOLD) {
+                    log.error("Failure rate {}% exceeds threshold, aborting!",
+                        String.format("%.1f", failRate * 100));
+                    running.set(false);
+                }
+            });
+
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            log.info("Shutdown signal received, stopping ingestor...");
+            running.set(false);
+        }));
+
+        latch.await();
+    }
+
+    private Mono<Void> executeOperation() {
+        int roll = ThreadLocalRandom.current().nextInt(100);
+        if (roll < 40) {
+            return doCreate();
+        } else if (roll < 65) {
+            return doReplace();
+        } else if (roll < 80) {
+            return doUpsert();
+        } else {
+            return doDelete();
+        }
+    }
+
+    private Mono<Void> doCreate() {
+        String docId = UUID.randomUUID().toString();
+        String eventId = UUID.randomUUID().toString();
+        String pk = "tenant-" + ThreadLocalRandom.current().nextInt(config.logicalPartitionCount());
+        long seq = seqCounter.incrementAndGet();
+        String ts = Instant.now().toString();
+
+        ObjectNode doc = buildDoc(docId, pk, seq, eventId, "create", ts);
+
+        return container.createItem(doc, new PartitionKey(pk), new CosmosItemRequestOptions())
+            .doOnSuccess(resp -> {
+                successCount.increment();
+                eventLog.logProduced(eventId, seq, "create", pk, ts);
+                reconWriter.record(eventId, seq, "create", pk, -1, false, -1);
+                trackRecentId(docId + "|" + pk);
+            })
+            .doOnError(e -> {
+                failureCount.increment();
+                log.warn("Create failed: docId={}, error={}", docId, e.getMessage());
+            })
+            .onErrorResume(e -> Mono.empty())
+            .then();
+    }
+
+    private Mono<Void> doReplace() {
+        String recent = getRecentId();
+        if (recent == null) return doCreate();
+
+        String[] parts = recent.split("\\|");
+        String docId = parts[0];
+        String pk = parts[1];
+        String eventId = UUID.randomUUID().toString();
+        long seq = seqCounter.incrementAndGet();
+        String ts = Instant.now().toString();
+
+        return container.readItem(docId, new PartitionKey(pk), ObjectNode.class)
+            .flatMap(readResp -> {
+                ObjectNode doc = readResp.getItem();
+                doc.put("seqNo", seq);
+                doc.put("eventId", eventId);
+                doc.put("operationType", "replace");
+                doc.put("timestamp", ts);
+                doc.put("payload", generatePayload());
+                return container.replaceItem(doc, docId, new PartitionKey(pk), new CosmosItemRequestOptions());
+            })
+            .doOnSuccess(resp -> {
+                successCount.increment();
+                eventLog.logProduced(eventId, seq, "replace", pk, ts);
+                reconWriter.record(eventId, seq, "replace", pk, -1, false, -1);
+            })
+            .doOnError(e -> {
+                failureCount.increment();
+                log.warn("Replace failed: docId={}, error={}", docId, e.getMessage());
+            })
+            .onErrorResume(e -> Mono.empty())
+            .then();
+    }
+
+    private Mono<Void> doUpsert() {
+        String recent = getRecentId();
+        String docId;
+        String pk;
+        if (recent != null && ThreadLocalRandom.current().nextBoolean()) {
+            String[] parts = recent.split("\\|");
+            docId = parts[0];
+            pk = parts[1];
+        } else {
+            docId = UUID.randomUUID().toString();
+            pk = "tenant-" + ThreadLocalRandom.current().nextInt(config.logicalPartitionCount());
+        }
+
+        String eventId = UUID.randomUUID().toString();
+        long seq = seqCounter.incrementAndGet();
+        String ts = Instant.now().toString();
+        ObjectNode doc = buildDoc(docId, pk, seq, eventId, "upsert", ts);
+
+        return container.upsertItem(doc, new PartitionKey(pk), new CosmosItemRequestOptions())
+            .doOnSuccess(resp -> {
+                successCount.increment();
+                eventLog.logProduced(eventId, seq, "upsert", pk, ts);
+                reconWriter.record(eventId, seq, "upsert", pk, -1, false, -1);
+                trackRecentId(docId + "|" + pk);
+            })
+            .doOnError(e -> {
+                failureCount.increment();
+                log.warn("Upsert failed: docId={}, error={}", docId, e.getMessage());
+            })
+            .onErrorResume(e -> Mono.empty())
+            .then();
+    }
+
+    private Mono<Void> doDelete() {
+        String recent = getRecentId();
+        if (recent == null) return doCreate();  // nothing to delete yet
+
+        String[] parts = recent.split("\\|");
+        String docId = parts[0];
+        String pk = parts[1];
+        String eventId = UUID.randomUUID().toString();
+        long seq = seqCounter.incrementAndGet();
+        String ts = Instant.now().toString();
+
+        return container.deleteItem(docId, new PartitionKey(pk), new CosmosItemRequestOptions())
+            .doOnSuccess(resp -> {
+                successCount.increment();
+                eventLog.logProduced(eventId, seq, "delete", pk, ts);
+                reconWriter.record(eventId, seq, "delete", pk, -1, false, -1);
+            })
+            .doOnError(e -> {
+                failureCount.increment();
+                // 404 is expected if already deleted — don't warn loudly
+                String msg = e.getMessage();
+                if (msg == null || !msg.contains("404")) {
+                    log.warn("Delete failed: docId={}, error={}", docId, msg);
+                }
+            })
+            .onErrorResume(e -> Mono.empty())
+            .then();
+    }
+
+    private ObjectNode buildDoc(String docId, String pk, long seq,
+                                String eventId, String opType, String ts) {
+        ObjectNode doc = JsonNodeFactory.instance.objectNode();
+        doc.put("id", docId);
+        doc.put("tenantId", pk);
+        doc.put("eventId", eventId);
+        doc.put("seqNo", seq);
+        doc.put("operationType", opType);
+        doc.put("timestamp", ts);
+        doc.put("payload", generatePayload());
+        return doc;
+    }
+
+    private String generatePayload() {
+        int size = config.docSizeBytes();
+        if (size <= 0) return "";
+        return "x".repeat(Math.min(size, 10_000));
+    }
+
+    private void trackRecentId(String idAndPk) {
+        int idx = (int) (recentIndex.incrementAndGet() % recentDocIds.length);
+        recentDocIds[idx] = idAndPk;
+    }
+
+    private String getRecentId() {
+        long idx = recentIndex.get();
+        if (idx == 0) return null;
+        int start = (int) (idx % recentDocIds.length);
+        int filled = (int) Math.min(idx, (long) recentDocIds.length);  // avoid int overflow on long soaks
+        int offset = ThreadLocalRandom.current().nextInt(filled);
+        return recentDocIds[(start - offset + recentDocIds.length) % recentDocIds.length];
+    }
+
+    @Override
+    public void close() {
+        running.set(false);
+        try { eventLog.close(); } catch (Exception e) { /* ignore */ }
+        reconWriter.close();
+        client.close();
+        log.info("Ingestor closed. Total ops: {}, success: {}, failures: {}",
+            seqCounter.get(), successCount.sum(), failureCount.sum());
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java
new file mode 100644
index 000000000000..4b64bc6ffc82
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java
@@ -0,0 +1,95 @@
+package com.azure.cosmos.avadtest.metrics;
+
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Centralized soak test metrics. Thread-safe counters for all
+ * workload components.
+ *
+ * Exported as Prometheus-compatible text via /metrics endpoint.
+ * Reusable: other workloads can extend or compose this class.
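+ *
+ * Example /metrics line set for one counter, as emitted by appendMetric
+ * below (counter value illustrative):
+ *
+ *   # HELP cosmos_soak_ingestor_ops_success_total Total successful ingestor operations
+ *   # TYPE cosmos_soak_ingestor_ops_success_total counter
+ *   cosmos_soak_ingestor_ops_success_total 12345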
+ */ +public final class SoakMetrics { + + // Ingestor metrics + private final LongAdder ingestorOpsSuccess = new LongAdder(); + private final LongAdder ingestorOpsFailure = new LongAdder(); + private final LongAdder ingestorCreates = new LongAdder(); + private final LongAdder ingestorReplaces = new LongAdder(); + private final LongAdder ingestorUpserts = new LongAdder(); + private final LongAdder ingestorDeletes = new LongAdder(); + + // CFP consumer metrics + private final LongAdder cfpAvadEventsConsumed = new LongAdder(); + private final LongAdder cfpLvEventsConsumed = new LongAdder(); + private final LongAdder cfpPreviousImageMissing = new LongAdder(); + private final LongAdder cfpLsnViolations = new LongAdder(); + + // Reconciliation metrics + private final LongAdder reconWrites = new LongAdder(); + private final LongAdder reconErrors = new LongAdder(); + private final LongAdder reconDrops = new LongAdder(); + + // --- Ingestor --- + public void recordIngestorSuccess() { ingestorOpsSuccess.increment(); } + public void recordIngestorFailure() { ingestorOpsFailure.increment(); } + public void recordIngestorCreate() { ingestorCreates.increment(); } + public void recordIngestorReplace() { ingestorReplaces.increment(); } + public void recordIngestorUpsert() { ingestorUpserts.increment(); } + public void recordIngestorDelete() { ingestorDeletes.increment(); } + + // --- CFP --- + public void recordAvadEvent() { cfpAvadEventsConsumed.increment(); } + public void recordLvEvent() { cfpLvEventsConsumed.increment(); } + public void recordMissingPreviousImage() { cfpPreviousImageMissing.increment(); } + public void recordLsnViolation() { cfpLsnViolations.increment(); } + + // --- Reconciliation --- + public void recordReconWrite() { reconWrites.increment(); } + public void recordReconError() { reconErrors.increment(); } + public void recordReconDrop() { reconDrops.increment(); } + + /** + * Export all metrics as Prometheus-compatible plain text. 
+ */ + public String toPrometheusText() { + StringBuilder sb = new StringBuilder(2048); + + appendMetric(sb, "cosmos_soak_ingestor_ops_success_total", + "Total successful ingestor operations", ingestorOpsSuccess.sum()); + appendMetric(sb, "cosmos_soak_ingestor_ops_failure_total", + "Total failed ingestor operations", ingestorOpsFailure.sum()); + appendMetric(sb, "cosmos_soak_ingestor_creates_total", + "Total create operations", ingestorCreates.sum()); + appendMetric(sb, "cosmos_soak_ingestor_replaces_total", + "Total replace operations", ingestorReplaces.sum()); + appendMetric(sb, "cosmos_soak_ingestor_upserts_total", + "Total upsert operations", ingestorUpserts.sum()); + appendMetric(sb, "cosmos_soak_ingestor_deletes_total", + "Total delete operations", ingestorDeletes.sum()); + + appendMetric(sb, "cosmos_soak_cfp_avad_events_consumed_total", + "Total AVAD change feed events consumed", cfpAvadEventsConsumed.sum()); + appendMetric(sb, "cosmos_soak_cfp_lv_events_consumed_total", + "Total LV change feed events consumed", cfpLvEventsConsumed.sum()); + appendMetric(sb, "cosmos_soak_cfp_previous_image_missing_total", + "Replace/delete events missing previousImage", cfpPreviousImageMissing.sum()); + appendMetric(sb, "cosmos_soak_cfp_lsn_violations_total", + "LSN ordering violations", cfpLsnViolations.sum()); + + appendMetric(sb, "cosmos_soak_reconciliation_writes_total", + "Total reconciliation writes", reconWrites.sum()); + appendMetric(sb, "cosmos_soak_reconciliation_errors_total", + "Total reconciliation errors", reconErrors.sum()); + appendMetric(sb, "cosmos_soak_reconciliation_drops_total", + "Total reconciliation drops", reconDrops.sum()); + + return sb.toString(); + } + + private void appendMetric(StringBuilder sb, String name, String help, long value) { + sb.append("# HELP ").append(name).append(' ').append(help).append('\n'); + sb.append("# TYPE ").append(name).append(" counter\n"); + sb.append(name).append(' ').append(value).append('\n'); + } +} diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java new file mode 100644 index 000000000000..aee287f123fa --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -0,0 +1,203 @@ +package com.azure.cosmos.avadtest.reader; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.avadtest.config.TestConfig; +import com.azure.cosmos.avadtest.reconciliation.EventLog; +import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; +import com.azure.cosmos.models.ChangeFeedProcessorOptions; +import com.azure.cosmos.models.ChangeFeedProcessorItem; +import com.azure.cosmos.models.ChangeFeedMetaData; +import com.azure.cosmos.models.ChangeFeedOperationType; +import com.azure.cosmos.ChangeFeedProcessor; +import com.azure.cosmos.ChangeFeedProcessorBuilder; +import com.fasterxml.jackson.databind.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.LongAdder; + +/** + * All Versions and Deletes (AVAD) ChangeFeedProcessor reader. + * Lease prefix "avad-" — isolated from Latest Version reader leases. 
+ * Gateway mode, preferred region configurable (default: West Central US).
+ *
+ * Additional validations vs LatestVersionReader:
+ *   - previousImage must be non-null on replace and delete events
+ *   - operationType metadata is checked for create/replace/delete
+ */
+public final class AvadReader implements AutoCloseable {
+
+    private static final Logger log = LoggerFactory.getLogger(AvadReader.class);
+    private static final String LEASE_PREFIX = "avad-";
+
+    private final TestConfig config;
+    private final CosmosAsyncClient client;
+    private final CosmosAsyncContainer feedContainer;
+    private final CosmosAsyncContainer leaseContainer;
+    private final EventLog eventLog;
+    private final ReconciliationWriter reconWriter;
+    private final List<ChangeFeedProcessor> processors = new ArrayList<>();
+
+    // Correctness counters (thread-safe for concurrent CFP batch processing)
+    private final LongAdder missingPreviousImageCount = new LongAdder();
+    private final LongAdder totalReplaces = new LongAdder();
+    private final LongAdder totalDeletes = new LongAdder();
+    private final LongAdder totalCreates = new LongAdder();
+    private final LongAdder crtsViolationCount = new LongAdder();
+    // Last CRTS seen per logical partition, for the online ordering check
+    private final java.util.concurrent.ConcurrentHashMap<String, Long> lastCrtsByPk =
+        new java.util.concurrent.ConcurrentHashMap<>();
+
+    public AvadReader(TestConfig config) throws Exception {
+        this.config = config;
+        this.eventLog = new EventLog(config.consumedLogFile());
+        this.reconWriter = new ReconciliationWriter(config, "cfp-avad");
+
+        this.client = new CosmosClientBuilder()
+            .endpoint(config.readerEndpoint())
+            .key(config.key())
+            .gatewayMode()
+            .preferredRegions(config.preferredRegions())
+            .buildAsyncClient();
+
+        this.feedContainer = client
+            .getDatabase(config.database())
+            .getContainer(config.feedContainer());
+
+        this.leaseContainer = client
+            .getDatabase(config.database())
+            .getContainer(config.leaseContainer());
+
+        log.info("AvadReader initialized: prefix={}, endpoint={}, region={}, workers={}",
+            LEASE_PREFIX, config.readerEndpoint(), config.preferredRegion(), config.workerCount());
+    }
+
+    public void run() throws InterruptedException {
+        CountDownLatch latch = new CountDownLatch(1);
+        int workers = config.workerCount();
+
+        for (int i = 0; i < workers; i++) {
+            final int workerIdx = i;
+            ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions();
+            options.setLeasePrefix(LEASE_PREFIX);
+            options.setFeedPollDelay(Duration.ofSeconds(1));
+            options.setStartTime(Instant.now().minus(Duration.ofDays(5)));
+
+            ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder()
+                .hostName("avad-host-" + ProcessHandle.current().pid() + "-w" + workerIdx)
+                .feedContainer(feedContainer)
+                .leaseContainer(leaseContainer)
+                .options(options)
+                .handleAllVersionsAndDeletesChanges(this::handleChanges)
+                .buildChangeFeedProcessor();
+
+            processor.start()
+                .doOnSuccess(v -> log.info("AVAD ChangeFeedProcessor worker-{} started", workerIdx))
+                .doOnError(e -> log.error("Failed to start AVAD CFP worker-{}", workerIdx, e))
+                .block();
+
+            processors.add(processor);
+        }
+
+        log.info("All {} AVAD workers started", workers);
+
+        latch.await();
+    }
+
+    private void handleChanges(List<ChangeFeedProcessorItem> items) {
+        for (ChangeFeedProcessorItem item : items) {
+            JsonNode current = item.getCurrent();
+            JsonNode previous = item.getPrevious();
+            ChangeFeedMetaData metadata = item.getChangeFeedMetaData();
+
+            if (metadata == null) {
+                log.warn("Null metadata on AVAD change feed item, skipping");
+                continue;
+            }
+
+            // LSN and CRTS come from metadata — not from the document body.
+            // This is critical for deletes where current may be a tombstone.
+            ChangeFeedOperationType opEnum = metadata.getOperationType();
+            String opType = opEnum != null ? opEnum.toString().toLowerCase() : "unknown";
+            long lsn = metadata.getLogSequenceNumber();
+            Instant crtsInstant = metadata.getConflictResolutionTimestamp();
+            long crts = crtsInstant != null ? crtsInstant.toEpochMilli() : -1;
+
+            // For deletes, current is a tombstone — extract business fields from previous
+            JsonNode source;
+            if (opEnum == ChangeFeedOperationType.DELETE) {
+                source = previous;
+            } else {
+                source = (current != null && !current.isNull()) ? current : previous;
+            }
+
+            String eventId = source != null ? getTextOrEmpty(source, "eventId") : "";
+            long seqNo = source != null && source.has("seqNo") ? source.get("seqNo").asLong() : -1;
+            String pk = source != null ? getTextOrEmpty(source, "tenantId") : "";
+            String timestamp = source != null ? getTextOrEmpty(source, "timestamp") : "";
+
+            boolean hasPrevious = previous != null && !previous.isNull();
+
+            // Online CRTS ordering check: within a logical partition the
+            // conflict resolution timestamp should never move backwards.
+            if (crts >= 0 && !pk.isEmpty()) {
+                Long prevCrts = lastCrtsByPk.put(pk, crts);
+                if (prevCrts != null && crts < prevCrts) {
+                    crtsViolationCount.increment();
+                    log.warn("⚠️  CRTS ordering violation: pk={}, prev={}, curr={}", pk, prevCrts, crts);
+                }
+            }
+
+            // Track operation types and validate previousImage
+            switch (opType) {
+                case "create" -> totalCreates.increment();
+                case "replace" -> {
+                    totalReplaces.increment();
+                    if (!hasPrevious) {
+                        missingPreviousImageCount.increment();
+                        log.warn("⚠️  MISSING previous on REPLACE: eventId={}, pk={}", eventId, pk);
+                    }
+                }
+                case "delete" -> {
+                    totalDeletes.increment();
+                    if (!hasPrevious) {
+                        missingPreviousImageCount.increment();
+                        log.warn("⚠️  MISSING previous on DELETE: eventId={}, pk={}", eventId, pk);
+                    }
+                }
+            }
+
+            eventLog.logConsumedAvad(eventId, seqNo, opType, pk, timestamp, lsn, crts);
+            reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts);
+        }
+
+        eventLog.flush();
+    }
+
+    private void logCorrectnessReport() {
+        log.info("=== AVAD Correctness Report ===");
+        log.info("  Creates:  {}", totalCreates.sum());
+        log.info("  Replaces: {} (missing previous: {})", totalReplaces.sum(), missingPreviousImageCount.sum());
+        log.info("  Deletes:  {}", totalDeletes.sum());
+        long missing = missingPreviousImageCount.sum();
+        if (missing > 0) {
+            log.error("❌ previous MISSING on {} replace/delete events", missing);
+        } else {
+            log.info("✅ All replace/delete events have previous image");
+        }
+        long crtsViolations = crtsViolationCount.sum();
+        if (crtsViolations > 0) {
+            log.error("❌ {} CRTS ordering violations detected", crtsViolations);
+        }
+    }
+
+    private static String getTextOrEmpty(JsonNode node, String field) {
+        return node.has(field) ?
node.get(field).asText() : ""; + } + + @Override + public void close() { + logCorrectnessReport(); + for (ChangeFeedProcessor p : processors) { + try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ } + } + try { eventLog.close(); } catch (Exception e) { /* ignore */ } + reconWriter.close(); + client.close(); + log.info("AvadReader closed ({} workers)", processors.size()); + } +} diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java new file mode 100644 index 000000000000..51a10dce7003 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -0,0 +1,139 @@ +package com.azure.cosmos.avadtest.reader; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.avadtest.config.TestConfig; +import com.azure.cosmos.avadtest.reconciliation.EventLog; +import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; +import com.azure.cosmos.models.ChangeFeedProcessorOptions; +import com.azure.cosmos.models.ChangeFeedProcessorItem; +import com.azure.cosmos.ChangeFeedProcessor; +import com.azure.cosmos.ChangeFeedProcessorBuilder; +import com.fasterxml.jackson.databind.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +/** + * Latest Version ChangeFeedProcessor reader. + * Lease prefix "lv-" — isolated from AVAD reader leases. + * Gateway mode, preferred region configurable (default: West Central US). 
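+ *
+ * Example consumed.log line written by this reader (field layout from
+ * EventLog.logConsumed; values illustrative):
+ *
+ *   2f9d...-uuid,42,create,tenant-17,2026-05-04T23:59:59Z,12345
+ *   (eventId,seqNo,opType,partitionKey,timestamp,lsn)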
+ */
+public final class LatestVersionReader implements AutoCloseable {
+
+    private static final Logger log = LoggerFactory.getLogger(LatestVersionReader.class);
+    private static final String LEASE_PREFIX = "lv-";
+
+    private final TestConfig config;
+    private final CosmosAsyncClient client;
+    private final CosmosAsyncContainer feedContainer;
+    private final CosmosAsyncContainer leaseContainer;
+    private final EventLog eventLog;
+    private final ReconciliationWriter reconWriter;
+    private final List<ChangeFeedProcessor> processors = new ArrayList<>();
+
+    public LatestVersionReader(TestConfig config) throws Exception {
+        this.config = config;
+        this.eventLog = new EventLog(config.consumedLogFile());
+        this.reconWriter = new ReconciliationWriter(config, "cfp-lv");
+
+        this.client = new CosmosClientBuilder()
+            .endpoint(config.readerEndpoint())
+            .key(config.key())
+            .gatewayMode()
+            .contentResponseOnWriteEnabled(true)
+            .preferredRegions(config.preferredRegions())
+            .buildAsyncClient();
+
+        this.feedContainer = client
+            .getDatabase(config.database())
+            .getContainer(config.feedContainer());
+
+        this.leaseContainer = client
+            .getDatabase(config.database())
+            .getContainer(config.leaseContainer());
+
+        log.info("LatestVersionReader initialized: prefix={}, endpoint={}, region={}, workers={}",
+            LEASE_PREFIX, config.readerEndpoint(), config.preferredRegion(), config.workerCount());
+    }
+
+    public void run() throws InterruptedException {
+        CountDownLatch latch = new CountDownLatch(1);
+        int workers = config.workerCount();
+
+        for (int i = 0; i < workers; i++) {
+            final int workerIdx = i;
+            ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions();
+            options.setLeasePrefix(LEASE_PREFIX);
+            options.setFeedPollDelay(Duration.ofSeconds(1));
+            options.setStartTime(Instant.now().minus(Duration.ofDays(5)));
+
+            ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder()
+                .hostName("lv-host-" + ProcessHandle.current().pid() + "-w" + workerIdx)
+                .feedContainer(feedContainer)
+                .leaseContainer(leaseContainer)
+                .options(options)
+                .handleLatestVersionChanges(this::handleChanges)
+                .buildChangeFeedProcessor();
+
+            processor.start()
+                .doOnSuccess(v -> log.info("LV ChangeFeedProcessor worker-{} started", workerIdx))
+                .doOnError(e -> log.error("Failed to start LV CFP worker-{}", workerIdx, e))
+                .block();
+
+            processors.add(processor);
+        }
+
+        log.info("All {} LV workers started", workers);
+
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            log.info("Shutdown signal, stopping {} LV CFP workers...", processors.size());
+            for (ChangeFeedProcessor p : processors) {
+                try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ }
+            }
+            latch.countDown();
+        }));
+
+        latch.await();
+    }
+
+    private void handleChanges(List<ChangeFeedProcessorItem> items) {
+        for (ChangeFeedProcessorItem item : items) {
+            JsonNode current = item.getCurrent();
+            if (current == null || current.isNull()) continue;  // LV mode shouldn't get null current
+
+            String eventId = getTextOrEmpty(current, "eventId");
+            long seqNo = current.has("seqNo") ? current.get("seqNo").asLong() : -1;
+            String opType = getTextOrEmpty(current, "operationType");
+            String pk = getTextOrEmpty(current, "tenantId");
+            String timestamp = getTextOrEmpty(current, "timestamp");
+            long lsn = current.has("_lsn") ?
current.get("_lsn").asLong() : -1; + + eventLog.logConsumed(eventId, seqNo, opType, pk, timestamp, lsn); + reconWriter.record(eventId, seqNo, opType, pk, lsn, false, -1); + } + + eventLog.flush(); + } + + private static String getTextOrEmpty(JsonNode node, String field) { + return node.has(field) ? node.get(field).asText() : ""; + } + + @Override + public void close() { + for (ChangeFeedProcessor p : processors) { + try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ } + } + try { eventLog.close(); } catch (Exception e) { /* ignore */ } + reconWriter.close(); + client.close(); + log.info("LatestVersionReader closed ({} workers)", processors.size()); + } +} diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java new file mode 100644 index 000000000000..31218bbc193c --- /dev/null +++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java @@ -0,0 +1,79 @@ +package com.azure.cosmos.avadtest.reconciliation; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Append-only event log for reconciliation. + * + * Format per line: + * correlationId,seqNo,opType,partitionKey,timestamp,lsn[,crts] + * + * Ingestor writes: correlationId,seqNo,opType,partitionKey,timestamp, + * LV reader writes: correlationId,seqNo,opType,partitionKey,timestamp,lsn + * AVAD reader writes: correlationId,seqNo,opType,partitionKey,timestamp,lsn,crts + */ +public final class EventLog implements AutoCloseable { + + private static final Logger log = LoggerFactory.getLogger(EventLog.class); + + private final BufferedWriter writer; + private final ReentrantLock lock = new ReentrantLock(); + + public EventLog(String filePath) throws IOException { + Path path = Path.of(filePath); + this.writer = Files.newBufferedWriter(path, + StandardOpenOption.CREATE, StandardOpenOption.APPEND, StandardOpenOption.WRITE); + log.info("EventLog opened: {}", path.toAbsolutePath()); + } + + public void logProduced(String correlationId, long seqNo, String opType, + String partitionKey, String timestamp) { + writeLine(String.format("%s,%d,%s,%s,%s,", correlationId, seqNo, opType, partitionKey, timestamp)); + } + + public void logConsumed(String correlationId, long seqNo, String opType, + String partitionKey, String timestamp, long lsn) { + writeLine(String.format("%s,%d,%s,%s,%s,%d", correlationId, seqNo, opType, partitionKey, timestamp, lsn)); + } + + public void logConsumedAvad(String correlationId, long seqNo, String opType, + String partitionKey, String timestamp, long lsn, long crts) { + writeLine(String.format("%s,%d,%s,%s,%s,%d,%d", correlationId, seqNo, opType, partitionKey, timestamp, lsn, crts)); + } + + private void writeLine(String line) { + lock.lock(); + try { + writer.write(line); + writer.newLine(); + } catch (IOException e) { + log.error("Failed to write event log", e); + } finally { + lock.unlock(); + } + } + + public void flush() { + lock.lock(); + try { + writer.flush(); + } catch (IOException e) { + log.error("Failed to flush event log", e); + } finally { + lock.unlock(); + } + } + + @Override + public void close() throws IOException { + flush(); + writer.close(); + } +} diff --git 
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java
new file mode 100644
index 000000000000..dd3c9a4ed72b
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java
@@ -0,0 +1,210 @@
+package com.azure.cosmos.avadtest.reconciliation;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Reconciler that compares produced vs consumed event logs.
+ * Uses eventId (unique per operation) for per-event reconciliation.
+ *
+ * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn[,crts]
+ *
+ * Checks:
+ *   1. Gap detection — every produced eventId must appear in consumed
+ *   2. LV ↔ AVAD parity — every LV event must appear in AVAD (AVAD ⊇ LV)
+ *   3. Ordering — LSN must be monotonically increasing per partitionKey
+ *   4. CRTS ordering — CRTS must be monotonically increasing per partitionKey (AVAD only)
+ *
+ * Exit code: 0 = all checks pass, 1 = failures detected
+ */
+public final class Reconciler {
+
+    private static final Logger log = LoggerFactory.getLogger(Reconciler.class);
+
+    public static int reconcile(String producedFile, String consumedFile) throws IOException {
+        log.info("=== Gap Detection: {} vs {} ===", producedFile, consumedFile);
+
+        Set<String> produced = loadEventIds(producedFile);
+        Set<String> consumed = loadEventIds(consumedFile);
+
+        Set<String> missing = new HashSet<>(produced);
+        missing.removeAll(consumed);
+
+        // Count duplicates (at-least-once delivery)
+        long totalConsumedLines = Files.lines(Path.of(consumedFile)).filter(l -> !l.isBlank()).count();
+        long duplicates = totalConsumedLines - consumed.size();
+
+        log.info("Produced: {} unique events", produced.size());
+        log.info("Consumed: {} unique events ({} total lines, {} duplicates)",
+            consumed.size(), totalConsumedLines, duplicates);
+        log.info("Missing (gaps): {}", missing.size());
+
+        if (!missing.isEmpty()) {
+            log.error("❌ MISSED CHANGES DETECTED:");
+            missing.stream().limit(50).forEach(id -> log.error(" missing: {}", id));
+            if (missing.size() > 50) {
+                log.error(" ... and {} more", missing.size() - 50);
+            }
+        }
+
+        int orderViolations = checkOrderingByLsn(consumedFile);
+        int crtsViolations = checkOrderingByCrts(consumedFile);
+
+        boolean passed = missing.isEmpty() && orderViolations == 0 && crtsViolations == 0;
+        log.info(passed ? "✅ All checks passed" : "❌ Checks FAILED");
+        return passed ? 0 : 1;
+    }
+
+    public static int parity(String lvFile, String avadFile) throws IOException {
+        log.info("=== LV ↔ AVAD Parity: {} vs {} ===", lvFile, avadFile);
+
+        Set<String> lvIds = loadEventIds(lvFile);
+        Set<String> avadIds = loadEventIds(avadFile);
+
+        Set<String> missingInAvad = new HashSet<>(lvIds);
+        missingInAvad.removeAll(avadIds);
+
+        Set<String> avadOnly = new HashSet<>(avadIds);
+        avadOnly.removeAll(lvIds);
+
+        log.info("LV events: {}", lvIds.size());
+        log.info("AVAD events: {}", avadIds.size());
+        log.info("Missing in AVAD (should be 0): {}", missingInAvad.size());
+        log.info("AVAD-only events (deletes, extra versions): {}", avadOnly.size());
+
+        if (!missingInAvad.isEmpty()) {
+            log.error("❌ AVAD MISSING LV EVENTS:");
+            missingInAvad.stream().limit(50).forEach(id -> log.error(" missing: {}", id));
+        }
+
+        boolean passed = missingInAvad.isEmpty();
+        log.info(passed ? "✅ Parity check passed (AVAD ⊇ LV)" : "❌ Parity check FAILED");
+        return passed ? 0 : 1;
+    }
+
+    /** Loads unique eventIds (first field per line). */
+    private static Set<String> loadEventIds(String file) throws IOException {
+        try (var lines = Files.lines(Path.of(file))) {
+            return lines
+                .filter(l -> !l.isBlank())
+                .map(l -> l.split(",")[0])
+                .collect(Collectors.toSet());
+        }
+    }
+
+    /**
+     * Check that LSN is monotonically increasing per partitionKey.
+     * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn
+     * Sorts by (partitionKey, lsn) then checks for inversions.
+     */
+    private static int checkOrderingByLsn(String consumedFile) throws IOException {
+        log.info("=== LSN Ordering Check: {} ===", consumedFile);
+
+        // Load all records grouped by partition key
+        Map<String, List<long[]>> recordsByPk = new HashMap<>();
+
+        try (var reader = new BufferedReader(new FileReader(consumedFile))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.isBlank()) continue;
+                String[] parts = line.split(",");
+                if (parts.length < 6) continue;
+
+                String pk = parts[3];
+                long seqNo = Long.parseLong(parts[1]);
+                long lsn = parts[5].isBlank() ? -1 : Long.parseLong(parts[5]);
+                if (lsn < 0) continue; // skip records without LSN
+
+                recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>())
+                    .add(new long[]{lsn, seqNo});
+            }
+        }
+
+        int violations = 0;
+        for (var entry : recordsByPk.entrySet()) {
+            String pk = entry.getKey();
+            List<long[]> records = entry.getValue();
+            // Sort by LSN, then check seqNo is non-decreasing within same LSN batch
+            records.sort(Comparator.comparingLong(r -> r[0]));
+
+            long prevLsn = -1;
+            for (long[] record : records) {
+                if (record[0] < prevLsn) {
+                    violations++;
+                    if (violations <= 10) {
+                        log.warn("LSN ordering violation: PK={}, prevLsn={}, currLsn={}",
+                            pk, prevLsn, record[0]);
+                    }
+                }
+                prevLsn = record[0];
+            }
+        }
+
+        log.info("LSN ordering violations: {} (across {} partition keys)",
+            violations, recordsByPk.size());
+        return violations;
+    }
+
+    /**
+     * Check that CRTS is monotonically increasing per partitionKey.
+     * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn,crts
+     * Only applies to AVAD logs (7 columns). Lines without CRTS are skipped.
+     */
+    private static int checkOrderingByCrts(String consumedFile) throws IOException {
+        log.info("=== CRTS Ordering Check: {} ===", consumedFile);
+
+        Map<String, List<long[]>> recordsByPk = new HashMap<>();
+
+        try (var reader = new BufferedReader(new FileReader(consumedFile))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.isBlank()) continue;
+                String[] parts = line.split(",");
+                if (parts.length < 7) continue; // CRTS is column 6, only in AVAD logs
+
+                String pk = parts[3];
+                long lsn = parts[5].isBlank() ? -1 : Long.parseLong(parts[5]);
+                long crts = parts[6].isBlank() ? -1 : Long.parseLong(parts[6]);
+                if (crts < 0) continue;
+
+                recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>())
+                    .add(new long[]{crts, lsn});
+            }
+        }
+
+        if (recordsByPk.isEmpty()) {
+            log.info("No CRTS data found (not an AVAD log?), skipping check");
+            return 0;
+        }
+
+        int violations = 0;
+        for (var entry : recordsByPk.entrySet()) {
+            String pk = entry.getKey();
+            List<long[]> records = entry.getValue();
+            // Sort by LSN (delivery order), then check CRTS is non-decreasing
+            records.sort(Comparator.comparingLong(r -> r[1]));
+
+            long prevCrts = -1;
+            for (long[] record : records) {
+                if (prevCrts > 0 && record[0] < prevCrts) {
+                    violations++;
+                    if (violations <= 10) {
+                        log.warn("CRTS ordering violation: PK={}, prevCrts={}, currCrts={}, lsn={}",
+                            pk, prevCrts, record[0], record[1]);
+                    }
+                }
+                prevCrts = record[0];
+            }
+        }
+
+        log.info("CRTS ordering violations: {} (across {} partition keys)",
+            violations, recordsByPk.size());
+        return violations;
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java
new file mode 100644
index 000000000000..970b42081e50
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java
@@ -0,0 +1,182 @@
+package com.azure.cosmos.avadtest.reconciliation;
+
+import com.azure.cosmos.CosmosAsyncClient;
+import com.azure.cosmos.CosmosAsyncContainer;
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig;
+import com.azure.cosmos.avadtest.config.TestConfig;
+import com.azure.cosmos.models.CosmosItemRequestOptions;
+import com.azure.cosmos.models.PartitionKey;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import reactor.core.publisher.Mono;
+import reactor.core.publisher.Sinks;
+import reactor.core.scheduler.Schedulers;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Writes reconciliation events to a shared Cosmos container.
+ * All consumers (CFP LV, CFP AVAD, Spark LV, Spark AVAD, Ingestor) write here
+ * with a common schema, enabling a single reconciliation query across all of them.
+ *
+ * Container: "reconciliation" in same database
+ * Partition key: /correlationId
+ *
+ * Document schema:
+ * {
+ *   "id": "{source}-{correlationId}",   // unique per source+event
+ *   "correlationId": "corr-uuid",
+ *   "source": "ingestor|cfp-lv|cfp-avad|spark-lv|spark-avad",
+ *   "seqNo": 12345,
+ *   "opType": "create|replace|upsert|delete",
+ *   "partitionKey": "tenant-42",
+ *   "lsn": 999,                         // -1 for ingestor
+ *   "hasPreviousImage": true,           // only for AVAD sources
+ *   "crts": 1714300800000,              // conflict resolution timestamp (epoch ms), -1 if N/A
+ *   "timestamp": "2026-04-28T12:00:00Z",
+ *   "recordedAt": "2026-04-28T12:00:01Z"
+ * }
+ */
+public final class ReconciliationWriter implements AutoCloseable {
+
+    private static final Logger log = LoggerFactory.getLogger(ReconciliationWriter.class);
+    private static final String RECONCILIATION_CONTAINER = "reconciliation";
+    private static final int MAX_RETRIES = 3;
+    private static final int MAX_REQUEUES = 2; // max times a doc can be requeued after all retries fail
+    private static final String REQUEUE_COUNT_FIELD = "_requeueCount";
+
+    private static final CosmosEndToEndOperationLatencyPolicyConfig E2E_POLICY =
+        new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(6)).build();
+
+    private final String source;
+    private final CosmosAsyncContainer container;
+    private final CosmosAsyncClient client;
+    private final LongAdder writeCount = new LongAdder();
+    private final LongAdder errorCount = new LongAdder();
+    private final LongAdder retryCount = new LongAdder();
+    private final LongAdder dropCount = new LongAdder();
+
+    private final Sinks.Many<ObjectNode> sink;
+    private final reactor.core.Disposable subscription;
+
+    public ReconciliationWriter(TestConfig config, String source) {
+        this.source = source;
+
+        this.client = new CosmosClientBuilder()
+            .endpoint(config.endpoint())
+            .key(config.key())
+            .gatewayMode()
+            .preferredRegions(config.preferredRegions())
+            .buildAsyncClient();
+
+        this.container = client
+            .getDatabase(config.database())
+            .getContainer(RECONCILIATION_CONTAINER);
+
+        this.sink = Sinks.many().multicast().onBackpressureBuffer(100_000);
+
+        this.subscription = sink.asFlux()
+            .flatMap(this::writeDoc, 50)
+            .subscribeOn(Schedulers.boundedElastic())
+            .subscribe();
+
+        log.info("ReconciliationWriter initialized: source={}, container={}",
+            source, RECONCILIATION_CONTAINER);
+    }
+
+    /**
+     * Record a produced or consumed event for reconciliation.
+     * Non-blocking — buffers internally and writes async.
+     */
+    public void record(String eventId, long seqNo, String opType,
+                       String partitionKey, long lsn, boolean hasPreviousImage, long crts) {
+        ObjectNode doc = JsonNodeFactory.instance.objectNode();
+        doc.put("id", source + "-" + eventId);
+        doc.put("correlationId", eventId);
+        doc.put("source", source);
+        doc.put("seqNo", seqNo);
+        doc.put("opType", opType);
+        doc.put("partitionKey", partitionKey);
+        doc.put("lsn", lsn);
+        doc.put("hasPreviousImage", hasPreviousImage);
+        doc.put("crts", crts);
+        doc.put("timestamp", Instant.now().toString());
+        doc.put(REQUEUE_COUNT_FIELD, 0);
+
+        Sinks.EmitResult result = sink.tryEmitNext(doc);
+        if (result.isFailure()) {
+            dropCount.increment();
+            log.warn("Reconciliation sink full/closed, dropping event: eventId={}", eventId);
+        }
+    }
+
+    private Mono<Void> writeDoc(ObjectNode doc) {
+        String eventId = doc.get("correlationId").asText();
+
+        CosmosItemRequestOptions options = new CosmosItemRequestOptions();
+        options.setCosmosEndToEndOperationLatencyPolicyConfig(E2E_POLICY);
+
+        return container.upsertItem(doc, new PartitionKey(eventId), options)
+            .doOnSuccess(r -> writeCount.increment())
+            .retryWhen(reactor.util.retry.Retry.backoff(MAX_RETRIES, Duration.ofMillis(500))
+                .maxBackoff(Duration.ofSeconds(2))
+                .filter(this::isRetryable)
+                .doBeforeRetry(signal -> {
+                    retryCount.increment();
+                    log.warn("Reconciliation write retry #{} for id={}: {}",
+                        signal.totalRetries() + 1,
+                        doc.get("id").asText(),
+                        signal.failure().getMessage());
+                }))
+            .doOnError(e -> {
+                int requeueCount = doc.has(REQUEUE_COUNT_FIELD) ? doc.get(REQUEUE_COUNT_FIELD).asInt() : 0;
+                if (requeueCount < MAX_REQUEUES && isRetryable(e)) {
+                    doc.put(REQUEUE_COUNT_FIELD, requeueCount + 1);
+                    Sinks.EmitResult result = sink.tryEmitNext(doc);
+                    if (result.isSuccess()) {
+                        log.warn("Requeueing (attempt {}): id={}", requeueCount + 1, doc.get("id").asText());
+                    } else {
+                        errorCount.increment();
+                        log.error("Requeue failed (sink full/closed): id={}", doc.get("id").asText());
+                    }
+                } else {
+                    errorCount.increment();
+                    dropCount.increment();
+                    log.error("Permanently dropped: id={}, requeues={}, error={}",
+                        doc.get("id").asText(), requeueCount, e.getMessage());
+                }
+            })
+            .onErrorResume(e -> Mono.empty())
+            .then();
+    }
+
+    private boolean isRetryable(Throwable e) {
+        String msg = e.getMessage();
+        if (msg == null) return true;
+        // Don't retry permanent failures
+        return !msg.contains("404") && !msg.contains("Unauthorized") && !msg.contains("403");
+    }
+
+    public long getWriteCount() { return writeCount.sum(); }
+    public long getErrorCount() { return errorCount.sum(); }
+    public long getDropCount() { return dropCount.sum(); }
+
+    @Override
+    public void close() {
+        sink.tryEmitComplete();
+        // Wait for the subscriber to drain all buffered writes
+        try {
+            subscription.dispose();
+            Thread.sleep(10_000); // allow in-flight writes to complete
+        } catch (InterruptedException ignored) {}
+        client.close();
+        log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}, drops={}",
+            source, writeCount.sum(), retryCount.sum(), errorCount.sum(), dropCount.sum());
+    }
+}
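The class comment above promises a single reconciliation query across all sources. As a rough illustration of what that could look like against the documented schema (a sketch, not code from this patch; `container` is assumed to be bound to the "reconciliation" container):

```java
// Hypothetical cross-source read: fetch every record the AVAD CFP consumer
// wrote, keyed by correlationId, for offline comparison with the ingestor's
// records. CosmosPagedFlux is com.azure.cosmos.util.CosmosPagedFlux;
// CosmosQueryRequestOptions is com.azure.cosmos.models.CosmosQueryRequestOptions.
CosmosPagedFlux<ObjectNode> avadRecords = container.queryItems(
    "SELECT c.correlationId, c.lsn, c.crts FROM c WHERE c.source = 'cfp-avad'",
    new CosmosQueryRequestOptions(),
    ObjectNode.class);
```

diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties b/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties
new file mode 100644
index 000000000000..5751fb3edca4
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties
@@ -0,0 +1,24 @@
+# Cosmos AVAD Test — Default Config
+# Override via environment variables (same names, uppercase)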
+COSMOS_ENDPOINT=https://abhm-cfp-region-test.documents.azure.com:443/
+COSMOS_KEY=
+COSMOS_DATABASE=graph_db
+COSMOS_FEED_CONTAINER=avad-test
+COSMOS_LEASE_CONTAINER=avad-test-leases
+COSMOS_PREFERRED_REGION=West Central US
+
+# Ingestor settings
+OPS_PER_SEC=5000
+DOC_SIZE_BYTES=1024
+LOGICAL_PARTITION_COUNT=100000
+
+# Log files
+PRODUCED_LOG=produced.log
+CONSUMED_LOG=consumed-avad.log
+
+# Duration (0 = run forever)
+DURATION_SECONDS=3600
+
+# CFP worker count per reader mode
+WORKER_COUNT=2
diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml b/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml
new file mode 100644
index 000000000000..ee24b16d9ac3
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml
@@ -0,0 +1,32 @@
+<!-- XML element names were lost in extraction; appenders reconstructed
+     conventionally from the surviving values, per-logger level overrides
+     not recoverable. -->
+<configuration>
+  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+    <encoder>
+      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
+    <file>cosmos-avad-test.log</file>
+    <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+      <fileNamePattern>cosmos-avad-test.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
+      <maxFileSize>100MB</maxFileSize>
+      <maxHistory>7</maxHistory>
+    </rollingPolicy>
+    <encoder>
+      <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <root level="INFO">
+    <appender-ref ref="STDOUT"/>
+    <appender-ref ref="FILE"/>
+  </root>
+</configuration>
diff --git a/sdk/cosmos/pom.xml b/sdk/cosmos/pom.xml
index 45c4329c4179..d7601f3a9329 100644
--- a/sdk/cosmos/pom.xml
+++ b/sdk/cosmos/pom.xml
@@ -12,6 +12,7 @@
     <module>azure-cosmos</module>
     <module>azure-cosmos-benchmark</module>
+    <module>azure-cosmos-avad-test</module>
     <module>azure-cosmos-encryption</module>
     <module>azure-cosmos-spark_3</module>
     <module>azure-cosmos-spark_3-3_2-12</module>

From a266d8365c39830bc344b1636bad91cc4c1e2cc5 Mon Sep 17 00:00:00 2001
From: Abhijeet Mohanty
Date: Mon, 4 May 2026 19:46:21 -0400
Subject: [PATCH 02/28] Move AVAD soak test into azure-cosmos-benchmark module

Moves AVAD code from standalone azure-cosmos-avad-test module into
azure-cosmos-benchmark:
- Java sources under com.azure.cosmos.avadtest package
- Operational files (Dockerfile, Helm, chaos, scripts) under avad-soak/
- Converted from picocli to jcommander (no new dependencies)
- Removed standalone module from sdk/cosmos/pom.xml
- Reverted picocli addition to external_dependencies.txt

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/hooks/hooks.json | 15 +- eng/versioning/external_dependencies.txt | 1 - sdk/cosmos/azure-cosmos-avad-test/pom.xml | 202 ------ .../avad-soak}/Dockerfile | 0 .../avad-soak}/application.properties | 0 .../avad-soak}/chaos/README.md | 0 .../avad-soak}/chaos/chaos-schedule.yaml | 0 .../chaos/scenarios/lease-throttle.sh | 0 .../chaos/scenarios/network-fault.sh | 0 .../avad-soak}/chaos/scenarios/node-drain.sh | 0 .../chaos/scenarios/partition-split.sh | 0 .../avad-soak}/chaos/scenarios/pod-kill.sh | 0 .../chaos/scenarios/restart-storm.sh | 0 .../avad-soak}/infra/README.md | 0 .../avad-soak}/infra/chart/Chart.yaml | 0 .../infra/chart/templates/_helpers.tpl | 0 .../infra/chart/templates/configmap.yaml | 0 .../chart/templates/consumer-statefulset.yaml | 0 .../templates/health-monitor-cronjob.yaml | 0 .../chart/templates/ingestor-deployment.yaml | 0 .../avad-soak}/infra/chart/values.yaml | 0 .../avad-soak}/infra/scripts/setup-acr.sh | 0 .../avad-soak}/infra/scripts/setup-aks.sh | 0 .../avad-soak}/infra/scripts/setup-cosmos.sh | 0 .../avad-soak}/logback.xml | 0 .../avad-soak}/run-cutover.sh | 0 .../avad-soak}/run-soak.sh | 0 .../java/com/azure/cosmos/avadtest/Main.java | 98 +++++---- .../cosmos/avadtest/config/TestConfig.java | 0 .../cosmos/avadtest/health/HealthMonitor.java | 0 .../cosmos/avadtest/health/HealthServer.java | 0 .../cosmos/avadtest/ingestor/Ingestor.java | 0 .../cosmos/avadtest/metrics/SoakMetrics.java | 0 .../cosmos/avadtest/reader/AvadReader.java | 0
.../avadtest/reader/LatestVersionReader.java | 0 .../avadtest/reconciliation/EventLog.java | 0 .../avadtest/reconciliation/Reconciler.java | 0 .../reconciliation/ReconciliationWriter.java | 0 sdk/cosmos/pom.xml | 1 - 39 files changed, 71 insertions(+), 246 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-avad-test/pom.xml rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/Dockerfile (100%) rename sdk/cosmos/{azure-cosmos-avad-test/src/main/resources => azure-cosmos-benchmark/avad-soak}/application.properties (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/README.md (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/chaos-schedule.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/lease-throttle.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/network-fault.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/node-drain.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/partition-split.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/pod-kill.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/chaos/scenarios/restart-storm.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/README.md (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/Chart.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/templates/_helpers.tpl (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/templates/configmap.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/templates/consumer-statefulset.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/templates/health-monitor-cronjob.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/templates/ingestor-deployment.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/chart/values.yaml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/scripts/setup-acr.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/scripts/setup-aks.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/infra/scripts/setup-cosmos.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test/src/main/resources => azure-cosmos-benchmark/avad-soak}/logback.xml (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/run-cutover.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark/avad-soak}/run-soak.sh (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/Main.java (58%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test 
=> azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java (100%) rename sdk/cosmos/{azure-cosmos-avad-test => azure-cosmos-benchmark}/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java (100%) diff --git a/.github/hooks/hooks.json index 938bb0dac66b..f8450fb3ba3a 100644 --- a/.github/hooks/hooks.json +++ b/.github/hooks/hooks.json @@ -1,9 +1,20 @@ { + "version": 1, "hooks": { - "PostToolUse": [ { "type": "command", - "command": "pwsh eng/common/scripts/azsdk_tool_telemetry.ps1", + "powershell": "powershell -ExecutionPolicy Bypass -File \"C:/Users/abhmohanty/.copilot/hooks/block-force-push.ps1\"", + "bash": "pwsh -ExecutionPolicy Bypass -File ~/.copilot/hooks/block-force-push.ps1", + "cwd": ".", + "timeoutSec": 10 + } + ], + "postToolUse": [ + { + "type": "command", + "powershell": "powershell -ExecutionPolicy Bypass -Command \"$raw = [Console]::In.ReadToEnd(); $j = $raw | ConvertFrom-Json; if ($j.toolName -eq 'powershell' -and $j.toolArgs -match 'git push') { Add-Content -Path 'C:/Users/abhmohanty/.copilot/hooks/push-audit.log' -Value \\\"$(Get-Date -Format o) | $($j.toolArgs)\\\" }\"", + "cwd": ".", "timeoutSec": 5 } ] diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index a6c9ba0c2fe9..d23f3b1c80d3 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -59,7 +59,6 @@ io.netty:netty-transport-native-unix-common;4.1.132.Final io.netty:netty-transport-native-kqueue;4.1.132.Final io.projectreactor.netty:reactor-netty-http;1.2.16 io.projectreactor:reactor-core;3.7.17 -info.picocli:picocli;4.7.6 io.vertx:vertx-codegen;4.5.26 io.vertx:vertx-core;4.5.26 javax.websocket:javax.websocket-api;1.1
diff --git a/sdk/cosmos/azure-cosmos-avad-test/pom.xml b/sdk/cosmos/azure-cosmos-avad-test/pom.xml
deleted file mode 100644
index fdaa4ea4e62e..000000000000
--- a/sdk/cosmos/azure-cosmos-avad-test/pom.xml
+++ /dev/null
@@ -1,202 +0,0 @@
-<!-- [202-line POM deleted; its XML element names were stripped in extraction.
-     Recoverable content: parent com.azure:azure-client-sdk-parent:1.7.0;
-     artifact com.azure:azure-cosmos-avad-test:1.0.0-beta.1, Java 17;
-     dependencies com.azure:azure-cosmos:4.81.0-beta.1, info.picocli:picocli:4.7.6,
-     ch.qos.logback:logback-classic:1.3.14, org.slf4j:slf4j-api:1.7.36;
-     maven-shade-plugin 3.6.0 with main class com.azure.cosmos.avadtest.Main and
-     META-INF signature exclusions; exec-maven-plugin 3.5.1; maven-surefire-plugin
-     3.5.3 with tests skipped; checkstyle 3.6.0 and spotbugs 4.8.3.1 disabled;
-     maven-compiler-plugin 3.14.0 (default-compile and base-compile at 17);
-     maven-enforcer-plugin 3.6.1 pinning the three external dependencies.] -->
diff --git a/sdk/cosmos/azure-cosmos-avad-test/Dockerfile b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/Dockerfile rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/resources/application.properties rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/README.md b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/README.md rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/chaos-schedule.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/lease-throttle.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/network-fault.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/node-drain.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/partition-split.sh similarity index 100% rename from
sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/partition-split.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/partition-split.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/pod-kill.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/pod-kill.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/pod-kill.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/chaos/scenarios/restart-storm.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/README.md b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/README.md rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/Chart.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/Chart.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/Chart.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/_helpers.tpl similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/_helpers.tpl rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/_helpers.tpl diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/configmap.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/configmap.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/configmap.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/consumer-statefulset.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/consumer-statefulset.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/consumer-statefulset.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/health-monitor-cronjob.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/health-monitor-cronjob.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/health-monitor-cronjob.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/ingestor-deployment.yaml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/chart/templates/ingestor-deployment.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/templates/ingestor-deployment.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/values.yaml similarity index 100% rename from 
sdk/cosmos/azure-cosmos-avad-test/infra/chart/values.yaml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/chart/values.yaml diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-acr.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-aks.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-aks.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-aks.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/infra/scripts/setup-cosmos.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/logback.xml similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/resources/logback.xml rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/logback.xml diff --git a/sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/run-cutover.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/run-soak.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/run-soak.sh rename to sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java similarity index 58% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java index 593141318d59..300fd0b959ff 100644 --- a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/Main.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java @@ -8,65 +8,62 @@ import com.azure.cosmos.avadtest.reader.AvadReader; import com.azure.cosmos.avadtest.reader.LatestVersionReader; import com.azure.cosmos.avadtest.reconciliation.Reconciler; +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import picocli.CommandLine; -import picocli.CommandLine.Command; -import picocli.CommandLine.Option; -import java.util.concurrent.Callable; - -@Command(name = "cosmos-avad-test", - mixinStandardHelpOptions = true, - version = "1.0", - description = "AVAD E2E test: ingestor, LV reader, AVAD reader, reconciler") -public final class Main implements Callable { +public final class Main { private static final Logger log = LoggerFactory.getLogger(Main.class); - @Option(names = "--mode", required = true, - description = "Mode: ingestor, lv-reader, avad-reader, reconcile") + @Parameter(names = "--mode", 
required = true, + description = "Mode: ingestor, lv-reader, avad-reader, reconcile, health-monitor") private String mode; - @Option(names = "--produced", description = "Produced log file (for reconcile mode)") + @Parameter(names = "--produced", description = "Produced log file (for reconcile mode)") private String producedFile; - @Option(names = "--consumed", description = "Consumed log file (for reconcile mode)") + @Parameter(names = "--consumed", description = "Consumed log file (for reconcile mode)") private String consumedFile; - @Option(names = "--lv", description = "LV consumed log file (for parity check)") + @Parameter(names = "--lv", description = "LV consumed log file (for parity check)") private String lvFile; - @Option(names = "--avad", description = "AVAD consumed log file (for parity check)") + @Parameter(names = "--avad", description = "AVAD consumed log file (for parity check)") private String avadFile; - @Option(names = "--health-port", defaultValue = "8080", - description = "Health server port (default: 8080)") - private int healthPort; + @Parameter(names = "--health-port", description = "Health server port (default: 8080)") + private int healthPort = 8080; + + @Parameter(names = "--run-id", description = "Soak run identifier (for health monitor)") + private String runId = "soak-default"; - @Option(names = "--run-id", defaultValue = "soak-default", - description = "Soak run identifier (for health monitor)") - private String runId; + @Parameter(names = "--gap-sla-minutes", description = "Minutes before an unconsumed event is flagged as a gap") + private int gapSlaMinutes = 10; - @Option(names = "--gap-sla-minutes", defaultValue = "10", - description = "Minutes before an unconsumed event is flagged as a gap") - private int gapSlaMinutes; + @Parameter(names = {"-h", "--help"}, description = "Help", help = true) + private boolean help; - @Override - public Integer call() throws Exception { + private int run() throws Exception { log.info("Starting cosmos-avad-test in mode: {}", mode); - return switch (mode) { - case "ingestor" -> runIngestor(); - case "lv-reader" -> runLvReader(); - case "avad-reader" -> runAvadReader(); - case "reconcile" -> runReconcile(); - case "health-monitor" -> runHealthMonitor(); - default -> { - log.error("Unknown mode: {}. Use: ingestor, lv-reader, avad-reader, reconcile", mode); - yield 1; - } - }; + switch (mode) { + case "ingestor": + return runIngestor(); + case "lv-reader": + return runLvReader(); + case "avad-reader": + return runAvadReader(); + case "reconcile": + return runReconcile(); + case "health-monitor": + return runHealthMonitor(); + default: + log.error("Unknown mode: {}. 
Use: ingestor, lv-reader, avad-reader, reconcile, health-monitor", mode); + return 1; + } } private int runIngestor() throws Exception { @@ -136,7 +133,28 @@ private int runReconcile() throws Exception { } public static void main(String[] args) { - int exitCode = new CommandLine(new Main()).execute(args); - System.exit(exitCode); + Main main = new Main(); + JCommander jc = JCommander.newBuilder().addObject(main).build(); + jc.setProgramName("cosmos-avad-test"); + + try { + jc.parse(args); + } catch (ParameterException e) { + log.error("Invalid arguments: {}", e.getMessage()); + jc.usage(); + System.exit(1); + } + + if (main.help) { + jc.usage(); + return; + } + + try { + System.exit(main.run()); + } catch (Exception e) { + log.error("Fatal error", e); + System.exit(1); + } } } diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java diff --git 
a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java diff --git a/sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java similarity index 100% rename from sdk/cosmos/azure-cosmos-avad-test/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java rename to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java diff --git a/sdk/cosmos/pom.xml b/sdk/cosmos/pom.xml index d7601f3a9329..45c4329c4179 100644 --- a/sdk/cosmos/pom.xml +++ b/sdk/cosmos/pom.xml @@ -12,7 +12,6 @@ azure-cosmos azure-cosmos-benchmark - azure-cosmos-avad-test azure-cosmos-encryption azure-cosmos-spark_3 azure-cosmos-spark_3-3_2-12 From 626b0f1fe05a7efc7a6474ca8715fcba2074b6ca Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 4 May 2026 21:20:54 -0400 Subject: [PATCH 03/28] Fix AVAD code to Java 8 compatibility for benchmark module Convert Java 9+ features to Java 8 equivalents: - Switch expressions -> if/else chains - ProcessHandle.current().pid() -> ManagementFactory.getRuntimeMXBean().getName() - Path.of() -> Paths.get() - String.isBlank() -> .trim().isEmpty() - List.of() -> Collections.singletonList() - String.repeat() -> StringBuilder loop - var -> explicit types Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/config/TestConfig.java | 13 +++++---- .../cosmos/avadtest/ingestor/Ingestor.java | 7 ++++- .../cosmos/avadtest/reader/AvadReader.java | 28 +++++++++---------- .../avadtest/reader/LatestVersionReader.java | 2 +- .../avadtest/reconciliation/EventLog.java | 2 +- .../avadtest/reconciliation/Reconciler.java | 24 ++++++++-------- 6 files changed, 40 insertions(+), 36 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java index 2aaa4a9b58c8..cf2e1d0f5211 100644 --- 
a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java
@@ -1,5 +1,6 @@
 package com.azure.cosmos.avadtest.config;
 
+import java.util.Collections;
 import java.util.List;
 
 /**
@@ -61,10 +62,10 @@ public static TestConfig fromEnv() {
     private static String env(String name) {
         String val = System.getenv(name);
-        if (val == null || val.isBlank()) {
+        if (val == null || val.trim().isEmpty()) {
            val = System.getProperty(name);
         }
-        if (val == null || val.isBlank()) {
+        if (val == null || val.trim().isEmpty()) {
             throw new IllegalStateException("Required config missing: " + name);
         }
         return val;
@@ -72,24 +73,24 @@
     private static String envOrDefault(String name, String defaultVal) {
         String val = System.getenv(name);
-        if (val == null || val.isBlank()) {
+        if (val == null || val.trim().isEmpty()) {
             val = System.getProperty(name);
         }
-        return (val != null && !val.isBlank()) ? val : defaultVal;
+        return (val != null && !val.trim().isEmpty()) ? val : defaultVal;
     }
 
     public String endpoint() { return endpoint; }
     public String regionalEndpoint() { return regionalEndpoint; }
     /** Returns regional endpoint if set, otherwise global endpoint. For use by readers. */
     public String readerEndpoint() {
-        return (regionalEndpoint != null && !regionalEndpoint.isBlank()) ? regionalEndpoint : endpoint;
+        return (regionalEndpoint != null && !regionalEndpoint.trim().isEmpty()) ? regionalEndpoint : endpoint;
     }
     public String key() { return key; }
     public String database() { return database; }
     public String feedContainer() { return feedContainer; }
     public String leaseContainer() { return leaseContainer; }
     public String preferredRegion() { return preferredRegion; }
-    public List<String> preferredRegions() { return List.of(preferredRegion); }
+    public List<String> preferredRegions() { return Collections.singletonList(preferredRegion); }
     public int opsPerSec() { return opsPerSec; }
     public int docSizeBytes() { return docSizeBytes; }
     public int logicalPartitionCount() { return logicalPartitionCount; }
diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
index 224663d6041d..c458420ff984 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
@@ -278,7 +278,12 @@ private ObjectNode buildDoc(String docId, String pk, long seq,
     private String generatePayload() {
         int size = config.docSizeBytes();
         if (size <= 0) return "";
-        return "x".repeat(Math.min(size, 10_000));
+        int len = Math.min(size, 10_000);
+        StringBuilder sb = new StringBuilder(len);
+        for (int i = 0; i < len; i++) {
+            sb.append('x');
+        }
+        return sb.toString();
     }
 
     private void trackRecentId(String idAndPk) {
diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
index aee287f123fa..5df313667151 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
@@ -88,7 +88,7 @@ public void run() throws
InterruptedException { options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() - .hostName("avad-host-" + ProcessHandle.current().pid() + "-w" + workerIdx) + .hostName("avad-host-" + java.lang.management.ManagementFactory.getRuntimeMXBean().getName() + "-w" + workerIdx) .feedContainer(feedContainer) .leaseContainer(leaseContainer) .options(options) @@ -143,21 +143,19 @@ private void handleChanges(List items) { boolean hasPrevious = previous != null && !previous.isNull(); // Track operation types and validate previousImage - switch (opType) { - case "create" -> totalCreates.increment(); - case "replace" -> { - totalReplaces.increment(); - if (!hasPrevious) { - missingPreviousImageCount.increment(); - log.warn("⚠️ MISSING previous on REPLACE: eventId={}, pk={}", eventId, pk); - } + if ("create".equals(opType)) { + totalCreates.increment(); + } else if ("replace".equals(opType)) { + totalReplaces.increment(); + if (!hasPrevious) { + missingPreviousImageCount.increment(); + log.warn("⚠️ MISSING previous on REPLACE: eventId={}, pk={}", eventId, pk); } - case "delete" -> { - totalDeletes.increment(); - if (!hasPrevious) { - missingPreviousImageCount.increment(); - log.warn("⚠️ MISSING previous on DELETE: eventId={}, pk={}", eventId, pk); - } + } else if ("delete".equals(opType)) { + totalDeletes.increment(); + if (!hasPrevious) { + missingPreviousImageCount.increment(); + log.warn("⚠️ MISSING previous on DELETE: eventId={}, pk={}", eventId, pk); } } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index 51a10dce7003..33a5d15a61d1 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -75,7 +75,7 @@ public void run() throws InterruptedException { options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() - .hostName("lv-host-" + ProcessHandle.current().pid() + "-w" + i) + .hostName("lv-host-" + java.lang.management.ManagementFactory.getRuntimeMXBean().getName() + "-w" + i) .feedContainer(feedContainer) .leaseContainer(leaseContainer) .options(options) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java index 31218bbc193c..0a0f01c3ffce 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java @@ -27,7 +27,7 @@ public final class EventLog implements AutoCloseable { private final ReentrantLock lock = new ReentrantLock(); public EventLog(String filePath) throws IOException { - Path path = Path.of(filePath); + Path path = java.nio.file.Paths.get(filePath); this.writer = Files.newBufferedWriter(path, StandardOpenOption.CREATE, StandardOpenOption.APPEND, StandardOpenOption.WRITE); log.info("EventLog opened: {}", path.toAbsolutePath()); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java 
b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java
index dd3c9a4ed72b..35532d180c63 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java
@@ -37,7 +37,7 @@ public static int reconcile(String producedFile, String consumedFile) throws IOE
     missing.removeAll(consumed);
 
     // Count duplicates (at-least-once delivery)
-    long totalConsumedLines = Files.lines(Path.of(consumedFile)).filter(l -> !l.isBlank()).count();
+    long totalConsumedLines = Files.lines(java.nio.file.Paths.get(consumedFile)).filter(l -> !l.trim().isEmpty()).count();
     long duplicates = totalConsumedLines - consumed.size();
 
     log.info("Produced: {} unique events", produced.size());
@@ -90,9 +90,9 @@ public static int parity(String lvFile, String avadFile) throws IOException {
 
     /** Loads unique eventIds (first field per line). */
     private static Set<String> loadEventIds(String file) throws IOException {
-        try (var lines = Files.lines(Path.of(file))) {
+        try (java.util.stream.Stream<String> lines = Files.lines(java.nio.file.Paths.get(file))) {
             return lines
-                .filter(l -> !l.isBlank())
+                .filter(l -> !l.trim().isEmpty())
                 .map(l -> l.split(",")[0])
                 .collect(Collectors.toSet());
         }
@@ -109,16 +109,16 @@ private static int checkOrderingByLsn(String consumedFile) throws IOException {
         // Load all records grouped by partition key
         Map<String, List<long[]>> recordsByPk = new HashMap<>();
 
-        try (var reader = new BufferedReader(new FileReader(consumedFile))) {
+        try (BufferedReader reader = new BufferedReader(new FileReader(consumedFile))) {
             String line;
             while ((line = reader.readLine()) != null) {
-                if (line.isBlank()) continue;
+                if (line.trim().isEmpty()) continue;
                 String[] parts = line.split(",");
                 if (parts.length < 6) continue;
 
                 String pk = parts[3];
                 long seqNo = Long.parseLong(parts[1]);
-                long lsn = parts[5].isBlank() ? -1 : Long.parseLong(parts[5]);
+                long lsn = parts[5].trim().isEmpty() ? -1 : Long.parseLong(parts[5]);
                 if (lsn < 0) continue; // skip records without LSN
 
                 recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>())
@@ -127,7 +127,7 @@ private static int checkOrderingByLsn(String consumedFile) throws IOException {
         }
 
         int violations = 0;
-        for (var entry : recordsByPk.entrySet()) {
+        for (Map.Entry<String, List<long[]>> entry : recordsByPk.entrySet()) {
             String pk = entry.getKey();
             List<long[]> records = entry.getValue();
             // Sort by LSN, then check seqNo is non-decreasing within same LSN batch
@@ -161,16 +161,16 @@ private static int checkOrderingByCrts(String consumedFile) throws IOException {
 
         Map<String, List<long[]>> recordsByPk = new HashMap<>();
 
-        try (var reader = new BufferedReader(new FileReader(consumedFile))) {
+        try (BufferedReader reader = new BufferedReader(new FileReader(consumedFile))) {
             String line;
             while ((line = reader.readLine()) != null) {
-                if (line.isBlank()) continue;
+                if (line.trim().isEmpty()) continue;
                 String[] parts = line.split(",");
                 if (parts.length < 7) continue; // CRTS is column 6, only in AVAD logs
 
                 String pk = parts[3];
-                long lsn = parts[5].isBlank() ? -1 : Long.parseLong(parts[5]);
-                long crts = parts[6].isBlank() ? -1 : Long.parseLong(parts[6]);
+                long lsn = parts[5].trim().isEmpty() ? -1 : Long.parseLong(parts[5]);
+                long crts = parts[6].trim().isEmpty() ? -1 : Long.parseLong(parts[6]);
                 if (crts < 0) continue;
 
                 recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>())
@@ -184,7 +184,7 @@ private static int checkOrderingByCrts(String consumedFile) throws IOException {
         }
 
         int violations = 0;
-        for (var entry : recordsByPk.entrySet()) {
+        for (Map.Entry<String, List<long[]>> entry : recordsByPk.entrySet()) {
             String pk = entry.getKey();
             List<long[]> records = entry.getValue();
             // Sort by LSN (delivery order), then check CRTS is non-decreasing

From a7136cb92e4eb4bf02763b475229eee51e87503e Mon Sep 17 00:00:00 2001
From: Abhijeet Mohanty
Date: Tue, 5 May 2026 11:57:37 -0400
Subject: [PATCH 04/28] Fix self-review findings: blocking bugs, watch issues, and chaos scope

Blocking fixes:
- AvadReader: add shutdown hook (matching LatestVersionReader pattern)
- Ingestor: implement rate limiting with Flux.interval instead of unbounded
  Flux.generate + sample (which did not enforce OPS_PER_SEC)
- Ingestor: fix delete correlation by stamping eventId into document before
  delete so AVAD reader's previous image has correct key
- Reconciler: fix LSN/CRTS ordering checks to sort by delivery order (seqNo)
  then verify monotonicity, instead of sorting by value (no-op)
- Ingestor: fix SLF4J format - {:.1f} is not valid SLF4J syntax

Watch fixes:
- ReconciliationWriter: fix close() to drain before dispose
- ReconciliationWriter: use CosmosException.getStatusCode() for retry
- LatestVersionReader: use metadata.getLogSequenceNumber() not _lsn
- AvadReader: implement CRTS ordering validation per partition
- HealthMonitor: treat -1 (query failures) as unhealthy
- Dockerfile: fix build context path (point to module root)
- setup-acr.sh: fix PROJECT_DIR to reach module root
- application.properties: remove hardcoded endpoint
- infra/README: document secret prerequisites, remove hardcoded names

Scope changes:
- Remove 4 chaos scenarios, keep only pod-kill and partition-split
- Pre-compute payload string instead of per-operation allocation
- Clear deleted IDs from ring buffer to avoid retargeting

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../avad-soak/Dockerfile | 5 +- .../avad-soak/application.properties | 2 +- .../avad-soak/chaos/README.md | 11 +--- .../avad-soak/chaos/chaos-schedule.yaml | 30 --------- .../chaos/scenarios/lease-throttle.sh | 49 -------------- .../chaos/scenarios/network-fault.sh | 35 ---------- .../avad-soak/chaos/scenarios/node-drain.sh | 30 --------- .../chaos/scenarios/restart-storm.sh | 16 ----- .../avad-soak/infra/README.md | 40 ++++++++--- .../avad-soak/infra/scripts/setup-acr.sh | 4 +- .../cosmos/avadtest/health/HealthMonitor.java | 3 + .../cosmos/avadtest/ingestor/Ingestor.java | 66 ++++++++++++------- .../cosmos/avadtest/reader/AvadReader.java | 23 +++++++ .../avadtest/reader/LatestVersionReader.java | 4 +- .../avadtest/reconciliation/Reconciler.java | 36 +++++----- .../reconciliation/ReconciliationWriter.java | 15 +++-- 16 files changed, 138 insertions(+), 231 deletions(-)
 delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh
 delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh
 delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh
 delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh
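The interval-driven rate limiting described above could look roughly like the following sketch (the Ingestor's actual diff appears later in this patch; `config` and `executeRandomOperation` are stand-ins for the real members):

```java
// A minimal sketch, not the patch's code: Flux.interval emits one tick per
// operation slot, so steady-state throughput is capped near OPS_PER_SEC.
// Flux.generate + sample, by contrast, produced ops as fast as demand allowed.
Duration tick = Duration.ofNanos(1_000_000_000L / config.opsPerSec());
Flux.interval(tick)
    .onBackpressureDrop()                       // skip ticks if writes lag; never buffer unboundedly
    .flatMap(t -> executeRandomOperation()      // hypothetical: one create/replace/upsert/delete
        .onErrorResume(e -> Mono.empty()), 64)  // bound concurrent in-flight operations
    .subscribe();
```

diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile index 9a05c24f4552..4461a1beed81 100644 ---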
a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile @@ -1,4 +1,5 @@ # Multi-stage build for cosmos-avad-test soak runner +# Build context: azure-cosmos-benchmark/ (module root) # JDK 21 for production, Maven for build stage # --- Build stage --- @@ -13,12 +14,12 @@ RUN mvn package -DskipTests -B FROM eclipse-temurin:21-jre-jammy WORKDIR /app -# Install curl for health probes and az CLI for chaos scripts +# Install curl for health probes RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ && rm -rf /var/lib/apt/lists/* -COPY --from=build /build/target/cosmos-avad-test-1.0-SNAPSHOT.jar /app/app.jar +COPY --from=build /build/target/azure-cosmos-benchmark-*-jar-with-dependencies.jar /app/app.jar # Health endpoint port EXPOSE 8080 diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties index 5751fb3edca4..b939b896694b 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties @@ -1,7 +1,7 @@ # Cosmos AVAD Test — Default Config # Override via environment variables (same names, uppercase) -COSMOS_ENDPOINT=https://abhm-cfp-region-test.documents.azure.com:443/ +COSMOS_ENDPOINT= COSMOS_KEY= COSMOS_DATABASE=graph_db COSMOS_FEED_CONTAINER=avad-test diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md index 449d77a8f5dc..0423dfc997c7 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/README.md @@ -9,11 +9,7 @@ infra Helm chart. | Scenario | Script | What It Tests | |----------|--------|---------------| | Pod Kill | `pod-kill.sh` | Lease rebalancing after random pod loss | -| Restart Storm | `restart-storm.sh` | Mass lease handoff on rolling restart | -| Lease Throttle | `lease-throttle.sh` | CFP behavior under lease container RU starvation | -| Network Fault | `network-fault.sh` | Retry behavior, session consistency | | Partition Split | `partition-split.sh` | Continuation token validity across splits | -| Node Drain | `node-drain.sh` | Graceful shutdown, lease release timing | ## Usage @@ -21,15 +17,12 @@ infra Helm chart. 
```bash export NAMESPACE=cosmos-soak -export COSMOS_ACCOUNT=abhm-cfp-region-test -export COSMOS_RG=abhm-rg +export COSMOS_ACCOUNT= +export COSMOS_RG= # Kill a random AVAD CFP pod bash chaos/scenarios/pod-kill.sh -# Throttle lease container to 400 RU for 5 min -TARGET_RU=400 THROTTLE_DURATION=300 bash chaos/scenarios/lease-throttle.sh - # Trigger partition split (2x throughput) SCALE_FACTOR=2 bash chaos/scenarios/partition-split.sh ``` diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml index 08ac8ef767c0..3f3b7380d94d 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/chaos-schedule.yaml @@ -14,39 +14,9 @@ schedule: params: component: avad-cfp - - scenario: restart-storm - interval_hours: 8 - recovery_minutes: 15 - enabled: true - params: - component: avad-cfp - - - scenario: lease-throttle - interval_hours: 4 - recovery_minutes: 10 - enabled: true - params: - target_ru: 400 - throttle_duration: 300 - - - scenario: network-fault - interval_hours: 6 - recovery_minutes: 5 - enabled: true - params: - block_duration: 30 - component: avad-cfp - - scenario: partition-split interval_hours: 12 recovery_minutes: 30 enabled: true params: scale_factor: 2 - - - scenario: node-drain - interval_hours: 24 - recovery_minutes: 15 - enabled: true - params: - component: avad-cfp diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh deleted file mode 100644 index ae67f9502a12..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/lease-throttle.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -# Lease Throttle — scale lease container RU to simulate throttling -set -euo pipefail - -COSMOS_ACCOUNT="${COSMOS_ACCOUNT:?Set COSMOS_ACCOUNT}" -COSMOS_RG="${COSMOS_RG:?Set COSMOS_RG}" -COSMOS_DB="${COSMOS_DB:-graph_db}" -LEASE_CONTAINER="${LEASE_CONTAINER:-avad-test-leases}" -TARGET_RU="${TARGET_RU:-400}" -THROTTLE_DURATION="${THROTTLE_DURATION:-300}" # 5 minutes - -echo "[$(date '+%H:%M:%S')] Chaos: lease-throttle" - -# Save current throughput -CURRENT_RU=$(az cosmosdb sql container throughput show \ - --account-name "$COSMOS_ACCOUNT" \ - --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DB" \ - --name "$LEASE_CONTAINER" \ - --query "resource.throughput" -o tsv 2>/dev/null || echo "autoscale") - -echo " Current lease RU: $CURRENT_RU" -echo " Scaling to: $TARGET_RU RU for ${THROTTLE_DURATION}s" - -# Scale down -az cosmosdb sql container throughput update \ - --account-name "$COSMOS_ACCOUNT" \ - --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DB" \ - --name "$LEASE_CONTAINER" \ - --throughput "$TARGET_RU" \ - --output none - -echo " Lease container throttled to $TARGET_RU RU" -sleep "$THROTTLE_DURATION" - -# Restore -if [ "$CURRENT_RU" != "autoscale" ]; then - echo " Restoring lease RU to $CURRENT_RU" - az cosmosdb sql container throughput update \ - --account-name "$COSMOS_ACCOUNT" \ - --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DB" \ - --name "$LEASE_CONTAINER" \ - --throughput "$CURRENT_RU" \ - --output none -fi - -echo " Lease throttle complete" diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh deleted file mode 100644 index 
79aad0d10e95..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/network-fault.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# Network Fault — block Cosmos endpoint temporarily -set -euo pipefail - -NAMESPACE="${NAMESPACE:-cosmos-soak}" -COMPONENT="${COMPONENT:-avad-cfp}" -BLOCK_DURATION="${BLOCK_DURATION:-30}" # seconds - -echo "[$(date '+%H:%M:%S')] Chaos: network-fault (${BLOCK_DURATION}s block)" - -# Get a random pod -POD=$(kubectl get pods -n "$NAMESPACE" \ - -l "app.kubernetes.io/component=${COMPONENT}" \ - --field-selector=status.phase=Running \ - -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | shuf -n 1) - -if [ -z "$POD" ]; then - echo " No running pods found" - exit 0 -fi - -echo " Target pod: $POD" -echo " Injecting network fault for ${BLOCK_DURATION}s" - -# Block outbound to Cosmos port 443 using iptables -kubectl exec -n "$NAMESPACE" "$POD" -- \ - sh -c "iptables -A OUTPUT -p tcp --dport 443 -j DROP 2>/dev/null || echo 'iptables not available (need NET_ADMIN)'" - -sleep "$BLOCK_DURATION" - -# Remove the block -kubectl exec -n "$NAMESPACE" "$POD" -- \ - sh -c "iptables -D OUTPUT -p tcp --dport 443 -j DROP 2>/dev/null || true" - -echo " Network fault removed from $POD" diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh deleted file mode 100644 index 0d5d660a1603..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/node-drain.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Node Drain — cordon and drain an AKS node -set -euo pipefail - -echo "[$(date '+%H:%M:%S')] Chaos: node-drain" - -# Find a node running CFP pods -NAMESPACE="${NAMESPACE:-cosmos-soak}" -COMPONENT="${COMPONENT:-avad-cfp}" - -NODE=$(kubectl get pods -n "$NAMESPACE" \ - -l "app.kubernetes.io/component=${COMPONENT}" \ - --field-selector=status.phase=Running \ - -o jsonpath='{.items[0].spec.nodeName}') - -if [ -z "$NODE" ]; then - echo " No nodes found running $COMPONENT pods" - exit 0 -fi - -echo " Draining node: $NODE" -kubectl cordon "$NODE" -kubectl drain "$NODE" --ignore-daemonsets --delete-emptydir-data \ - --timeout=300s --force || true - -echo " Node $NODE drained. Waiting 120s before uncordoning..." 
-sleep 120 - -kubectl uncordon "$NODE" -echo " Node $NODE uncordoned" diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh deleted file mode 100644 index e934ffd0b686..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/chaos/scenarios/restart-storm.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# Restart Storm — rolling restart all CFP pods -set -euo pipefail - -NAMESPACE="${NAMESPACE:-cosmos-soak}" -RELEASE="${RELEASE:-cosmos-soak}" -COMPONENT="${COMPONENT:-avad-cfp}" - -echo "[$(date '+%H:%M:%S')] Chaos: restart-storm for ${RELEASE}-${COMPONENT}" -kubectl rollout restart statefulset "${RELEASE}-${COMPONENT}" -n "$NAMESPACE" -echo " Rolling restart initiated" - -# Wait for rollout to complete (with timeout) -kubectl rollout status statefulset "${RELEASE}-${COMPONENT}" \ - -n "$NAMESPACE" --timeout=600s || true -echo " Restart storm complete" diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md index 7d69356c35a8..359ef781176e 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/README.md @@ -3,6 +3,27 @@ Reusable Helm chart and setup scripts for running Cosmos DB change feed processor soak tests on AKS. +## Prerequisites + +Before deploying, create the required Kubernetes secrets: + +```bash +# Cosmos DB key secret (referenced by Helm chart) +kubectl create secret generic -secrets \ + --namespace cosmos-soak \ + --from-literal=cosmos-key="" + +# ACR pull secret (if not using AKS-managed ACR attachment) +kubectl create secret docker-registry acr-secret \ + --namespace cosmos-soak \ + --docker-server=.azurecr.io \ + --docker-username= \ + --docker-password= +``` + +If using AKS with `--attach-acr`, the `acr-secret` is not needed +and can be removed from the chart templates. + ## Quick Start ```bash @@ -15,7 +36,9 @@ change feed processor soak tests on AKS. # 3. Build + push image to ACR ./scripts/setup-acr.sh -# 4. Deploy (from repo root) +# 4. Create secrets (see Prerequisites above) + +# 5. Deploy (from repo root) cd ../.. ./run-soak.sh ``` @@ -42,11 +65,12 @@ cd ../.. ## Azure Resources -Default configuration targets: -- Subscription: `b31b6408-0fb5-4688-9a3c-33ffb3983297` -- Resource Group: `abhm-rg` -- AKS: `abhm-avad-soak-aks` (3x D4s_v5 nodes) -- ACR: `abhmavadsoakacr` -- Cosmos: `abhm-cfp-region-test` +Override default resource names via environment variables +in each script: -Override via environment variables in each script. +```bash +export SUBSCRIPTION="" +export RG="" +export AKS_CLUSTER="" +export ACR_NAME="" +``` diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh index a382d0cf863b..180446105582 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh @@ -11,7 +11,7 @@ ACR_NAME="${ACR_NAME:-abhmavadsoakacr}" IMAGE_NAME="${IMAGE_NAME:-cosmos-avad-test}" IMAGE_TAG="${IMAGE_TAG:-latest}" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_DIR="$SCRIPT_DIR/../.." +PROJECT_DIR="$SCRIPT_DIR/../../.." 
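+# Module root is three levels up (avad-soak/infra/scripts -> azure-cosmos-benchmark); the image build needs the full module as its context so Maven can see pom.xml.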
az account set --subscription "$SUBSCRIPTION" @@ -38,7 +38,7 @@ echo "=== Building + pushing image ===" az acr build \ --registry "$ACR_NAME" \ --image "${IMAGE_NAME}:${IMAGE_TAG}" \ - --file "$PROJECT_DIR/Dockerfile" \ + --file "$PROJECT_DIR/avad-soak/Dockerfile" \ "$PROJECT_DIR" echo "Image pushed: ${ACR_NAME}.azurecr.io/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java index 1ba22846de78..3e2b5b9afbd3 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java @@ -77,14 +77,17 @@ public int runChecks() { // 1. Count produced events long producedCount = countBySource("ingestor"); log.info(" Produced (ingestor): {}", producedCount); + if (producedCount < 0) { log.error(" ❌ Query failed for ingestor count"); healthy = false; } // 2. Count AVAD consumed events long avadConsumed = countBySource("cfp-avad"); log.info(" AVAD consumed: {}", avadConsumed); + if (avadConsumed < 0) { log.error(" ❌ Query failed for AVAD count"); healthy = false; } // 3. Count LV consumed events long lvConsumed = countBySource("cfp-lv"); log.info(" LV consumed: {}", lvConsumed); + if (lvConsumed < 0) { log.error(" ❌ Query failed for LV count"); healthy = false; } // 4. Gap detection — produced but not in AVAD // (older than SLA window) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index c458420ff984..a5e567b626bf 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -54,6 +54,7 @@ public final class Ingestor implements AutoCloseable { // Ops per tick = opsPerSec * tickIntervalMs / 1000 private final int opsPerTick; + private final String precomputedPayload; public Ingestor(TestConfig config) throws Exception { this.config = config; @@ -62,6 +63,12 @@ public Ingestor(TestConfig config) throws Exception { this.recentDocIds = new String[10_000]; // ring buffer this.opsPerTick = Math.max(1, config.opsPerSec() * TICK_INTERVAL_MS / 1000); + // Pre-compute payload once instead of per-operation + int size = Math.min(Math.max(config.docSizeBytes(), 0), 10_000); + StringBuilder sb = new StringBuilder(size); + for (int i = 0; i < size; i++) { sb.append('x'); } + this.precomputedPayload = sb.toString(); + this.client = new CosmosClientBuilder() .endpoint(config.endpoint()) .key(config.key()) @@ -85,15 +92,14 @@ public void run() throws InterruptedException { CountDownLatch latch = new CountDownLatch(1); - // Simple approach: generate ops as fast as possible, bounded by concurrency + // Rate-limited ingestion: emit opsPerTick operations every TICK_INTERVAL_MS int concurrency = Math.min(config.opsPerSec(), 500); - Flux.generate(sink -> { - if (running.get()) { sink.next(seqCounter.get()); } - else { sink.complete(); } - }) - .flatMap(tick -> executeOperation() - .subscribeOn(Schedulers.boundedElastic()), concurrency) - .sample(Duration.ofMillis(TICK_INTERVAL_MS)) // pace output + Flux.interval(Duration.ofMillis(TICK_INTERVAL_MS)) + .takeWhile(tick -> running.get()) 
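+ // opsPerTick = opsPerSec * TICK_INTERVAL_MS / 1000, so emitting one batch per tick
+ // approximates the target rate; the flatMap concurrency below only bounds in-flight ops.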
+ .flatMap(tick -> Flux.range(0, opsPerTick) + .flatMap(i -> executeOperation() + .subscribeOn(Schedulers.boundedElastic()), concurrency), + concurrency) .doOnError(e -> log.error("Ingestion error", e)) .doOnComplete(latch::countDown) .subscribe(); @@ -114,10 +120,10 @@ public void run() throws InterruptedException { long f = failureCount.sum(); long total = s + f; double failRate = total > 0 ? (double) f / total : 0; - log.info("Progress: success={}, failures={}, failRate={:.1f}%, seq={}", - s, f, failRate * 100, seqCounter.get()); + log.info("Progress: success={}, failures={}, failRate={}%, seq={}", + s, f, String.format("%.1f", failRate * 100), seqCounter.get()); if (total > 100 && failRate > FAILURE_ABORT_THRESHOLD) { - log.error("Failure rate {:.1f}% exceeds threshold, aborting!", failRate * 100); + log.error("Failure rate {}% exceeds threshold, aborting!", String.format("%.1f", failRate * 100)); running.set(false); } }); @@ -241,20 +247,33 @@ private Mono doDelete() { String[] parts = recent.split("\\|"); String docId = parts[0]; String pk = parts[1]; - String eventId = UUID.randomUUID().toString(); long seq = seqCounter.incrementAndGet(); String ts = Instant.now().toString(); - return container.deleteItem(docId, new PartitionKey(pk), new CosmosItemRequestOptions()) + // Read-before-delete: stamp a delete-specific eventId into the document + // so AVAD reader's previous image contains the correct correlation key. + String eventId = UUID.randomUUID().toString(); + return container.readItem(docId, new PartitionKey(pk), ObjectNode.class) + .flatMap(readResp -> { + ObjectNode doc = readResp.getItem(); + doc.put("eventId", eventId); + doc.put("seqNo", seq); + doc.put("operationType", "delete"); + doc.put("timestamp", ts); + return container.replaceItem(doc, docId, new PartitionKey(pk), new CosmosItemRequestOptions()); + }) + .flatMap(replaceResp -> + container.deleteItem(docId, new PartitionKey(pk), new CosmosItemRequestOptions())) .doOnSuccess(resp -> { successCount.increment(); eventLog.logProduced(eventId, seq, "delete", pk, ts); reconWriter.record(eventId, seq, "delete", pk, -1, false, -1); + clearRecentId(docId + "|" + pk); }) .doOnError(e -> { failureCount.increment(); - // 404 is expected if already deleted — don't warn loudly - if (!e.getMessage().contains("404")) { + // 404 is expected if already deleted + if (e.getMessage() == null || !e.getMessage().contains("404")) { log.warn("Delete failed: docId={}, error={}", docId, e.getMessage()); } }) @@ -276,14 +295,7 @@ private ObjectNode buildDoc(String docId, String pk, long seq, } private String generatePayload() { - int size = config.docSizeBytes(); - if (size <= 0) return ""; - int len = Math.min(size, 10_000); - StringBuilder sb = new StringBuilder(len); - for (int i = 0; i < len; i++) { - sb.append('x'); - } - return sb.toString(); + return precomputedPayload; } private void trackRecentId(String idAndPk) { @@ -291,6 +303,14 @@ private void trackRecentId(String idAndPk) { recentDocIds[idx] = idAndPk; } + private void clearRecentId(String idAndPk) { + for (int i = 0; i < recentDocIds.length; i++) { + if (idAndPk.equals(recentDocIds[i])) { + recentDocIds[i] = null; + } + } + } + private String getRecentId() { long idx = recentIndex.get(); if (idx == 0) return null; diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index 5df313667151..bf5a7dfb4829 100644 --- 
a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -20,7 +20,9 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; /** @@ -52,6 +54,9 @@ public final class AvadReader implements AutoCloseable { private final LongAdder totalCreates = new LongAdder(); private final LongAdder crtsViolationCount = new LongAdder(); + // Per-partition CRTS tracking for ordering validation + private final ConcurrentHashMap<String, AtomicLong> lastCrtsByPartition = new ConcurrentHashMap<>(); + public AvadReader(TestConfig config) throws Exception { this.config = config; this.eventLog = new EventLog(config.consumedLogFile()); @@ -105,6 +110,14 @@ public void run() throws InterruptedException { log.info("All {} AVAD workers started", workers); + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + log.info("Shutdown signal, stopping {} AVAD CFP workers...", processors.size()); + for (ChangeFeedProcessor p : processors) { + try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ } + } + latch.countDown(); + })); + latch.await(); } @@ -159,6 +172,16 @@ private void handleChanges(List<ChangeFeedProcessorItem> items) { } } + // CRTS ordering validation per partition key + if (crts > 0 && !pk.isEmpty()) { + AtomicLong lastCrts = lastCrtsByPartition.computeIfAbsent(pk, k -> new AtomicLong(-1)); + long prev = lastCrts.getAndSet(crts); + if (prev > 0 && crts < prev) { + crtsViolationCount.increment(); + log.warn("⚠️ CRTS ordering violation: pk={}, prevCrts={}, currCrts={}", pk, prev, crts); + } + } + eventLog.logConsumedAvad(eventId, seqNo, opType, pk, timestamp, lsn, crts); reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts); } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index 33a5d15a61d1..ed759a21c741 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -8,6 +8,7 @@ import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; import com.azure.cosmos.models.ChangeFeedProcessorOptions; import com.azure.cosmos.models.ChangeFeedProcessorItem; +import com.azure.cosmos.models.ChangeFeedMetaData; import com.azure.cosmos.ChangeFeedProcessor; import com.azure.cosmos.ChangeFeedProcessorBuilder; import com.fasterxml.jackson.databind.JsonNode; @@ -108,12 +109,13 @@ private void handleChanges(List<ChangeFeedProcessorItem> items) { JsonNode current = item.getCurrent(); if (current == null || current.isNull()) continue; // LV mode shouldn't get null current + ChangeFeedMetaData metadata = item.getChangeFeedMetaData(); String eventId = getTextOrEmpty(current, "eventId"); long seqNo = current.has("seqNo") ? current.get("seqNo").asLong() : -1; String opType = getTextOrEmpty(current, "operationType"); String pk = getTextOrEmpty(current, "tenantId"); String timestamp = getTextOrEmpty(current, "timestamp"); - long lsn = current.has("_lsn") ? current.get("_lsn").asLong() : -1; + long lsn = metadata != null ?
metadata.getLogSequenceNumber() : -1; eventLog.logConsumed(eventId, seqNo, opType, pk, timestamp, lsn); reconWriter.record(eventId, seqNo, opType, pk, lsn, false, -1); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java index 35532d180c63..6dbef5490e2f 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java @@ -101,12 +101,11 @@ private static Set<String> loadEventIds(String file) throws IOException { /** * Check that LSN is monotonically increasing per partitionKey. * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn - * Sorts by (partitionKey, lsn) then checks for inversions. + * Sorts by seqNo (delivery order), then verifies LSN is non-decreasing. */ private static int checkOrderingByLsn(String consumedFile) throws IOException { log.info("=== LSN Ordering Check: {} ===", consumedFile); - // Load all records grouped by partition key Map<String, List<long[]>> recordsByPk = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new FileReader(consumedFile))) { @@ -119,10 +118,10 @@ private static int checkOrderingByLsn(String consumedFile) throws IOException { String pk = parts[3]; long seqNo = Long.parseLong(parts[1]); long lsn = parts[5].trim().isEmpty() ? -1 : Long.parseLong(parts[5]); - if (lsn < 0) continue; // skip records without LSN + if (lsn < 0) continue; recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>()) - .add(new long[]{lsn, seqNo}); + .add(new long[]{seqNo, lsn}); } } @@ -130,19 +129,19 @@ private static int checkOrderingByLsn(String consumedFile) throws IOException { for (Map.Entry<String, List<long[]>> entry : recordsByPk.entrySet()) { String pk = entry.getKey(); List<long[]> records = entry.getValue(); - // Sort by LSN, then check seqNo is non-decreasing within same LSN batch + // Sort by seqNo (delivery order), then check LSN is non-decreasing records.sort(Comparator.comparingLong(r -> r[0])); long prevLsn = -1; for (long[] record : records) { - if (record[0] < prevLsn) { + if (prevLsn > 0 && record[1] < prevLsn) { violations++; if (violations <= 10) { - log.warn("LSN ordering violation: PK={}, prevLsn={}, currLsn={}", - pk, prevLsn, record[0]); + log.warn("LSN ordering violation: PK={}, seqNo={}, prevLsn={}, currLsn={}", + pk, record[0], prevLsn, record[1]); } } - prevLsn = record[0]; + prevLsn = record[1]; } } @@ -155,6 +154,7 @@ private static int checkOrderingByLsn(String consumedFile) throws IOException { * Check that CRTS is monotonically increasing per partitionKey. * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn,crts * Only applies to AVAD logs (7 columns). Lines without CRTS are skipped. + * Sorts by seqNo (delivery order), then verifies CRTS is non-decreasing. */ private static int checkOrderingByCrts(String consumedFile) throws IOException { log.info("=== CRTS Ordering Check: {} ===", consumedFile); @@ -166,15 +166,15 @@ private static int checkOrderingByCrts(String consumedFile) throws IOException { while ((line = reader.readLine()) != null) { if (line.trim().isEmpty()) continue; String[] parts = line.split(","); - if (parts.length < 7) continue; // CRTS is column 6, only in AVAD logs + if (parts.length < 7) continue; String pk = parts[3]; - long lsn = parts[5].trim().isEmpty() ?
-1 : Long.parseLong(parts[5]); + long seqNo = Long.parseLong(parts[1]); long crts = parts[6].trim().isEmpty() ? -1 : Long.parseLong(parts[6]); if (crts < 0) continue; recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>()) - .add(new long[]{crts, lsn}); + .add(new long[]{seqNo, crts}); } } @@ -187,19 +187,19 @@ private static int checkOrderingByCrts(String consumedFile) throws IOException { for (Map.Entry<String, List<long[]>> entry : recordsByPk.entrySet()) { String pk = entry.getKey(); List<long[]> records = entry.getValue(); - // Sort by LSN (delivery order), then check CRTS is non-decreasing - records.sort(Comparator.comparingLong(r -> r[1])); + // Sort by seqNo (delivery order), then check CRTS is non-decreasing + records.sort(Comparator.comparingLong(r -> r[0])); long prevCrts = -1; for (long[] record : records) { - if (prevCrts > 0 && record[0] < prevCrts) { + if (prevCrts > 0 && record[1] < prevCrts) { violations++; if (violations <= 10) { - log.warn("CRTS ordering violation: PK={}, prevCrts={}, currCrts={}, lsn={}", - pk, prevCrts, record[0], record[1]); + log.warn("CRTS ordering violation: PK={}, seqNo={}, prevCrts={}, currCrts={}", + pk, record[0], prevCrts, record[1]); } } - prevCrts = record[0]; + prevCrts = record[1]; } } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index 970b42081e50..7b4f5b9142a0 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -157,10 +157,11 @@ private Mono writeDoc(ObjectNode doc) { } private boolean isRetryable(Throwable e) { - String msg = e.getMessage(); - if (msg == null) return true; - // Don't retry permanent failures - return !msg.contains("404") && !msg.contains("Unauthorized") && !msg.contains("403"); + if (e instanceof com.azure.cosmos.CosmosException) { + int status = ((com.azure.cosmos.CosmosException) e).getStatusCode(); + return status != 404 && status != 401 && status != 403; + } + return true; } public long getWriteCount() { return writeCount.sum(); } @@ -170,11 +171,11 @@ private boolean isRetryable(Throwable e) { @Override public void close() { sink.tryEmitComplete(); - // Wait for the subscriber to drain all buffered writes + // Wait for the subscriber to drain buffered writes before disposing try { - subscription.dispose(); - Thread.sleep(10_000); // allow in-flight writes to complete + Thread.sleep(10_000); } catch (InterruptedException ignored) {} + subscription.dispose(); client.close(); log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}, drops={}", source, writeCount.sum(), retryCount.sum(), errorCount.sum(), dropCount.sum()); From caa22d9d8c88b610e5d9b79efb6bd88671864180 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 11:59:39 -0400 Subject: [PATCH 05/28] Revert unrelated hooks.json change Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/hooks/hooks.json | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/hooks/hooks.json b/.github/hooks/hooks.json index f8450fb3ba3a..938bb0dac66b 100644 --- a/.github/hooks/hooks.json +++ b/.github/hooks/hooks.json @@ -1,20 +1,9 @@ { - "version": 1, "hooks": { - "preToolUse": [ + "PostToolUse": [ { "type":
"command", - "powershell": "powershell -ExecutionPolicy Bypass -File \"C:/Users/abhmohanty/.copilot/hooks/block-force-push.ps1\"", - "bash": "pwsh -ExecutionPolicy Bypass -File ~/.copilot/hooks/block-force-push.ps1", - "cwd": ".", - "timeoutSec": 10 - } - ], - "postToolUse": [ - { - "type": "command", - "powershell": "powershell -ExecutionPolicy Bypass -Command \"$raw = [Console]::In.ReadToEnd(); $j = $raw | ConvertFrom-Json; if ($j.toolName -eq 'powershell' -and $j.toolArgs -match 'git push') { Add-Content -Path 'C:/Users/abhmohanty/.copilot/hooks/push-audit.log' -Value \\\"$(Get-Date -Format o) | $($j.toolArgs)\\\" }\"", - "cwd": ".", + "command": "pwsh eng/common/scripts/azsdk_tool_telemetry.ps1", "timeoutSec": 5 } ] From 949f3e1f831383e431a2476ecbbf712d0c40f072 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:17:58 -0400 Subject: [PATCH 06/28] Refactor Ingestor to use Cosmos bulk API Replace individual point operations (createItem, replaceItem, upsertItem, deleteItem) with executeBulkOperations for higher throughput. Changes: - Build batch of CosmosItemOperation per tick, submit via bulk API - Merge replace into upsert (both trigger AVAD previousImage) - Operation mix: 40% create, 40% upsert, 20% delete - Tick interval increased to 100ms (larger batches, better bulk efficiency) - Track OpMeta per operation for correlating bulk responses to event log - Remove read-before-write for replaces (upsert handles it) - Delete uses docId as correlation key (no read-before-delete needed) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/ingestor/Ingestor.java | 248 ++++++++---------- 1 file changed, 115 insertions(+), 133 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index a5e567b626bf..cb06120f896c 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -6,18 +6,22 @@ import com.azure.cosmos.avadtest.config.TestConfig; import com.azure.cosmos.avadtest.reconciliation.EventLog; import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; -import com.azure.cosmos.models.CosmosItemRequestOptions; +import com.azure.cosmos.models.CosmosBulkExecutionOptions; +import com.azure.cosmos.models.CosmosBulkOperationResponse; +import com.azure.cosmos.models.CosmosBulkOperations; +import com.azure.cosmos.models.CosmosItemOperation; import com.azure.cosmos.models.PartitionKey; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; -import reactor.core.publisher.Mono; import reactor.core.scheduler.Schedulers; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; @@ -26,15 +30,16 @@ import java.util.concurrent.atomic.LongAdder; /** - * Ingestion workload: creates (40%), replaces (25%), upserts (15%), deletes (20%). + * Ingestion workload using the Cosmos DB bulk API. + * Operation mix: creates (40%), upserts (40% — replaces + upserts), deletes (20%). 
* Every operation gets a unique eventId for per-event reconciliation. - * Uses micro-batch rate limiting (batch of N ops every 10ms). + * Batch of N operations submitted via executeBulkOperations every tick. */ public final class Ingestor implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(Ingestor.class); - private static final int TICK_INTERVAL_MS = 10; - private static final double FAILURE_ABORT_THRESHOLD = 0.5; // abort if >50% failures + private static final int TICK_INTERVAL_MS = 100; + private static final double FAILURE_ABORT_THRESHOLD = 0.5; private final TestConfig config; private final CosmosAsyncClient client; @@ -44,26 +49,25 @@ public final class Ingestor implements AutoCloseable { private final AtomicLong seqCounter = new AtomicLong(0); private final AtomicBoolean running = new AtomicBoolean(true); - // Failure tracking private final LongAdder successCount = new LongAdder(); private final LongAdder failureCount = new LongAdder(); - // Track recently created doc IDs for replace/upsert/delete operations + // Track recently created doc IDs for upsert/delete operations private final String[] recentDocIds; private final AtomicLong recentIndex = new AtomicLong(0); - // Ops per tick = opsPerSec * tickIntervalMs / 1000 private final int opsPerTick; private final String precomputedPayload; + private final CosmosBulkExecutionOptions bulkOptions; public Ingestor(TestConfig config) throws Exception { this.config = config; this.eventLog = new EventLog(config.producedLogFile()); this.reconWriter = new ReconciliationWriter(config, "ingestor"); - this.recentDocIds = new String[10_000]; // ring buffer + this.recentDocIds = new String[10_000]; this.opsPerTick = Math.max(1, config.opsPerSec() * TICK_INTERVAL_MS / 1000); + this.bulkOptions = new CosmosBulkExecutionOptions(); - // Pre-compute payload once instead of per-operation int size = Math.min(Math.max(config.docSizeBytes(), 0), 10_000); StringBuilder sb = new StringBuilder(size); for (int i = 0; i < size; i++) { sb.append('x'); } @@ -80,31 +84,26 @@ public Ingestor(TestConfig config) throws Exception { .getDatabase(config.database()) .getContainer(config.feedContainer()); - log.info("Ingestor initialized: endpoint={}, db={}, container={}, ops/sec={}, opsPerTick={}", + log.info("Ingestor initialized (bulk mode): endpoint={}, db={}, container={}, ops/sec={}, opsPerTick={}", config.endpoint(), config.database(), config.feedContainer(), config.opsPerSec(), opsPerTick); } public void run() throws InterruptedException { int durationSec = config.durationSeconds(); - log.info("Starting ingestion at {} ops/sec, duration={}", + log.info("Starting bulk ingestion at {} ops/sec, duration={}", config.opsPerSec(), durationSec > 0 ? 
durationSec + "s" : "unlimited"); CountDownLatch latch = new CountDownLatch(1); - // Rate-limited ingestion: emit opsPerTick operations every TICK_INTERVAL_MS - int concurrency = Math.min(config.opsPerSec(), 500); + // Each tick: build a batch of operations and submit via bulk API Flux.interval(Duration.ofMillis(TICK_INTERVAL_MS)) .takeWhile(tick -> running.get()) - .flatMap(tick -> Flux.range(0, opsPerTick) - .flatMap(i -> executeOperation() - .subscribeOn(Schedulers.boundedElastic()), concurrency), - concurrency) - .doOnError(e -> log.error("Ingestion error", e)) + .concatMap(tick -> executeBulkBatch()) + .doOnError(e -> log.error("Bulk ingestion error", e)) .doOnComplete(latch::countDown) .subscribe(); - // Auto-stop after duration if (durationSec > 0) { Schedulers.single().schedule(() -> { log.info("Duration {}s reached, stopping ingestor...", durationSec); @@ -112,7 +111,6 @@ public void run() throws InterruptedException { }, durationSec, java.util.concurrent.TimeUnit.SECONDS); } - // Periodic failure rate check Flux.interval(Duration.ofSeconds(30)) .takeWhile(tick -> running.get()) .subscribe(tick -> { @@ -136,20 +134,32 @@ public void run() throws InterruptedException { latch.await(); } - private Mono executeOperation() { - int roll = ThreadLocalRandom.current().nextInt(100); - if (roll < 40) { - return doCreate(); - } else if (roll < 65) { - return doReplace(); - } else if (roll < 80) { - return doUpsert(); - } else { - return doDelete(); + private Flux executeBulkBatch() { + List operations = new ArrayList<>(opsPerTick); + List metas = new ArrayList<>(opsPerTick); + + for (int i = 0; i < opsPerTick; i++) { + int roll = ThreadLocalRandom.current().nextInt(100); + if (roll < 40) { + addCreate(operations, metas); + } else if (roll < 80) { + addUpsert(operations, metas); + } else { + addDelete(operations, metas); + } } + + if (operations.isEmpty()) { + return Flux.empty(); + } + + return container.executeBulkOperations(Flux.fromIterable(operations), bulkOptions) + .doOnNext(response -> handleBulkResponse(response, metas)) + .then() + .flux(); } - private Mono doCreate() { + private void addCreate(List ops, List metas) { String docId = UUID.randomUUID().toString(); String eventId = UUID.randomUUID().toString(); String pk = "tenant-" + ThreadLocalRandom.current().nextInt(config.logicalPartitionCount()); @@ -157,61 +167,15 @@ private Mono doCreate() { String ts = Instant.now().toString(); ObjectNode doc = buildDoc(docId, pk, seq, eventId, "create", ts); - - return container.createItem(doc, new PartitionKey(pk), new CosmosItemRequestOptions()) - .doOnSuccess(resp -> { - successCount.increment(); - eventLog.logProduced(eventId, seq, "create", pk, ts); - reconWriter.record(eventId, seq, "create", pk, -1, false, -1); - trackRecentId(docId + "|" + pk); - }) - .doOnError(e -> { - failureCount.increment(); - log.warn("Create failed: docId={}, error={}", docId, e.getMessage()); - }) - .onErrorResume(e -> Mono.empty()) - .then(); - } - - private Mono doReplace() { - String recent = getRecentId(); - if (recent == null) return doCreate(); - - String[] parts = recent.split("\\|"); - String docId = parts[0]; - String pk = parts[1]; - String eventId = UUID.randomUUID().toString(); - long seq = seqCounter.incrementAndGet(); - String ts = Instant.now().toString(); - - return container.readItem(docId, new PartitionKey(pk), ObjectNode.class) - .flatMap(readResp -> { - ObjectNode doc = readResp.getItem(); - doc.put("seqNo", seq); - doc.put("eventId", eventId); - doc.put("operationType", "replace"); - 
doc.put("timestamp", ts); - doc.put("payload", generatePayload()); - return container.replaceItem(doc, docId, new PartitionKey(pk), new CosmosItemRequestOptions()); - }) - .doOnSuccess(resp -> { - successCount.increment(); - eventLog.logProduced(eventId, seq, "replace", pk, ts); - reconWriter.record(eventId, seq, "replace", pk, -1, false, -1); - }) - .doOnError(e -> { - failureCount.increment(); - log.warn("Replace failed: docId={}, error={}", docId, e.getMessage()); - }) - .onErrorResume(e -> Mono.empty()) - .then(); + ops.add(CosmosBulkOperations.getCreateItemOperation(doc, new PartitionKey(pk))); + metas.add(new OpMeta(eventId, seq, "create", pk, ts, docId)); } - private Mono doUpsert() { + private void addUpsert(List ops, List metas) { String recent = getRecentId(); String docId; String pk; - if (recent != null && ThreadLocalRandom.current().nextBoolean()) { + if (recent != null) { String[] parts = recent.split("\\|"); docId = parts[0]; pk = parts[1]; @@ -225,24 +189,16 @@ private Mono doUpsert() { String ts = Instant.now().toString(); ObjectNode doc = buildDoc(docId, pk, seq, eventId, "upsert", ts); - return container.upsertItem(doc, new PartitionKey(pk), new CosmosItemRequestOptions()) - .doOnSuccess(resp -> { - successCount.increment(); - eventLog.logProduced(eventId, seq, "upsert", pk, ts); - reconWriter.record(eventId, seq, "upsert", pk, -1, false, -1); - trackRecentId(docId + "|" + pk); - }) - .doOnError(e -> { - failureCount.increment(); - log.warn("Upsert failed: docId={}, error={}", docId, e.getMessage()); - }) - .onErrorResume(e -> Mono.empty()) - .then(); + ops.add(CosmosBulkOperations.getUpsertItemOperation(doc, new PartitionKey(pk))); + metas.add(new OpMeta(eventId, seq, "upsert", pk, ts, docId)); } - private Mono doDelete() { + private void addDelete(List ops, List metas) { String recent = getRecentId(); - if (recent == null) return doCreate(); // nothing to delete yet + if (recent == null) { + addCreate(ops, metas); + return; + } String[] parts = recent.split("\\|"); String docId = parts[0]; @@ -250,35 +206,45 @@ private Mono doDelete() { long seq = seqCounter.incrementAndGet(); String ts = Instant.now().toString(); - // Read-before-delete: stamp a delete-specific eventId into the document - // so AVAD reader's previous image contains the correct correlation key. 
- String eventId = UUID.randomUUID().toString(); - return container.readItem(docId, new PartitionKey(pk), ObjectNode.class) - .flatMap(readResp -> { - ObjectNode doc = readResp.getItem(); - doc.put("eventId", eventId); - doc.put("seqNo", seq); - doc.put("operationType", "delete"); - doc.put("timestamp", ts); - return container.replaceItem(doc, docId, new PartitionKey(pk), new CosmosItemRequestOptions()); - }) - .flatMap(replaceResp -> - container.deleteItem(docId, new PartitionKey(pk), new CosmosItemRequestOptions())) - .doOnSuccess(resp -> { - successCount.increment(); - eventLog.logProduced(eventId, seq, "delete", pk, ts); - reconWriter.record(eventId, seq, "delete", pk, -1, false, -1); - clearRecentId(docId + "|" + pk); - }) - .doOnError(e -> { - failureCount.increment(); - // 404 is expected if already deleted - if (e.getMessage() == null || !e.getMessage().contains("404")) { - log.warn("Delete failed: docId={}, error={}", docId, e.getMessage()); - } - }) - .onErrorResume(e -> Mono.empty()) - .then(); + ops.add(CosmosBulkOperations.getDeleteItemOperation(docId, new PartitionKey(pk))); + metas.add(new OpMeta(docId, seq, "delete", pk, ts, docId)); + clearRecentId(recent); + } + + private void handleBulkResponse(CosmosBulkOperationResponse<Object> response, List<OpMeta> metas) { + CosmosItemOperation op = response.getOperation(); + + // The operations and metas lists are parallel, but executeBulkOperations may + // deliver responses out of order, so correlate by the operation's item id + // rather than by position. + int idx = -1; + String opId = op.getId(); + for (int i = 0; i < metas.size(); i++) { + if (metas.get(i).docId.equals(opId)) { + idx = i; + break; + } + } + + if (idx < 0) return; + OpMeta meta = metas.get(idx); + + if (response.getResponse() != null && response.getResponse().isSuccessStatusCode()) { + successCount.increment(); + eventLog.logProduced(meta.eventId, meta.seq, meta.opType, meta.pk, meta.ts); + reconWriter.record(meta.eventId, meta.seq, meta.opType, meta.pk, -1, false, -1); + if (!"delete".equals(meta.opType)) { + trackRecentId(meta.docId + "|" + meta.pk); + } + } else { + failureCount.increment(); + int status = response.getResponse() != null ? response.getResponse().getStatusCode() : -1; + if (status != 404) { + log.warn("Bulk op failed: op={}, docId={}, status={}", meta.opType, meta.docId, status); + } + } } private ObjectNode buildDoc(String docId, String pk, long seq, @@ -290,14 +256,10 @@ private ObjectNode buildDoc(String docId, String pk, long seq, doc.put("seqNo", seq); doc.put("operationType", opType); doc.put("timestamp", ts); - doc.put("payload", generatePayload()); + doc.put("payload", precomputedPayload); return doc; } - private String generatePayload() { - return precomputedPayload; - } - private void trackRecentId(String idAndPk) { int idx = (int) (recentIndex.incrementAndGet() % recentDocIds.length); recentDocIds[idx] = idAndPk; @@ -316,7 +278,8 @@ private String getRecentId() { if (idx == 0) return null; int start = (int) (idx % recentDocIds.length); int offset = ThreadLocalRandom.current().nextInt(Math.min((int) idx, recentDocIds.length)); - return recentDocIds[(start - offset + recentDocIds.length) % recentDocIds.length]; + String val = recentDocIds[(start - offset + recentDocIds.length) % recentDocIds.length]; + return val; // may be null if cleared } @Override @@ -328,4 +291,23 @@ public void close() { log.info("Ingestor closed.
Total ops: {}, success: {}, failures: {}", seqCounter.get(), successCount.sum(), failureCount.sum()); } + + /** Metadata for correlating bulk operation responses back to produced events. */ + private static final class OpMeta { + final String eventId; + final long seq; + final String opType; + final String pk; + final String ts; + final String docId; + + OpMeta(String eventId, long seq, String opType, String pk, String ts, String docId) { + this.eventId = eventId; + this.seq = seq; + this.opType = opType; + this.pk = pk; + this.ts = ts; + this.docId = docId; + } + } } From 20a30a3095b678e5089701b2ba0cbb853a473489 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:21:27 -0400 Subject: [PATCH 07/28] Remove unused SoakMetrics class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SoakMetrics was never wired to any workload component — /metrics endpoint always returned zeros. Each component already tracks its own counters via LongAdder and reports via SLF4J. Removed SoakMetrics.java and the /metrics endpoint from HealthServer. HealthServer now only serves /health and /ready (K8s probes). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../java/com/azure/cosmos/avadtest/Main.java | 10 +- .../cosmos/avadtest/health/HealthServer.java | 23 +---- .../cosmos/avadtest/metrics/SoakMetrics.java | 95 ------------------- 3 files changed, 6 insertions(+), 122 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java index 300fd0b959ff..00c89a1bd3c3 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java @@ -4,7 +4,6 @@ import com.azure.cosmos.avadtest.health.HealthMonitor; import com.azure.cosmos.avadtest.health.HealthServer; import com.azure.cosmos.avadtest.ingestor.Ingestor; -import com.azure.cosmos.avadtest.metrics.SoakMetrics; import com.azure.cosmos.avadtest.reader.AvadReader; import com.azure.cosmos.avadtest.reader.LatestVersionReader; import com.azure.cosmos.avadtest.reconciliation.Reconciler; @@ -67,8 +66,7 @@ private int run() throws Exception { } private int runIngestor() throws Exception { - SoakMetrics metrics = new SoakMetrics(); - HealthServer healthServer = new HealthServer(metrics, healthPort); + HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); TestConfig config = TestConfig.fromEnv(); @@ -82,8 +80,7 @@ private int runIngestor() throws Exception { } private int runLvReader() throws Exception { - SoakMetrics metrics = new SoakMetrics(); - HealthServer healthServer = new HealthServer(metrics, healthPort); + HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); TestConfig config = TestConfig.fromEnv(); @@ -97,8 +94,7 @@ private int runLvReader() throws Exception { } private int runAvadReader() throws Exception { - SoakMetrics metrics = new SoakMetrics(); - HealthServer healthServer = new HealthServer(metrics, healthPort); + HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); TestConfig config = TestConfig.fromEnv(); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java 
b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java index 418a6748ac79..d3e894248ee4 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java @@ -1,6 +1,5 @@ package com.azure.cosmos.avadtest.health; -import com.azure.cosmos.avadtest.metrics.SoakMetrics; import com.sun.net.httpserver.HttpServer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,10 +17,6 @@ * Endpoints: * GET /health — liveness probe (always 200 if JVM is up) * GET /ready — readiness probe (200 when workload is ready) - * GET /metrics — Micrometer-style plain text metrics - * - * Reusable contract: any workload image that implements these - * three endpoints can plug into the soak infra Helm chart. */ public final class HealthServer { @@ -30,14 +25,12 @@ public final class HealthServer { private final HttpServer server; private final AtomicBoolean ready = new AtomicBoolean(false); - private final SoakMetrics metrics; - public HealthServer(SoakMetrics metrics) throws IOException { - this(metrics, DEFAULT_PORT); + public HealthServer() throws IOException { + this(DEFAULT_PORT); } - public HealthServer(SoakMetrics metrics, int port) throws IOException { - this.metrics = metrics; + public HealthServer(int port) throws IOException { this.server = HttpServer.create(new InetSocketAddress(port), 0); this.server.setExecutor(Executors.newFixedThreadPool(2)); @@ -60,16 +53,6 @@ public HealthServer(SoakMetrics metrics, int port) throws IOException { os.write(body); } }); - - server.createContext("/metrics", exchange -> { - String metricsText = metrics.toPrometheusText(); - byte[] body = metricsText.getBytes(StandardCharsets.UTF_8); - exchange.getResponseHeaders().set("Content-Type", "text/plain"); - exchange.sendResponseHeaders(200, body.length); - try (OutputStream os = exchange.getResponseBody()) { - os.write(body); - } - }); } public void start() { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java deleted file mode 100644 index 4b64bc6ffc82..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/metrics/SoakMetrics.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.azure.cosmos.avadtest.metrics; - -import java.util.concurrent.atomic.LongAdder; - -/** - * Centralized soak test metrics. Thread-safe counters for all - * workload components. - * - * Exported as Prometheus-compatible text via /metrics endpoint. - * Reusable: other workloads can extend or compose this class. 
- */ -public final class SoakMetrics { - - // Ingestor metrics - private final LongAdder ingestorOpsSuccess = new LongAdder(); - private final LongAdder ingestorOpsFailure = new LongAdder(); - private final LongAdder ingestorCreates = new LongAdder(); - private final LongAdder ingestorReplaces = new LongAdder(); - private final LongAdder ingestorUpserts = new LongAdder(); - private final LongAdder ingestorDeletes = new LongAdder(); - - // CFP consumer metrics - private final LongAdder cfpAvadEventsConsumed = new LongAdder(); - private final LongAdder cfpLvEventsConsumed = new LongAdder(); - private final LongAdder cfpPreviousImageMissing = new LongAdder(); - private final LongAdder cfpLsnViolations = new LongAdder(); - - // Reconciliation metrics - private final LongAdder reconWrites = new LongAdder(); - private final LongAdder reconErrors = new LongAdder(); - private final LongAdder reconDrops = new LongAdder(); - - // --- Ingestor --- - public void recordIngestorSuccess() { ingestorOpsSuccess.increment(); } - public void recordIngestorFailure() { ingestorOpsFailure.increment(); } - public void recordIngestorCreate() { ingestorCreates.increment(); } - public void recordIngestorReplace() { ingestorReplaces.increment(); } - public void recordIngestorUpsert() { ingestorUpserts.increment(); } - public void recordIngestorDelete() { ingestorDeletes.increment(); } - - // --- CFP --- - public void recordAvadEvent() { cfpAvadEventsConsumed.increment(); } - public void recordLvEvent() { cfpLvEventsConsumed.increment(); } - public void recordMissingPreviousImage() { cfpPreviousImageMissing.increment(); } - public void recordLsnViolation() { cfpLsnViolations.increment(); } - - // --- Reconciliation --- - public void recordReconWrite() { reconWrites.increment(); } - public void recordReconError() { reconErrors.increment(); } - public void recordReconDrop() { reconDrops.increment(); } - - /** - * Export all metrics as Prometheus-compatible plain text. 
- */ - public String toPrometheusText() { - StringBuilder sb = new StringBuilder(2048); - - appendMetric(sb, "cosmos_soak_ingestor_ops_success_total", - "Total successful ingestor operations", ingestorOpsSuccess.sum()); - appendMetric(sb, "cosmos_soak_ingestor_ops_failure_total", - "Total failed ingestor operations", ingestorOpsFailure.sum()); - appendMetric(sb, "cosmos_soak_ingestor_creates_total", - "Total create operations", ingestorCreates.sum()); - appendMetric(sb, "cosmos_soak_ingestor_replaces_total", - "Total replace operations", ingestorReplaces.sum()); - appendMetric(sb, "cosmos_soak_ingestor_upserts_total", - "Total upsert operations", ingestorUpserts.sum()); - appendMetric(sb, "cosmos_soak_ingestor_deletes_total", - "Total delete operations", ingestorDeletes.sum()); - - appendMetric(sb, "cosmos_soak_cfp_avad_events_consumed_total", - "Total AVAD change feed events consumed", cfpAvadEventsConsumed.sum()); - appendMetric(sb, "cosmos_soak_cfp_lv_events_consumed_total", - "Total LV change feed events consumed", cfpLvEventsConsumed.sum()); - appendMetric(sb, "cosmos_soak_cfp_previous_image_missing_total", - "Replace/delete events missing previousImage", cfpPreviousImageMissing.sum()); - appendMetric(sb, "cosmos_soak_cfp_lsn_violations_total", - "LSN ordering violations", cfpLsnViolations.sum()); - - appendMetric(sb, "cosmos_soak_reconciliation_writes_total", - "Total reconciliation writes", reconWrites.sum()); - appendMetric(sb, "cosmos_soak_reconciliation_errors_total", - "Total reconciliation errors", reconErrors.sum()); - appendMetric(sb, "cosmos_soak_reconciliation_drops_total", - "Total reconciliation drops", reconDrops.sum()); - - return sb.toString(); - } - - private void appendMetric(StringBuilder sb, String name, String help, long value) { - sb.append("# HELP ").append(name).append(' ').append(help).append('\n'); - sb.append("# TYPE ").append(name).append(" counter\n"); - sb.append(name).append(' ').append(value).append('\n'); - } -} From 7339285b09395863f726f9f7ac95805bb0d9edac Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:25:59 -0400 Subject: [PATCH 08/28] Align close() pattern across all components All components now follow the same close() sequence: 1. Log 'Closing {Component}...' 2. Stop workload-specific resources (CFP processors, running flag) 3. Close eventLog (try-catch) 4. Close reconWriter 5. Close Cosmos client 6. 
Log summary and 'closed' confirmation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../java/com/azure/cosmos/avadtest/health/HealthMonitor.java | 2 ++ .../main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java | 1 + .../main/java/com/azure/cosmos/avadtest/reader/AvadReader.java | 1 + .../com/azure/cosmos/avadtest/reader/LatestVersionReader.java | 1 + 4 files changed, 5 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java index 3e2b5b9afbd3..c304f17f5596 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java @@ -249,6 +249,8 @@ private void writeHealthSnapshot(Instant timestamp, } public void close() { + log.info("Closing HealthMonitor..."); client.close(); + log.info("HealthMonitor closed"); } } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index cb06120f896c..cae09cb90dd1 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -284,6 +284,7 @@ private String getRecentId() { @Override public void close() { + log.info("Closing Ingestor..."); running.set(false); try { eventLog.close(); } catch (Exception e) { /* ignore */ } reconWriter.close(); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index bf5a7dfb4829..7ca2f86a65c6 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -212,6 +212,7 @@ private static String getTextOrEmpty(JsonNode node, String field) { @Override public void close() { + log.info("Closing AvadReader..."); logCorrectnessReport(); for (ChangeFeedProcessor p : processors) { try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index ed759a21c741..87ee78609ab9 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -130,6 +130,7 @@ private static String getTextOrEmpty(JsonNode node, String field) { @Override public void close() { + log.info("Closing LatestVersionReader..."); for (ChangeFeedProcessor p : processors) { try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ } } From 4966fd79267c53712de7b9e4e4bf17529db466cd Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:29:42 -0400 Subject: [PATCH 09/28] Share CosmosAsyncClient between workload and ReconciliationWriter MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each component (Ingestor, AvadReader, LatestVersionReader) was creating 2 CosmosAsyncClient instances — one for the workload, one inside ReconciliationWriter. This wastes connections and memory. Now ReconciliationWriter accepts a shared client instead of creating its own. The component owns the client lifecycle and closes it in close(). ReconciliationWriter only drains its sink and disposes its subscription. Also: store and dispose Reactor subscriptions in Ingestor to prevent leaks, and explicitly shut down HealthServer's ExecutorService. Before: 6 CosmosAsyncClients across 3 components After: 3 CosmosAsyncClients (1 per component) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/health/HealthServer.java | 14 ++++++++++- .../cosmos/avadtest/ingestor/Ingestor.java | 13 +++++++--- .../cosmos/avadtest/reader/AvadReader.java | 3 ++- .../avadtest/reader/LatestVersionReader.java | 3 ++- .../reconciliation/ReconciliationWriter.java | 24 +++++++------------ 5 files changed, 36 insertions(+), 21 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java index d3e894248ee4..fb103241cae1 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthServer.java @@ -8,7 +8,9 @@ import java.io.OutputStream; import java.net.InetSocketAddress; import java.nio.charset.StandardCharsets; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -24,6 +26,7 @@ public final class HealthServer { private static final int DEFAULT_PORT = 8080; private final HttpServer server; + private final ExecutorService executor; private final AtomicBoolean ready = new AtomicBoolean(false); public HealthServer() throws IOException { @@ -31,8 +34,9 @@ public HealthServer() throws IOException { } public HealthServer(int port) throws IOException { + this.executor = Executors.newFixedThreadPool(2); this.server = HttpServer.create(new InetSocketAddress(port), 0); - this.server.setExecutor(Executors.newFixedThreadPool(2)); + this.server.setExecutor(executor); server.createContext("/health", exchange -> { byte[] body = "{\"status\":\"UP\"}".getBytes(StandardCharsets.UTF_8); @@ -67,6 +71,14 @@ public void setReady(boolean isReady) { public void stop() { server.stop(2); + executor.shutdown(); + try { + if (!executor.awaitTermination(5, TimeUnit.SECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException e) { + executor.shutdownNow(); + } log.info("Health server stopped"); } } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index cae09cb90dd1..6ab232f1918b 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -58,12 +58,15 @@ public final class Ingestor implements AutoCloseable { private final int opsPerTick; private final String precomputedPayload; + + // Reactor 
subscriptions — disposed on close to prevent leaks + private volatile reactor.core.Disposable mainSubscription; + private volatile reactor.core.Disposable progressSubscription; private final CosmosBulkExecutionOptions bulkOptions; public Ingestor(TestConfig config) throws Exception { this.config = config; this.eventLog = new EventLog(config.producedLogFile()); - this.reconWriter = new ReconciliationWriter(config, "ingestor"); this.recentDocIds = new String[10_000]; this.opsPerTick = Math.max(1, config.opsPerSec() * TICK_INTERVAL_MS / 1000); this.bulkOptions = new CosmosBulkExecutionOptions(); @@ -84,6 +87,8 @@ public Ingestor(TestConfig config) throws Exception { .getDatabase(config.database()) .getContainer(config.feedContainer()); + this.reconWriter = new ReconciliationWriter(client, config.database(), "ingestor"); + log.info("Ingestor initialized (bulk mode): endpoint={}, db={}, container={}, ops/sec={}, opsPerTick={}", config.endpoint(), config.database(), config.feedContainer(), config.opsPerSec(), opsPerTick); @@ -97,7 +102,7 @@ public void run() throws InterruptedException { CountDownLatch latch = new CountDownLatch(1); // Each tick: build a batch of operations and submit via bulk API - Flux.interval(Duration.ofMillis(TICK_INTERVAL_MS)) + this.mainSubscription = Flux.interval(Duration.ofMillis(TICK_INTERVAL_MS)) .takeWhile(tick -> running.get()) .concatMap(tick -> executeBulkBatch()) .doOnError(e -> log.error("Bulk ingestion error", e)) @@ -111,7 +116,7 @@ public void run() throws InterruptedException { }, durationSec, java.util.concurrent.TimeUnit.SECONDS); } - Flux.interval(Duration.ofSeconds(30)) + this.progressSubscription = Flux.interval(Duration.ofSeconds(30)) .takeWhile(tick -> running.get()) .subscribe(tick -> { long s = successCount.sum(); @@ -286,6 +291,8 @@ private String getRecentId() { public void close() { log.info("Closing Ingestor..."); running.set(false); + if (progressSubscription != null) { progressSubscription.dispose(); } + if (mainSubscription != null) { mainSubscription.dispose(); } try { eventLog.close(); } catch (Exception e) { /* ignore */ } reconWriter.close(); client.close(); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index 7ca2f86a65c6..e38779fa549a 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -60,7 +60,6 @@ public final class AvadReader implements AutoCloseable { public AvadReader(TestConfig config) throws Exception { this.config = config; this.eventLog = new EventLog(config.consumedLogFile()); - this.reconWriter = new ReconciliationWriter(config, "cfp-avad"); this.client = new CosmosClientBuilder() .endpoint(config.readerEndpoint()) @@ -77,6 +76,8 @@ public AvadReader(TestConfig config) throws Exception { .getDatabase(config.database()) .getContainer(config.leaseContainer()); + this.reconWriter = new ReconciliationWriter(client, config.database(), "cfp-avad"); + log.info("AvadReader initialized: prefix={}, endpoint={}, region={}, workers={}", LEASE_PREFIX, config.readerEndpoint(), config.preferredRegion(), config.workerCount()); } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java 
b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index 87ee78609ab9..4cc0208eee3d 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -42,7 +42,6 @@ public final class LatestVersionReader implements AutoCloseable { public LatestVersionReader(TestConfig config) throws Exception { this.config = config; this.eventLog = new EventLog(config.consumedLogFile()); - this.reconWriter = new ReconciliationWriter(config, "cfp-lv"); this.client = new CosmosClientBuilder() .endpoint(config.readerEndpoint()) @@ -60,6 +59,8 @@ public LatestVersionReader(TestConfig config) throws Exception { .getDatabase(config.database()) .getContainer(config.leaseContainer()); + this.reconWriter = new ReconciliationWriter(client, config.database(), "cfp-lv"); + log.info("LatestVersionReader initialized: prefix={}, endpoint={}, region={}, workers={}", LEASE_PREFIX, config.readerEndpoint(), config.preferredRegion(), config.workerCount()); } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index 7b4f5b9142a0..9a5a8380cb22 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -2,10 +2,8 @@ import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; -import com.azure.cosmos.CosmosClientBuilder; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; -import com.azure.cosmos.avadtest.config.TestConfig; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.PartitionKey; import com.fasterxml.jackson.databind.node.JsonNodeFactory; @@ -48,7 +46,7 @@ public final class ReconciliationWriter implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(ReconciliationWriter.class); private static final String RECONCILIATION_CONTAINER = "reconciliation"; private static final int MAX_RETRIES = 3; - private static final int MAX_REQUEUES = 2; // max times a doc can be requeued after all retries fail + private static final int MAX_REQUEUES = 2; private static final String REQUEUE_COUNT_FIELD = "_requeueCount"; private static final CosmosEndToEndOperationLatencyPolicyConfig E2E_POLICY = @@ -56,7 +54,6 @@ public final class ReconciliationWriter implements AutoCloseable { private final String source; private final CosmosAsyncContainer container; - private final CosmosAsyncClient client; private final LongAdder writeCount = new LongAdder(); private final LongAdder errorCount = new LongAdder(); private final LongAdder retryCount = new LongAdder(); @@ -65,18 +62,16 @@ public final class ReconciliationWriter implements AutoCloseable { private final Sinks.Many sink; private final reactor.core.Disposable subscription; - public ReconciliationWriter(TestConfig config, String source) { + /** + * @param client shared CosmosAsyncClient — caller owns lifecycle + * @param database database name + * @param source source identifier for reconciliation docs + */ + 
public ReconciliationWriter(CosmosAsyncClient client, String database, String source) { this.source = source; - this.client = new CosmosClientBuilder() - .endpoint(config.endpoint()) - .key(config.key()) - .gatewayMode() - .preferredRegions(config.preferredRegions()) - .buildAsyncClient(); - this.container = client - .getDatabase(config.database()) + .getDatabase(database) .getContainer(RECONCILIATION_CONTAINER); this.sink = Sinks.many().multicast().onBackpressureBuffer(100_000); @@ -171,12 +166,11 @@ private boolean isRetryable(Throwable e) { @Override public void close() { sink.tryEmitComplete(); - // Wait for the subscriber to drain buffered writes before disposing try { Thread.sleep(10_000); } catch (InterruptedException ignored) {} subscription.dispose(); - client.close(); + // Client is NOT closed here — caller owns the lifecycle log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}, drops={}", source, writeCount.sum(), retryCount.sum(), errorCount.sum(), dropCount.sum()); } From 040a68e4daf72526ef35a0d8f961a60c68fadd72 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:36:25 -0400 Subject: [PATCH 10/28] Replace inline fully qualified names with imports Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../java/com/azure/cosmos/avadtest/ingestor/Ingestor.java | 3 ++- .../java/com/azure/cosmos/avadtest/reader/AvadReader.java | 3 ++- .../azure/cosmos/avadtest/reader/LatestVersionReader.java | 3 ++- .../com/azure/cosmos/avadtest/reconciliation/EventLog.java | 3 ++- .../azure/cosmos/avadtest/reconciliation/Reconciler.java | 6 ++++-- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index 6ab232f1918b..b652767ab4e2 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -25,6 +25,7 @@ import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; @@ -113,7 +114,7 @@ public void run() throws InterruptedException { Schedulers.single().schedule(() -> { log.info("Duration {}s reached, stopping ingestor...", durationSec); running.set(false); - }, durationSec, java.util.concurrent.TimeUnit.SECONDS); + }, durationSec, TimeUnit.SECONDS); } this.progressSubscription = Flux.interval(Duration.ofSeconds(30)) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index e38779fa549a..10c1ee491f04 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -18,6 +18,7 @@ import java.time.Duration; import java.time.Instant; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentHashMap; @@ -94,7 +95,7 @@ public void run() throws InterruptedException { 
options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() - .hostName("avad-host-" + java.lang.management.ManagementFactory.getRuntimeMXBean().getName() + "-w" + workerIdx) + .hostName("avad-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + workerIdx) .feedContainer(feedContainer) .leaseContainer(leaseContainer) .options(options) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index 4cc0208eee3d..dbe1e7aaed0f 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -17,6 +17,7 @@ import java.time.Duration; import java.time.Instant; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; @@ -77,7 +78,7 @@ public void run() throws InterruptedException { options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() - .hostName("lv-host-" + java.lang.management.ManagementFactory.getRuntimeMXBean().getName() + "-w" + i) + .hostName("lv-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + i) .feedContainer(feedContainer) .leaseContainer(leaseContainer) .options(options) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java index 0a0f01c3ffce..47c471f8aabb 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java @@ -6,6 +6,7 @@ import java.io.*; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.concurrent.locks.ReentrantLock; @@ -27,7 +28,7 @@ public final class EventLog implements AutoCloseable { private final ReentrantLock lock = new ReentrantLock(); public EventLog(String filePath) throws IOException { - Path path = java.nio.file.Paths.get(filePath); + Path path = Paths.get(filePath); this.writer = Files.newBufferedWriter(path, StandardOpenOption.CREATE, StandardOpenOption.APPEND, StandardOpenOption.WRITE); log.info("EventLog opened: {}", path.toAbsolutePath()); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java index 6dbef5490e2f..6853f002ec88 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java @@ -6,8 +6,10 @@ import java.io.*; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Reconciler that compares produced vs consumed event logs. 
@@ -37,7 +39,7 @@ public static int reconcile(String producedFile, String consumedFile) throws IOE missing.removeAll(consumed); // Count duplicates (at-least-once delivery) - long totalConsumedLines = Files.lines(java.nio.file.Paths.get(consumedFile)).filter(l -> !l.trim().isEmpty()).count(); + long totalConsumedLines = Files.lines(Paths.get(consumedFile)).filter(l -> !l.trim().isEmpty()).count(); long duplicates = totalConsumedLines - consumed.size(); log.info("Produced: {} unique events", produced.size()); @@ -90,7 +92,7 @@ public static int parity(String lvFile, String avadFile) throws IOException { /** Loads unique eventIds (first field per line). */ private static Set loadEventIds(String file) throws IOException { - try (java.util.stream.Stream lines = Files.lines(java.nio.file.Paths.get(file))) { + try (Stream lines = Files.lines(Paths.get(file))) { return lines .filter(l -> !l.trim().isEmpty()) .map(l -> l.split(",")[0]) From 8a226d32704a2dc69fc80eb801735c666bf3730d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 12:39:48 -0400 Subject: [PATCH 11/28] Add JSON-based configuration with env var overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace application.properties with a structured JSON config file. TestConfig now supports two load paths: --config config.json → JSON file with env var overrides (no --config) → env vars only (backward compatible) Precedence: env var > JSON value > built-in default. Secrets (COSMOS_KEY) should always come from env vars. JSON schema groups settings by concern: cosmos.* — endpoint, database, containers, region ingestor.* — opsPerSec, docSize, partitions, duration, workers logging.* — produced/consumed log file paths Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/application.properties | 24 ---- .../avad-soak/config.json | 21 ++++ .../java/com/azure/cosmos/avadtest/Main.java | 21 +++- .../cosmos/avadtest/config/TestConfig.java | 103 ++++++++++++++---- 4 files changed, 121 insertions(+), 48 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties deleted file mode 100644 index b939b896694b..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/application.properties +++ /dev/null @@ -1,24 +0,0 @@ -# Cosmos AVAD Test — Default Config -# Override via environment variables (same names, uppercase) - -COSMOS_ENDPOINT= -COSMOS_KEY= -COSMOS_DATABASE=graph_db -COSMOS_FEED_CONTAINER=avad-test -COSMOS_LEASE_CONTAINER=avad-test-leases -COSMOS_PREFERRED_REGION=West Central US - -# Ingestor settings -OPS_PER_SEC=5000 -DOC_SIZE_BYTES=1024 -LOGICAL_PARTITION_COUNT=100000 - -# Log files -PRODUCED_LOG=produced.log -CONSUMED_LOG=consumed-avad.log - -# Duration (0 = run forever) -DURATION_SECONDS=3600 - -# CFP worker count per reader mode -WORKER_COUNT=2 diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json new file mode 100644 index 000000000000..cf8145698ae7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json @@ -0,0 +1,21 @@ +{ + "cosmos": { + "endpoint": "", + "regionalEndpoint": "", + "database": "graph_db", + "feedContainer": "avad-test", + "leaseContainer": "avad-test-leases", + 
"preferredRegion": "West Central US" + }, + "ingestor": { + "opsPerSec": 5000, + "docSizeBytes": 1024, + "logicalPartitionCount": 100000, + "durationSeconds": 3600, + "workerCount": 2 + }, + "logging": { + "producedLogFile": "produced.log", + "consumedLogFile": "consumed.log" + } +} diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java index 00c89a1bd3c3..74433b8779b7 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java @@ -42,9 +42,20 @@ public final class Main { @Parameter(names = "--gap-sla-minutes", description = "Minutes before an unconsumed event is flagged as a gap") private int gapSlaMinutes = 10; + @Parameter(names = "--config", description = "Path to JSON config file (env vars override JSON values)") + private String configFile; + @Parameter(names = {"-h", "--help"}, description = "Help", help = true) private boolean help; + private TestConfig loadConfig() throws Exception { + if (configFile != null) { + log.info("Loading config from: {}", configFile); + return TestConfig.fromJson(configFile); + } + return TestConfig.fromEnv(); + } + private int run() throws Exception { log.info("Starting cosmos-avad-test in mode: {}", mode); @@ -69,7 +80,7 @@ private int runIngestor() throws Exception { HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); - TestConfig config = TestConfig.fromEnv(); + TestConfig config = loadConfig(); try (Ingestor ingestor = new Ingestor(config)) { healthServer.setReady(true); ingestor.run(); @@ -83,7 +94,7 @@ private int runLvReader() throws Exception { HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); - TestConfig config = TestConfig.fromEnv(); + TestConfig config = loadConfig(); try (LatestVersionReader reader = new LatestVersionReader(config)) { healthServer.setReady(true); reader.run(); @@ -97,7 +108,7 @@ private int runAvadReader() throws Exception { HealthServer healthServer = new HealthServer(healthPort); healthServer.start(); - TestConfig config = TestConfig.fromEnv(); + TestConfig config = loadConfig(); try (AvadReader reader = new AvadReader(config)) { healthServer.setReady(true); reader.run(); @@ -107,8 +118,8 @@ private int runAvadReader() throws Exception { } } - private int runHealthMonitor() { - TestConfig config = TestConfig.fromEnv(); + private int runHealthMonitor() throws Exception { + TestConfig config = loadConfig(); HealthMonitor monitor = new HealthMonitor(config, runId, gapSlaMinutes); try { return monitor.runChecks(); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java index cf2e1d0f5211..a7881966223d 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java @@ -1,14 +1,27 @@ package com.azure.cosmos.avadtest.config; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.File; +import java.io.IOException; import java.util.Collections; import java.util.List; /** - * Configuration loaded from environment variables or system properties. 
- * Env vars take precedence over system properties.
+ * Configuration loaded from a JSON file with environment variable overrides.
+ * <p>
+ * Load order (highest precedence first):
+ * <ol>
+ *   <li>Environment variables (e.g. COSMOS_KEY — always wins for secrets)</li>
+ *   <li>JSON config file (--config path)</li>
+ *   <li>Built-in defaults</li>
+ * </ol>
*/ public final class TestConfig { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private final String endpoint; private final String regionalEndpoint; private final String key; @@ -41,11 +54,41 @@ private TestConfig(Builder builder) { this.workerCount = builder.workerCount; } + /** + * Load config from a JSON file. Environment variables override JSON values. + */ + public static TestConfig fromJson(String filePath) throws IOException { + JsonNode root = MAPPER.readTree(new File(filePath)); + JsonNode cosmos = root.path("cosmos"); + JsonNode ingestor = root.path("ingestor"); + JsonNode logging = root.path("logging"); + + return new Builder() + .endpoint(resolve("COSMOS_ENDPOINT", textOrNull(cosmos, "endpoint"), null)) + .regionalEndpoint(resolve("COSMOS_REGIONAL_ENDPOINT", textOrNull(cosmos, "regionalEndpoint"), "")) + .key(resolve("COSMOS_KEY", textOrNull(cosmos, "key"), null)) + .database(resolve("COSMOS_DATABASE", textOrNull(cosmos, "database"), "graph_db")) + .feedContainer(resolve("COSMOS_FEED_CONTAINER", textOrNull(cosmos, "feedContainer"), "avad-test")) + .leaseContainer(resolve("COSMOS_LEASE_CONTAINER", textOrNull(cosmos, "leaseContainer"), "avad-test-leases")) + .preferredRegion(resolve("COSMOS_PREFERRED_REGION", textOrNull(cosmos, "preferredRegion"), "West Central US")) + .opsPerSec(resolveInt("OPS_PER_SEC", intOrNull(ingestor, "opsPerSec"), 5000)) + .docSizeBytes(resolveInt("DOC_SIZE_BYTES", intOrNull(ingestor, "docSizeBytes"), 1024)) + .logicalPartitionCount(resolveInt("LOGICAL_PARTITION_COUNT", intOrNull(ingestor, "logicalPartitionCount"), 100000)) + .producedLogFile(resolve("PRODUCED_LOG", textOrNull(logging, "producedLogFile"), "produced.log")) + .consumedLogFile(resolve("CONSUMED_LOG", textOrNull(logging, "consumedLogFile"), "consumed.log")) + .durationSeconds(resolveInt("DURATION_SECONDS", intOrNull(ingestor, "durationSeconds"), 3600)) + .workerCount(resolveInt("WORKER_COUNT", intOrNull(ingestor, "workerCount"), 2)) + .build(); + } + + /** + * Load config from environment variables only (no JSON file). + */ public static TestConfig fromEnv() { return new Builder() - .endpoint(env("COSMOS_ENDPOINT")) + .endpoint(envRequired("COSMOS_ENDPOINT")) .regionalEndpoint(envOrDefault("COSMOS_REGIONAL_ENDPOINT", "")) - .key(env("COSMOS_KEY")) + .key(envRequired("COSMOS_KEY")) .database(envOrDefault("COSMOS_DATABASE", "graph_db")) .feedContainer(envOrDefault("COSMOS_FEED_CONTAINER", "avad-test")) .leaseContainer(envOrDefault("COSMOS_LEASE_CONTAINER", "avad-test-leases")) @@ -60,28 +103,52 @@ public static TestConfig fromEnv() { .build(); } - private static String env(String name) { + /** Resolve: env var > JSON value > default. */ + private static String resolve(String envName, String jsonValue, String defaultValue) { + String env = envOrNull(envName); + if (env != null) return env; + if (jsonValue != null && !jsonValue.trim().isEmpty()) return jsonValue; + if (defaultValue != null) return defaultValue; + throw new IllegalStateException("Required config missing: " + envName); + } + + private static int resolveInt(String envName, Integer jsonValue, int defaultValue) { + String env = envOrNull(envName); + if (env != null) return Integer.parseInt(env); + if (jsonValue != null) return jsonValue; + return defaultValue; + } + + private static String textOrNull(JsonNode parent, String field) { + JsonNode node = parent.path(field); + return node.isMissingNode() || node.isNull() ? 
null : node.asText(); + } + + private static Integer intOrNull(JsonNode parent, String field) { + JsonNode node = parent.path(field); + return node.isMissingNode() || node.isNull() ? null : node.asInt(); + } + + private static String envOrNull(String name) { String val = System.getenv(name); - if (val == null || val.trim().isEmpty()) { - val = System.getProperty(name); - } - if (val == null || val.trim().isEmpty()) { - throw new IllegalStateException("Required config missing: " + name); - } + if (val != null && !val.trim().isEmpty()) return val; + val = System.getProperty(name); + return (val != null && !val.trim().isEmpty()) ? val : null; + } + + private static String envRequired(String name) { + String val = envOrNull(name); + if (val == null) throw new IllegalStateException("Required config missing: " + name); return val; } private static String envOrDefault(String name, String defaultVal) { - String val = System.getenv(name); - if (val == null || val.trim().isEmpty()) { - val = System.getProperty(name); - } - return (val != null && !val.trim().isEmpty()) ? val : defaultVal; + String val = envOrNull(name); + return val != null ? val : defaultVal; } public String endpoint() { return endpoint; } public String regionalEndpoint() { return regionalEndpoint; } - /** Returns regional endpoint if set, otherwise global endpoint. For use by readers. */ public String readerEndpoint() { return (regionalEndpoint != null && !regionalEndpoint.trim().isEmpty()) ? regionalEndpoint : endpoint; } @@ -96,9 +163,7 @@ public String readerEndpoint() { public int logicalPartitionCount() { return logicalPartitionCount; } public String producedLogFile() { return producedLogFile; } public String consumedLogFile() { return consumedLogFile; } - /** Duration in seconds. 0 = run forever until killed. */ public int durationSeconds() { return durationSeconds; } - /** Number of CFP worker instances per reader mode. */ public int workerCount() { return workerCount; } public static final class Builder { From 6c42d4cde757e42ac5be20669654682b5c5d7e22 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 13:02:26 -0400 Subject: [PATCH 12/28] Add local + AKS orchestration scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local mode (run-local.sh / run-local.ps1): - Launches ingestor + avad-reader + lv-reader as local JVM processes - Each on its own health port (8080/8081/8082) - Builds module automatically on first run - Filters classpath to use logback over log4j - Monitors process health, logs to output dir - Ctrl+C stops all three cleanly AKS mode (run-soak.sh): - Cleaned up to reference only pod-kill + partition-split chaos - Removed references to deleted chaos scripts - Simplified health check (pod readiness, no /metrics scraping) Removed run-cutover.sh (SSH-to-VM orchestration — superseded). 
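Illustrative invocations (flags and env vars as defined in the script
headers below; COSMOS_KEY value is a placeholder):

  # Local dev-box run: 30 min at 200 ops/sec
  COSMOS_KEY=xxx ./run-local.sh --config config.json --duration 1800 --ops 200

  # AKS soak: 12 hours with chaos disabled
  SOAK_DURATION_HOURS=12 CHAOS_ENABLED=false ./run-soak.sh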
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/config.json | 10 +- .../avad-soak/run-cutover.sh | 382 ------------------ .../avad-soak/run-local.ps1 | 139 +++++++ .../avad-soak/run-local.sh | 159 ++++++++ .../avad-soak/run-soak.sh | 151 +++---- 5 files changed, 350 insertions(+), 491 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.ps1 create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.sh diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json index cf8145698ae7..8999f4ec292b 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json @@ -1,6 +1,6 @@ { "cosmos": { - "endpoint": "", + "endpoint": "https://abhm-cfp-region-test.documents.azure.com:443/", "regionalEndpoint": "", "database": "graph_db", "feedContainer": "avad-test", @@ -8,10 +8,10 @@ "preferredRegion": "West Central US" }, "ingestor": { - "opsPerSec": 5000, - "docSizeBytes": 1024, - "logicalPartitionCount": 100000, - "durationSeconds": 3600, + "opsPerSec": 500, + "docSizeBytes": 512, + "logicalPartitionCount": 1000, + "durationSeconds": 1800, "workerCount": 2 }, "logging": { diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh deleted file mode 100644 index 44efcd677ec1..000000000000 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-cutover.sh +++ /dev/null @@ -1,382 +0,0 @@ -#!/bin/bash -# ============================================================================= -# AVAD Cut-Over Test Orchestrator -# ============================================================================= -# Runs from your dev box. Coordinates across 2 VMs via SSH. -# -# Architecture: -# Dev box (this script) ──SSH──► EUS VM (ingestor) -# ──SSH──► WCUS VM (lv-reader, avad-reader) -# -# Sequence: -# 1. Start ingestor on EUS VM -# 2. Wait for warm-up (configurable) -# 3. Start LV CFP reader on WCUS VM -# 4. Wait for LV warm-up (verify events flowing) -# 5. Start AVAD CFP reader on WCUS VM (staggered start) -# 6. Run all 3 concurrently for PARALLEL_DURATION -# 7. Stop LV reader → verify AVAD continues -# 8. Run AVAD-only for AVAD_ONLY_DURATION -# 9. Stop all -# 10. Collect logs from both VMs -# 11. 
Run reconciler locally -# ============================================================================= - -set -euo pipefail - -# ── Cleanup trap — stops all remote processes on error/exit ──────────────── -INGESTOR_PID="" -LV_PID="" -AVAD_PID="" -SPARK_LV_RUN_ID="" -SPARK_AVAD_RUN_ID="" - -cleanup() { - log "=== Cleanup triggered ===" - [ -n "$AVAD_PID" ] && stop_remote "$READER_VM" "$AVAD_PID" "avad-reader" 10 2>/dev/null || true - [ -n "$LV_PID" ] && stop_remote "$READER_VM" "$LV_PID" "lv-reader" 10 2>/dev/null || true - [ -n "$INGESTOR_PID" ] && stop_remote "$INGESTOR_VM" "$INGESTOR_PID" "ingestor" 10 2>/dev/null || true - [ -n "$SPARK_LV_RUN_ID" ] && stop_spark_job "$SPARK_LV_RUN_ID" "LV Spark" 2>/dev/null || true - [ -n "$SPARK_AVAD_RUN_ID" ] && stop_spark_job "$SPARK_AVAD_RUN_ID" "AVAD Spark" 2>/dev/null || true - log "=== Cleanup complete ===" -} -trap cleanup EXIT ERR INT TERM - -# ── Configuration ────────────────────────────────────────────────────────── -INGESTOR_VM="azureuser@" -READER_VM="azureuser@" - -# Databricks workspace -DATABRICKS_HOST="${DATABRICKS_HOST:?Set DATABRICKS_HOST (e.g. https://adb-xxx.azuredatabricks.net)}" -DATABRICKS_TOKEN="${DATABRICKS_TOKEN:?Set DATABRICKS_TOKEN}" -SPARK_LV_JOB_ID="${SPARK_LV_JOB_ID:?Set SPARK_LV_JOB_ID (Databricks job ID for LV Spark reader)}" -SPARK_AVAD_JOB_ID="${SPARK_AVAD_JOB_ID:?Set SPARK_AVAD_JOB_ID (Databricks job ID for AVAD Spark reader)}" - -JAR="cosmos-avad-test-1.0-SNAPSHOT.jar" -JAR_PATH="/home/azureuser/$JAR" - -# Cosmos config (set these or export before running) -export COSMOS_ENDPOINT="${COSMOS_ENDPOINT:?Set COSMOS_ENDPOINT}" -export COSMOS_KEY="${COSMOS_KEY:?Set COSMOS_KEY}" -export COSMOS_DATABASE="${COSMOS_DATABASE:-graph_db}" -export COSMOS_FEED_CONTAINER="${COSMOS_FEED_CONTAINER:-avad-test}" -export COSMOS_LEASE_CONTAINER="${COSMOS_LEASE_CONTAINER:-avad-test-leases}" -export COSMOS_PREFERRED_REGION="${COSMOS_PREFERRED_REGION:-West Central US}" -export OPS_PER_SEC="${OPS_PER_SEC:-10}" - -# Timing -INGESTOR_WARMUP_SEC=60 # Let ingestor run before starting readers -LV_WARMUP_SEC=120 # Let LV reader run before starting AVAD -PARALLEL_DURATION_SEC=1800 # All workloads running before split #1 (30 min) -SPLIT1_SETTLE_SEC=300 # Wait for split #1 to complete (5 min) -SPLIT2_SETTLE_SEC=300 # Wait for split #2 to complete (5 min) -POST_SPLIT_DURATION_SEC=1800 # Run after splits to verify no gaps (30 min) -AVAD_ONLY_DURATION_SEC=1800 # AVAD-only after LV shutdown (30 min) -SPARK_AVAD_DELAY_SEC=120 # Delay before starting AVAD Spark after LV Spark - -# Throughput levels for 2-level split test -# Start at 10K (~1 PP) → 50K (~5 PP, split #1) → 100K (~10 PP, split #2) -INITIAL_THROUGHPUT=10000 -SPLIT1_THROUGHPUT=50000 -SPLIT2_THROUGHPUT=100000 - -# Cosmos container config (for throughput scaling) -COSMOS_ACCOUNT="${COSMOS_ACCOUNT:-abhm-cfp-region-test}" -COSMOS_RG="${COSMOS_RG:-abhm-rg}" - -# Local output directory -OUTPUT_DIR="./cutover-results-$(date +%Y%m%d-%H%M%S)" -mkdir -p "$OUTPUT_DIR" - -# ── Helper functions ─────────────────────────────────────────────────────── - -log() { echo "[$(date '+%H:%M:%S')] $*"; } - -ssh_cmd() { - local vm="$1"; shift - ssh -o StrictHostKeyChecking=no "$vm" "$@" -} - -# Start a process on a remote VM, returns the remote PID -start_remote() { - local vm="$1" - local mode="$2" - local log_file="$3" - local extra_env="${4:-}" - - log "Starting --mode $mode on $vm" - local pid - pid=$(ssh_cmd "$vm" " - export COSMOS_ENDPOINT='$COSMOS_ENDPOINT' - export COSMOS_KEY='$COSMOS_KEY' - export 
COSMOS_DATABASE='$COSMOS_DATABASE' - export COSMOS_FEED_CONTAINER='$COSMOS_FEED_CONTAINER' - export COSMOS_LEASE_CONTAINER='$COSMOS_LEASE_CONTAINER' - export COSMOS_PREFERRED_REGION='$COSMOS_PREFERRED_REGION' - export OPS_PER_SEC='$OPS_PER_SEC' - $extra_env - nohup java -jar $JAR_PATH --mode $mode > $log_file 2>&1 & - echo \$! - ") - log " PID: $pid" - echo "$pid" -} - -# Stop a process on a remote VM by PID with graceful timeout -stop_remote() { - local vm="$1" - local pid="$2" - local label="$3" - local timeout="${4:-30}" # default 30s graceful shutdown - - log "Stopping $label (PID $pid) on $vm, timeout=${timeout}s" - ssh_cmd "$vm" "kill $pid 2>/dev/null || true" - - # Wait for graceful exit with timeout loop - local elapsed=0 - while [ $elapsed -lt $timeout ]; do - if ! ssh_cmd "$vm" "kill -0 $pid 2>/dev/null"; then - log " $label exited gracefully after ${elapsed}s" - return 0 - fi - sleep 2 - elapsed=$((elapsed + 2)) - done - - # Force kill only if still running - log " $label still running after ${timeout}s, sending SIGKILL" - ssh_cmd "$vm" "kill -9 $pid 2>/dev/null || true" - sleep 2 - log " $label force-killed" -} - -# Collect a file from a remote VM -collect_log() { - local vm="$1" - local remote_path="$2" - local local_name="$3" - - log "Collecting $remote_path from $vm" - scp -o StrictHostKeyChecking=no "$vm:$remote_path" "$OUTPUT_DIR/$local_name" 2>/dev/null || \ - log " WARNING: Could not collect $remote_path" -} - -# ── Databricks helpers ───────────────────────────────────────────────────── - -dbx_api() { - local method="$1" - local endpoint="$2" - shift 2 - curl -s -X "$method" \ - "$DATABRICKS_HOST/api/2.1/jobs$endpoint" \ - -H "Authorization: Bearer $DATABRICKS_TOKEN" \ - -H "Content-Type: application/json" \ - "$@" -} - -# Start a Databricks job, returns run_id -start_spark_job() { - local job_id="$1" - local label="$2" - - log "Starting Spark $label (job_id=$job_id)" - local response - response=$(dbx_api POST "/run-now" -d "{\"job_id\": $job_id}") - local run_id - run_id=$(echo "$response" | python3 -c "import sys,json; print(json.load(sys.stdin).get('run_id',''))" 2>/dev/null) - - if [ -z "$run_id" ]; then - log " ERROR: Failed to start Spark $label: $response" - echo "" - else - log " Spark $label run_id: $run_id" - echo "$run_id" - fi -} - -# Cancel a Databricks run -stop_spark_job() { - local run_id="$1" - local label="$2" - - if [ -n "$run_id" ]; then - log "Cancelling Spark $label (run_id=$run_id)" - dbx_api POST "/runs/cancel" -d "{\"run_id\": $run_id}" > /dev/null - log " Spark $label cancelled" - fi -} - -# Get Spark run status -spark_status() { - local run_id="$1" - dbx_api GET "/runs/get?run_id=$run_id" | \ - python3 -c "import sys,json; r=json.load(sys.stdin); print(r.get('state',{}).get('life_cycle_state','UNKNOWN'))" 2>/dev/null -} - -# ── Pre-flight checks ───────────────────────────────────────────────────── - -log "=== AVAD Cut-Over Test ===" -log "Ingestor VM: $INGESTOR_VM" -log "Reader VM: $READER_VM" -log "Databricks: $DATABRICKS_HOST" -log "Spark LV job: $SPARK_LV_JOB_ID" -log "Spark AVAD job: $SPARK_AVAD_JOB_ID" -log "Output dir: $OUTPUT_DIR" -log "" - -log "Checking JAR exists on both VMs..." -ssh_cmd "$INGESTOR_VM" "test -f $JAR_PATH" || { log "ERROR: JAR not found on ingestor VM"; exit 1; } -ssh_cmd "$READER_VM" "test -f $JAR_PATH" || { log "ERROR: JAR not found on reader VM"; exit 1; } -log " ✅ JAR found on both VMs" - -log "Checking Java on both VMs..." 
-ssh_cmd "$INGESTOR_VM" "java -version" 2>&1 | head -1 -ssh_cmd "$READER_VM" "java -version" 2>&1 | head -1 -log "" - -# ── Phase 0: Scale container to initial throughput ───────────────────────── - -log "=== Phase 0: Scaling container to ${INITIAL_THROUGHPUT} RU/s (~1 PP) ===" -az cosmosdb sql container throughput update \ - --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ - --throughput "$INITIAL_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" -log " Container at ${INITIAL_THROUGHPUT} RU/s. Waiting 60s for propagation..." -sleep 60 - -# ── Phase 1: Start ingestor ─────────────────────────────────────────────── - -log "=== Phase 1: Ingestor warm-up ($INGESTOR_WARMUP_SEC sec) ===" -INGESTOR_PID=$(start_remote "$INGESTOR_VM" "ingestor" "/home/azureuser/ingestor.log" \ - "export PRODUCED_LOG=/home/azureuser/produced.log") -sleep "$INGESTOR_WARMUP_SEC" -log " Ingestor warm-up complete" - -# ── Phase 2: Start LV reader + LV Spark ─────────────────────────────────── - -log "=== Phase 2: LV Reader + LV Spark warm-up ($LV_WARMUP_SEC sec) ===" -LV_PID=$(start_remote "$READER_VM" "lv-reader" "/home/azureuser/lv-reader.log" \ - "export CONSUMED_LOG=/home/azureuser/consumed-lv.log") -SPARK_LV_RUN_ID=$(start_spark_job "$SPARK_LV_JOB_ID" "LV reader") -sleep "$LV_WARMUP_SEC" -log " LV reader + LV Spark warm-up complete" - -# ── Phase 3: Start AVAD reader + AVAD Spark (staggered) ──────────────────── - -log "=== Phase 3: AVAD Reader + AVAD Spark starting (staggered after LV) ===" -AVAD_PID=$(start_remote "$READER_VM" "avad-reader" "/home/azureuser/avad-reader.log" \ - "export CONSUMED_LOG=/home/azureuser/consumed-avad.log") -log " AVAD CFP started, waiting ${SPARK_AVAD_DELAY_SEC}s before starting AVAD Spark..." -sleep "$SPARK_AVAD_DELAY_SEC" -SPARK_AVAD_RUN_ID=$(start_spark_job "$SPARK_AVAD_JOB_ID" "AVAD reader") -log " AVAD reader + AVAD Spark started" - -# ── Phase 4: Parallel run before splits ──────────────────────────────────── - -log "=== Phase 4: Parallel run — all 5 workloads at ${INITIAL_THROUGHPUT} RU/s ($PARALLEL_DURATION_SEC sec) ===" -log " All 5 workloads running on ~1 physical partition. Waiting..." -sleep "$PARALLEL_DURATION_SEC" - -# ── Phase 4a: Split #1 — scale 10K → 50K (~1 PP → ~5 PP) ────────────────── - -log "=== Phase 4a: Split #1 — scaling ${INITIAL_THROUGHPUT} → ${SPLIT1_THROUGHPUT} RU/s ===" -az cosmosdb sql container throughput update \ - --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ - --throughput "$SPLIT1_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" -log " Throughput update submitted. Waiting ${SPLIT1_SETTLE_SEC}s for split to complete..." -sleep "$SPLIT1_SETTLE_SEC" -log " Split #1 settle complete" - -# ── Phase 4b: Split #2 — scale 50K → 100K (~5 PP → ~10 PP) ──────────────── - -log "=== Phase 4b: Split #2 — scaling ${SPLIT1_THROUGHPUT} → ${SPLIT2_THROUGHPUT} RU/s ===" -az cosmosdb sql container throughput update \ - --account-name "$COSMOS_ACCOUNT" --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DATABASE" --name "$COSMOS_FEED_CONTAINER" \ - --throughput "$SPLIT2_THROUGHPUT" -o none 2>&1 || log " WARNING: throughput update failed" -log " Throughput update submitted. Waiting ${SPLIT2_SETTLE_SEC}s for split to complete..." 
-sleep "$SPLIT2_SETTLE_SEC" -log " Split #2 settle complete" - -# ── Phase 4c: Post-split run — verify no events missed ───────────────────── - -log "=== Phase 4c: Post-split run — all workloads on ~10 PPs ($POST_SPLIT_DURATION_SEC sec) ===" -log " Verifying CFP handled both splits. Waiting..." -sleep "$POST_SPLIT_DURATION_SEC" - -# ── Phase 5: Stop LV reader + LV Spark ──────────────────────────────────── - -log "=== Phase 5: Stopping LV Reader + LV Spark (AVAD continues) ===" -stop_remote "$READER_VM" "$LV_PID" "lv-reader" -stop_spark_job "$SPARK_LV_RUN_ID" "LV reader" - -# ── Phase 6: AVAD-only run ───────────────────────────────────────────────── - -log "=== Phase 6: AVAD-only run ($AVAD_ONLY_DURATION_SEC sec) ===" -log " Ingestor + AVAD CFP + AVAD Spark running. LV stopped. Waiting..." -sleep "$AVAD_ONLY_DURATION_SEC" - -# ── Phase 7: Stop all ────────────────────────────────────────────────────── - -log "=== Phase 7: Stopping all workloads ===" -stop_remote "$READER_VM" "$AVAD_PID" "avad-reader" -stop_spark_job "$SPARK_AVAD_RUN_ID" "AVAD reader" -stop_remote "$INGESTOR_VM" "$INGESTOR_PID" "ingestor" - -# ── Phase 8: Collect logs ────────────────────────────────────────────────── - -log "=== Phase 8: Collecting logs ===" -collect_log "$INGESTOR_VM" "/home/azureuser/produced.log" "produced.log" -collect_log "$INGESTOR_VM" "/home/azureuser/ingestor.log" "ingestor.log" -collect_log "$READER_VM" "/home/azureuser/consumed-lv.log" "consumed-lv.log" -collect_log "$READER_VM" "/home/azureuser/consumed-avad.log" "consumed-avad.log" -collect_log "$READER_VM" "/home/azureuser/lv-reader.log" "lv-reader.log" -collect_log "$READER_VM" "/home/azureuser/avad-reader.log" "avad-reader.log" - -# Collect Spark run details -log "Collecting Spark run status..." -if [ -n "$SPARK_LV_RUN_ID" ]; then - dbx_api GET "/runs/get?run_id=$SPARK_LV_RUN_ID" > "$OUTPUT_DIR/spark-lv-run.json" - log " Spark LV final status: $(spark_status "$SPARK_LV_RUN_ID")" -fi -if [ -n "$SPARK_AVAD_RUN_ID" ]; then - dbx_api GET "/runs/get?run_id=$SPARK_AVAD_RUN_ID" > "$OUTPUT_DIR/spark-avad-run.json" - log " Spark AVAD final status: $(spark_status "$SPARK_AVAD_RUN_ID")" -fi - -# ── Phase 9: Reconciliation ─────────────────────────────────────────────── - -log "=== Phase 9: Running reconciliation ===" - -log "Gap detection: produced vs AVAD consumed" -java -jar "target/$JAR" --mode reconcile \ - --produced "$OUTPUT_DIR/produced.log" \ - --consumed "$OUTPUT_DIR/consumed-avad.log" \ - | tee "$OUTPUT_DIR/reconcile-avad.txt" -AVAD_EXIT=$? - -log "Gap detection: produced vs LV consumed" -java -jar "target/$JAR" --mode reconcile \ - --produced "$OUTPUT_DIR/produced.log" \ - --consumed "$OUTPUT_DIR/consumed-lv.log" \ - | tee "$OUTPUT_DIR/reconcile-lv.txt" -LV_EXIT=$? - -log "Parity check: LV vs AVAD" -java -jar "target/$JAR" --mode reconcile \ - --lv "$OUTPUT_DIR/consumed-lv.log" \ - --avad "$OUTPUT_DIR/consumed-avad.log" \ - | tee "$OUTPUT_DIR/parity.txt" -PARITY_EXIT=$? 
- -# ── Summary ──────────────────────────────────────────────────────────────── - -log "=== RESULTS ===" -log " AVAD gap check: $([ $AVAD_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" -log " LV gap check: $([ $LV_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" -log " LV↔AVAD parity: $([ $PARITY_EXIT -eq 0 ] && echo '✅ PASS' || echo '❌ FAIL')" -log " Results in: $OUTPUT_DIR" -log "=== DONE ===" - -exit $(( AVAD_EXIT + LV_EXIT + PARITY_EXIT )) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.ps1 b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.ps1 new file mode 100644 index 000000000000..f6f0e4e27241 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.ps1 @@ -0,0 +1,139 @@ +<# +.SYNOPSIS + AVAD Soak Test — Local Mode (Windows) + +.DESCRIPTION + Runs ingestor + avad-reader + lv-reader as local JVM processes. + No AKS/Helm required. For dev-box validation before deploying to AKS. + +.EXAMPLE + .\run-local.ps1 -ConfigFile config.json + .\run-local.ps1 -ConfigFile config.json -DurationSeconds 1800 -OpsPerSec 200 + $env:COSMOS_KEY = "xxx"; .\run-local.ps1 -ConfigFile config.json +#> + +param( + [Parameter(Mandatory)] [string]$ConfigFile, + [int]$DurationSeconds = 0, + [int]$OpsPerSec = 0 +) + +$ErrorActionPreference = "Stop" +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$ModuleDir = Split-Path -Parent $ScriptDir + +# Apply overrides +if ($DurationSeconds -gt 0) { $env:DURATION_SECONDS = $DurationSeconds } +if ($OpsPerSec -gt 0) { $env:OPS_PER_SEC = $OpsPerSec } + +# ── Build if needed ─────────────────────────────────────────────────────── +$Jar = "$ModuleDir\target\azure-cosmos-benchmark-4.0.1-beta.1.jar" +$CpFile = "$ModuleDir\target\cp.txt" + +if (-not (Test-Path $Jar)) { + Write-Host "=== Building module ===" + Push-Location $ModuleDir + mvn package "-DskipTests" "-DskipCheckstyle" "-Dspotbugs.skip=true" "-Drevapi.skip=true" -B -q + Pop-Location +} + +if (-not (Test-Path $CpFile)) { + Push-Location $ModuleDir + mvn dependency:build-classpath "-Dmdep.outputFile=target\cp.txt" -B -q + Pop-Location +} + +# Build classpath: logback dir first, exclude log4j-slf4j-impl +$CpRaw = Get-Content $CpFile +$CpFiltered = ($CpRaw -split ';' | Where-Object { $_ -notmatch 'log4j-slf4j-impl' }) -join ';' +$Classpath = "$ScriptDir;$Jar;$CpFiltered" + +$JavaCmd = "java" +$MainClass = "com.azure.cosmos.avadtest.Main" + +# ── Output directory ────────────────────────────────────────────────────── +$RunId = Get-Date -Format "yyyyMMdd-HHmmss" +$OutputDir = "$ScriptDir\local-run-$RunId" +New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null + +function Log($msg) { + $ts = Get-Date -Format "HH:mm:ss" + $line = "[$ts] $msg" + Write-Host $line + Add-Content "$OutputDir\run.log" $line +} + +# ── Launch processes ────────────────────────────────────────────────────── + +Log "=== AVAD Local Soak Test ===" +Log "Config: $ConfigFile" +Log "Output: $OutputDir" + +$ConfigPath = Resolve-Path $ConfigFile + +Log "Starting ingestor (port 8080)..." +$ingestor = Start-Process -FilePath $JavaCmd -ArgumentList @( + "-cp", $Classpath, $MainClass, + "--mode", "ingestor", "--config", $ConfigPath, "--health-port", "8080" +) -RedirectStandardOutput "$OutputDir\ingestor.log" ` + -RedirectStandardError "$OutputDir\ingestor-err.log" ` + -PassThru -NoNewWindow +Log " Ingestor PID: $($ingestor.Id)" + +Start-Sleep -Seconds 10 + +Log "Starting avad-reader (port 8081)..." 
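+# Per-process consumed-log override: CONSUMED_LOG is resolved by TestConfig
+# with env-over-JSON precedence, so each reader writes its own log file
+# without editing config.json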
+$env:CONSUMED_LOG = "$OutputDir\consumed-avad.log" +$avadReader = Start-Process -FilePath $JavaCmd -ArgumentList @( + "-cp", $Classpath, $MainClass, + "--mode", "avad-reader", "--config", $ConfigPath, "--health-port", "8081" +) -RedirectStandardOutput "$OutputDir\avad-reader.log" ` + -RedirectStandardError "$OutputDir\avad-reader-err.log" ` + -PassThru -NoNewWindow +Log " AVAD reader PID: $($avadReader.Id)" + +Log "Starting lv-reader (port 8082)..." +$env:CONSUMED_LOG = "$OutputDir\consumed-lv.log" +$lvReader = Start-Process -FilePath $JavaCmd -ArgumentList @( + "-cp", $Classpath, $MainClass, + "--mode", "lv-reader", "--config", $ConfigPath, "--health-port", "8082" +) -RedirectStandardOutput "$OutputDir\lv-reader.log" ` + -RedirectStandardError "$OutputDir\lv-reader-err.log" ` + -PassThru -NoNewWindow +Log " LV reader PID: $($lvReader.Id)" + +Log "All 3 processes running" +Log " Ingestor log: $OutputDir\ingestor.log" +Log " AVAD log: $OutputDir\avad-reader.log" +Log " LV log: $OutputDir\lv-reader.log" + +# ── Monitor loop ────────────────────────────────────────────────────────── + +try { + Log "Monitoring (Ctrl+C to stop)..." + while ($true) { + Start-Sleep -Seconds 30 + + $dead = @() + if ($ingestor.HasExited) { $dead += "ingestor (exit $($ingestor.ExitCode))" } + if ($avadReader.HasExited) { $dead += "avad-reader (exit $($avadReader.ExitCode))" } + if ($lvReader.HasExited) { $dead += "lv-reader (exit $($lvReader.ExitCode))" } + + if ($dead.Count -gt 0) { + Log "❌ Process(es) exited: $($dead -join ', ')" + Log "Check logs in $OutputDir" + break + } + + # Print last ingestor progress + $lastLine = Get-Content "$OutputDir\ingestor.log" -Tail 1 -ErrorAction SilentlyContinue + if ($lastLine -match 'Progress:') { Log " $lastLine" } + } +} finally { + Log "=== Stopping all processes ===" + if (-not $ingestor.HasExited) { Stop-Process -Id $ingestor.Id -Force -ErrorAction SilentlyContinue } + if (-not $avadReader.HasExited) { Stop-Process -Id $avadReader.Id -Force -ErrorAction SilentlyContinue } + if (-not $lvReader.HasExited) { Stop-Process -Id $lvReader.Id -Force -ErrorAction SilentlyContinue } + Log "All processes stopped" + Log "Logs: $OutputDir" +} diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.sh new file mode 100644 index 000000000000..f9b89e018ade --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-local.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# ============================================================================= +# AVAD Soak Test — Local Mode +# ============================================================================= +# Runs ingestor + avad-reader + lv-reader as local JVM processes. +# No AKS/Helm required. For dev-box validation before deploying to AKS. +# +# Usage: +# ./run-local.sh --config config.json +# ./run-local.sh --config config.json --duration 1800 +# COSMOS_KEY=xxx ./run-local.sh --config config.json +# +# Prerequisites: +# - JDK 17+ +# - Maven (for first build) +# - COSMOS_KEY env var set (or in config.json) +# - Cosmos DB containers created (see infra/scripts/setup-cosmos.sh) +# ============================================================================= + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MODULE_DIR="$SCRIPT_DIR/.." 
+CONFIG_FILE="" +DURATION_OVERRIDE="" +OPS_OVERRIDE="" + +# ── Parse args ──────────────────────────────────────────────────────────── +while [[ $# -gt 0 ]]; do + case "$1" in + --config) CONFIG_FILE="$2"; shift 2 ;; + --duration) DURATION_OVERRIDE="$2"; shift 2 ;; + --ops) OPS_OVERRIDE="$2"; shift 2 ;; + -h|--help) + echo "Usage: $0 --config [--duration ] [--ops ]" + exit 0 ;; + *) echo "Unknown arg: $1"; exit 1 ;; + esac +done + +if [ -z "$CONFIG_FILE" ]; then + echo "ERROR: --config is required" + exit 1 +fi + +# ── Apply overrides via env vars ────────────────────────────────────────── +[ -n "$DURATION_OVERRIDE" ] && export DURATION_SECONDS="$DURATION_OVERRIDE" +[ -n "$OPS_OVERRIDE" ] && export OPS_PER_SEC="$OPS_OVERRIDE" + +# ── Build if needed ─────────────────────────────────────────────────────── +JAR="$MODULE_DIR/target/azure-cosmos-benchmark-4.0.1-beta.1.jar" +CP_FILE="$MODULE_DIR/target/cp.txt" + +if [ ! -f "$JAR" ]; then + echo "=== Building module (first run) ===" + cd "$MODULE_DIR" + mvn package -DskipTests -DskipCheckstyle -Dspotbugs.skip=true -Drevapi.skip=true -B -q + mvn dependency:build-classpath -Dmdep.outputFile=target/cp.txt -B -q +fi + +if [ ! -f "$CP_FILE" ]; then + cd "$MODULE_DIR" + mvn dependency:build-classpath -Dmdep.outputFile=target/cp.txt -B -q +fi + +# Build classpath with logback config, excluding log4j-slf4j-impl +CP_RAW=$(cat "$CP_FILE") +CP_FILTERED=$(echo "$CP_RAW" | tr ';' '\n' | tr ':' '\n' | grep -v 'log4j-slf4j-impl' | tr '\n' ':') +CLASSPATH="$SCRIPT_DIR:$JAR:$CP_FILTERED" + +JAVA_CMD="java -cp $CLASSPATH" +MAIN_CLASS="com.azure.cosmos.avadtest.Main" + +# ── Output directory ────────────────────────────────────────────────────── +OUTPUT_DIR="$SCRIPT_DIR/local-run-$(date +%Y%m%d-%H%M%S)" +mkdir -p "$OUTPUT_DIR" + +log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$OUTPUT_DIR/run.log"; } + +# ── PIDs for cleanup ───────────────────────────────────────────────────── +INGESTOR_PID="" +AVAD_PID="" +LV_PID="" + +cleanup() { + log "=== Stopping all processes ===" + [ -n "$INGESTOR_PID" ] && kill "$INGESTOR_PID" 2>/dev/null && wait "$INGESTOR_PID" 2>/dev/null || true + [ -n "$AVAD_PID" ] && kill "$AVAD_PID" 2>/dev/null && wait "$AVAD_PID" 2>/dev/null || true + [ -n "$LV_PID" ] && kill "$LV_PID" 2>/dev/null && wait "$LV_PID" 2>/dev/null || true + log "All processes stopped" + log "Logs: $OUTPUT_DIR" +} +trap cleanup EXIT INT TERM + +# ── Launch processes ────────────────────────────────────────────────────── + +log "=== AVAD Local Soak Test ===" +log "Config: $CONFIG_FILE" +log "Output: $OUTPUT_DIR" + +# 1. Ingestor +log "Starting ingestor (port 8080)..." +$JAVA_CMD $MAIN_CLASS --mode ingestor --config "$CONFIG_FILE" --health-port 8080 \ + > "$OUTPUT_DIR/ingestor.log" 2>&1 & +INGESTOR_PID=$! +log " Ingestor PID: $INGESTOR_PID" + +# Wait for ingestor to start producing +sleep 10 + +# 2. AVAD reader +log "Starting avad-reader (port 8081)..." +CONSUMED_LOG="$OUTPUT_DIR/consumed-avad.log" \ +$JAVA_CMD $MAIN_CLASS --mode avad-reader --config "$CONFIG_FILE" --health-port 8081 \ + > "$OUTPUT_DIR/avad-reader.log" 2>&1 & +AVAD_PID=$! +log " AVAD reader PID: $AVAD_PID" + +# 3. LV reader +log "Starting lv-reader (port 8082)..." +CONSUMED_LOG="$OUTPUT_DIR/consumed-lv.log" \ +$JAVA_CMD $MAIN_CLASS --mode lv-reader --config "$CONFIG_FILE" --health-port 8082 \ + > "$OUTPUT_DIR/lv-reader.log" 2>&1 & +LV_PID=$! 
+log " LV reader PID: $LV_PID" + +log "All 3 processes running" +log " tail -f $OUTPUT_DIR/ingestor.log" +log " tail -f $OUTPUT_DIR/avad-reader.log" +log " tail -f $OUTPUT_DIR/lv-reader.log" + +# ── Monitor loop ────────────────────────────────────────────────────────── + +check_alive() { + local name="$1" pid="$2" + if ! kill -0 "$pid" 2>/dev/null; then + log "❌ $name (PID $pid) has exited!" + return 1 + fi + return 0 +} + +log "Monitoring processes (Ctrl+C to stop)..." +while true; do + sleep 30 + + alive=true + check_alive "ingestor" "$INGESTOR_PID" || alive=false + check_alive "avad-reader" "$AVAD_PID" || alive=false + check_alive "lv-reader" "$LV_PID" || alive=false + + if [ "$alive" = false ]; then + log "One or more processes died — check logs in $OUTPUT_DIR" + exit 1 + fi + + # Print last progress line from ingestor + tail -1 "$OUTPUT_DIR/ingestor.log" 2>/dev/null | grep -o 'Progress:.*' || true +done diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh index 92e85c8a2e44..f270beea9a42 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/run-soak.sh @@ -1,19 +1,21 @@ #!/bin/bash # ============================================================================= -# AVAD Soak Test Orchestrator +# AVAD Soak Test — AKS Mode # ============================================================================= -# Single script that manages the full soak test lifecycle: -# 1. Deploy workloads via Helm -# 2. Run phase-based chaos (warm-up → steady → chaos → recovery → repeat) -# 3. Continuous reconciliation (no missed changes, AVAD ⊇ LV) -# 4. Collect results on exit +# Deploys ingestor + avad-reader + lv-reader to AKS via Helm, monitors health, +# and runs chaos scenarios on a schedule. 
+# +# Usage: +# ./run-soak.sh +# SOAK_DURATION_HOURS=12 CHAOS_ENABLED=false ./run-soak.sh +# VALUES_OVERRIDE=values-prod.yaml ./run-soak.sh # # Prerequisites: # - AKS cluster configured (kubectl context set) # - Helm 3 installed # - ACR image pushed (see infra/scripts/setup-acr.sh) # - Cosmos containers created (see infra/scripts/setup-cosmos.sh) -# - az CLI logged in (for partition split + lease throttle) +# - K8s secrets created (see infra/README.md) # ============================================================================= set -euo pipefail @@ -25,27 +27,19 @@ NAMESPACE="${NAMESPACE:-cosmos-soak}" RELEASE="${RELEASE:-cosmos-soak}" VALUES_FILE="${VALUES_FILE:-$SCRIPT_DIR/infra/chart/values.yaml}" VALUES_OVERRIDE="${VALUES_OVERRIDE:-}" -CHAOS_SCHEDULE="${CHAOS_SCHEDULE:-$SCRIPT_DIR/chaos/chaos-schedule.yaml}" -# Timing (all in seconds) SOAK_DURATION_HOURS="${SOAK_DURATION_HOURS:-24}" -WARMUP_SEC="${WARMUP_SEC:-1800}" # 30 min -STEADY_SEC="${STEADY_SEC:-3600}" # 60 min -RECOVERY_SEC="${RECOVERY_SEC:-1800}" # 30 min default -HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-300}" # 5 min +WARMUP_SEC="${WARMUP_SEC:-1800}" +STEADY_SEC="${STEADY_SEC:-3600}" +RECOVERY_SEC="${RECOVERY_SEC:-1800}" +HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-300}" -# Behavior CHAOS_ENABLED="${CHAOS_ENABLED:-true}" ABORT_ON_GAP="${ABORT_ON_GAP:-false}" -# Cosmos (for chaos scripts) export COSMOS_ACCOUNT="${COSMOS_ACCOUNT:-abhm-cfp-region-test}" export COSMOS_RG="${COSMOS_RG:-abhm-rg}" -export COSMOS_DB="${COSMOS_DB:-graph_db}" -export FEED_CONTAINER="${FEED_CONTAINER:-avad-test}" -export LEASE_CONTAINER="${LEASE_CONTAINER:-avad-test-leases}" -# Output OUTPUT_DIR="$SCRIPT_DIR/soak-results-$(date +%Y%m%d-%H%M%S)" mkdir -p "$OUTPUT_DIR" @@ -53,18 +47,11 @@ SOAK_DURATION_SEC=$((SOAK_DURATION_HOURS * 3600)) START_TIME=$(date +%s) CHAOS_PID="" -# ── Helper functions ────────────────────────────────────────────────────── +# ── Helpers ─────────────────────────────────────────────────────────────── log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$OUTPUT_DIR/soak.log"; } - -elapsed() { - local now=$(date +%s) - echo $(( now - START_TIME )) -} - -is_expired() { - [ "$SOAK_DURATION_SEC" -gt 0 ] && [ "$(elapsed)" -ge "$SOAK_DURATION_SEC" ] -} +elapsed() { echo $(( $(date +%s) - START_TIME )); } +is_expired() { [ "$SOAK_DURATION_SEC" -gt 0 ] && [ "$(elapsed)" -ge "$SOAK_DURATION_SEC" ]; } # ── Cleanup ─────────────────────────────────────────────────────────────── @@ -79,11 +66,10 @@ cleanup() { > "$OUTPUT_DIR/${name}.log" 2>/dev/null || true done - log "Collecting health metrics..." kubectl get pods -n "$NAMESPACE" -o wide \ > "$OUTPUT_DIR/pods-final.txt" 2>/dev/null || true - log "Results saved to: $OUTPUT_DIR" + log "Results: $OUTPUT_DIR" log "=== Soak test ended (elapsed: $(elapsed)s) ===" } trap cleanup EXIT ERR INT TERM @@ -94,108 +80,72 @@ check_health() { log "Running health check..." 
local healthy=true - # Check all pods are running local not_ready=$(kubectl get pods -n "$NAMESPACE" \ --field-selector=status.phase!=Running \ -o name 2>/dev/null | wc -l) if [ "$not_ready" -gt 0 ]; then log " ⚠️ $not_ready pods not in Running state" + healthy=false fi - # Check metrics from ingestor pods - for pod in $(kubectl get pods -n "$NAMESPACE" \ - -l "app.kubernetes.io/component=ingestor" \ - -o jsonpath='{.items[*].metadata.name}'); do - local metrics=$(kubectl exec -n "$NAMESPACE" "$pod" -- \ - curl -s http://localhost:8080/metrics 2>/dev/null || echo "unreachable") - echo "$metrics" >> "$OUTPUT_DIR/metrics-$(date +%H%M%S).log" - - # Check for missing previousImage - local missing=$(echo "$metrics" | grep "previous_image_missing" | awk '{print $2}') - if [ -n "$missing" ] && [ "$missing" != "0" ]; then - log " ❌ Missing previousImage count: $missing" - healthy=false - fi - done + # Check pod readiness + local total_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null | wc -l) + local ready_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null \ + | awk '$2 ~ /1\/1/' | wc -l) + log " Pods: $ready_pods/$total_pods ready" if [ "$healthy" = true ]; then log " ✅ Health check passed" else log " ❌ Health check FAILED" if [ "$ABORT_ON_GAP" = "true" ]; then - log "ABORT_ON_GAP is set — stopping soak test" + log "ABORT_ON_GAP set — stopping" exit 1 fi fi } -# ── Chaos runner (background) ──────────────────────────────────────────── +# ── Chaos runner ────────────────────────────────────────────────────────── run_chaos_loop() { - log "Chaos loop starting (schedule: $CHAOS_SCHEDULE)" - - # Simple time-based chaos: iterate through scenarios + log "Chaos loop starting" local iteration=0 - while true; do + + while ! is_expired; do iteration=$((iteration + 1)) log "=== Chaos iteration $iteration ===" - # Pod kill (every 2 hours → check every loop) - if [ $((iteration % 1)) -eq 0 ]; then - log "Firing: pod-kill" - bash "$SCRIPT_DIR/chaos/scenarios/pod-kill.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log" - sleep "$RECOVERY_SEC" - check_health - fi - - # Lease throttle (every other iteration) - if [ $((iteration % 2)) -eq 0 ]; then - log "Firing: lease-throttle" - bash "$SCRIPT_DIR/chaos/scenarios/lease-throttle.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log" - sleep "$RECOVERY_SEC" - check_health - fi - - # Restart storm (every 4th iteration) - if [ $((iteration % 4)) -eq 0 ]; then - log "Firing: restart-storm" - bash "$SCRIPT_DIR/chaos/scenarios/restart-storm.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log" - sleep "$RECOVERY_SEC" - check_health - fi + # Pod kill (every iteration) + log "Firing: pod-kill" + bash "$SCRIPT_DIR/chaos/scenarios/pod-kill.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log" + sleep "$RECOVERY_SEC" + check_health - # Partition split (every 6th iteration) - if [ $((iteration % 6)) -eq 0 ]; then + # Partition split (every 3rd iteration) + if [ $((iteration % 3)) -eq 0 ]; then log "Firing: partition-split" bash "$SCRIPT_DIR/chaos/scenarios/partition-split.sh" 2>&1 | tee -a "$OUTPUT_DIR/chaos.log" sleep "$RECOVERY_SEC" check_health fi - # Steady state between chaos events log "Steady state for ${STEADY_SEC}s..." 
sleep "$STEADY_SEC" - - if is_expired; then - log "Soak duration expired, stopping chaos loop" - break - fi done } # ── Main ────────────────────────────────────────────────────────────────── -log "=== AVAD Soak Test Starting ===" -log "Duration: ${SOAK_DURATION_HOURS}h (${SOAK_DURATION_SEC}s)" -log "Chaos: $CHAOS_ENABLED" +log "=== AVAD AKS Soak Test ===" +log "Duration: ${SOAK_DURATION_HOURS}h | Chaos: $CHAOS_ENABLED" log "Output: $OUTPUT_DIR" -# 1. Create namespace +# 1. Namespace kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - -# 2. Deploy via Helm -log "Deploying workloads via Helm..." +# 2. Helm deploy +log "Deploying via Helm..." HELM_ARGS=( upgrade --install "$RELEASE" "$SCRIPT_DIR/infra/chart" @@ -203,40 +153,33 @@ HELM_ARGS=( --values "$VALUES_FILE" ) [ -n "$VALUES_OVERRIDE" ] && HELM_ARGS+=(--values "$VALUES_OVERRIDE") - helm "${HELM_ARGS[@]}" log "Helm deploy complete" -# 3. Wait for warm-up -log "Warm-up phase (${WARMUP_SEC}s)..." -sleep 30 # let pods start scheduling - -# Wait for all pods to be ready +# 3. Wait for pods +log "Warm-up (${WARMUP_SEC}s)..." +sleep 30 kubectl wait --for=condition=ready pods \ --all -n "$NAMESPACE" \ --timeout="${WARMUP_SEC}s" || { - log "⚠️ Not all pods ready after warm-up, continuing anyway" + log "⚠️ Not all pods ready after warm-up" } - -log "Warm-up complete" check_health -# 4. Start chaos loop in background (if enabled) +# 4. Chaos (background) if [ "$CHAOS_ENABLED" = "true" ]; then run_chaos_loop & CHAOS_PID=$! log "Chaos loop started (PID: $CHAOS_PID)" fi -# 5. Main monitoring loop -log "Entering main monitoring loop..." +# 5. Monitor +log "Monitoring (Ctrl+C to stop)..." while ! is_expired; do sleep "$HEALTH_CHECK_INTERVAL" check_health done log "=== Soak duration reached (${SOAK_DURATION_HOURS}h) ===" -log "Final health check..." check_health - log "=== Soak Test Complete ===" From 34d8a26c11a69c22b931594118d93e301683e3ed Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 13:06:28 -0400 Subject: [PATCH 13/28] Fix: remove unsupported setStartTime for AVAD CFP mode AllVersionsAndDeletes mode does not support startTime option. Also removed from LV reader (not needed for fresh soak runs). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../main/java/com/azure/cosmos/avadtest/reader/AvadReader.java | 1 - .../com/azure/cosmos/avadtest/reader/LatestVersionReader.java | 1 - 2 files changed, 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index 10c1ee491f04..7236f8e52cae 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -92,7 +92,6 @@ public void run() throws InterruptedException { ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions(); options.setLeasePrefix(LEASE_PREFIX); options.setFeedPollDelay(Duration.ofSeconds(1)); - options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() .hostName("avad-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + workerIdx) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index dbe1e7aaed0f..fe31bacce555 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -75,7 +75,6 @@ public void run() throws InterruptedException { ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions(); options.setLeasePrefix(LEASE_PREFIX); options.setFeedPollDelay(Duration.ofSeconds(1)); - options.setStartTime(Instant.now().minus(Duration.ofDays(5))); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() .hostName("lv-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + i) From 85d579bfc2b824bc71b03a53829e547200d57ef2 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 13:18:04 -0400 Subject: [PATCH 14/28] Fix: enable contentResponseOnWrite on all clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CFP lease management requires contentResponseOnWriteEnabled(true). AvadReader and Ingestor were missing it — only LatestVersionReader had it. Now all three clients set it consistently. 
+Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java  | 1 +
 .../main/java/com/azure/cosmos/avadtest/reader/AvadReader.java  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
index b652767ab4e2..f15aa295633f 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java
@@ -81,6 +81,7 @@ public Ingestor(TestConfig config) throws Exception {
             .endpoint(config.endpoint())
             .key(config.key())
             .gatewayMode()
+            .contentResponseOnWriteEnabled(true)
             .preferredRegions(config.preferredRegions())
             .buildAsyncClient();
 
diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
index 7236f8e52cae..be2cd60ba49e 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
@@ -66,6 +66,7 @@ public AvadReader(TestConfig config) throws Exception {
             .endpoint(config.readerEndpoint())
             .key(config.key())
             .gatewayMode()
+            .contentResponseOnWriteEnabled(true)
             .preferredRegions(config.preferredRegions())
             .buildAsyncClient();

From 22811a67190b0b75271ccd5ba31a8af8d92568cb Mon Sep 17 00:00:00 2001
From: Abhijeet Mohanty
Date: Tue, 5 May 2026 13:55:02 -0400
Subject: [PATCH 15/28] Make ReconciliationWriter synchronous
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CFP's handleChanges callback must complete before the lease is
checkpointed. The old async Sink/Flux pipeline let the lease advance
before writes were persisted — causing drops on backpressure and data
loss on crash.

Now record() blocks until the upsert succeeds or retries exhaust.
This ensures no lease checkpoint races.

Retries use exponential backoff (500ms, 1s, 2s) with the same
CosmosException-based retryability check.

Removed: Sinks buffer, Disposable subscription, requeue logic,
dropCount (writes either succeed or error — no silent drops).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../reconciliation/ReconciliationWriter.java | 114 +++++------------- 1 file changed, 28 insertions(+), 86 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index 9a5a8380cb22..7cf7df67d8cb 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -2,6 +2,7 @@ import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosException; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.models.CosmosItemRequestOptions; @@ -10,9 +11,6 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import reactor.core.publisher.Mono; -import reactor.core.publisher.Sinks; -import reactor.core.scheduler.Schedulers; import java.time.Duration; import java.time.Instant; @@ -20,34 +18,19 @@ /** * Writes reconciliation events to a shared Cosmos container. - * All consumers (CFP LV, CFP AVAD, Spark LV, Spark AVAD, Ingestor) write here - * with a common schema, enabling a single reconciliation query across all of them. + * Synchronous — blocks until the write succeeds or is permanently dropped. + * This ensures CFP does not checkpoint the lease until the reconciliation + * record is persisted. 
* * Container: "reconciliation" in same database * Partition key: /correlationId - * - * Document schema: - * { - * "id": "{source}-{correlationId}", // unique per source+event - * "correlationId": "corr-uuid", - * "source": "ingestor|cfp-lv|cfp-avad|spark-lv|spark-avad", - * "seqNo": 12345, - * "opType": "create|replace|upsert|delete", - * "partitionKey": "tenant-42", - * "lsn": 999, // -1 for ingestor - * "hasPreviousImage": true, // only for AVAD sources - * "crts": 1714300800000, // conflict resolution timestamp (epoch ms), -1 if N/A - * "timestamp": "2026-04-28T12:00:00Z", - * "recordedAt": "2026-04-28T12:00:01Z" - * } */ public final class ReconciliationWriter implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(ReconciliationWriter.class); private static final String RECONCILIATION_CONTAINER = "reconciliation"; private static final int MAX_RETRIES = 3; - private static final int MAX_REQUEUES = 2; - private static final String REQUEUE_COUNT_FIELD = "_requeueCount"; + private static final long RETRY_BASE_MS = 500; private static final CosmosEndToEndOperationLatencyPolicyConfig E2E_POLICY = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(6)).build(); @@ -57,10 +40,6 @@ public final class ReconciliationWriter implements AutoCloseable { private final LongAdder writeCount = new LongAdder(); private final LongAdder errorCount = new LongAdder(); private final LongAdder retryCount = new LongAdder(); - private final LongAdder dropCount = new LongAdder(); - - private final Sinks.Many sink; - private final reactor.core.Disposable subscription; /** * @param client shared CosmosAsyncClient — caller owns lifecycle @@ -74,20 +53,13 @@ public ReconciliationWriter(CosmosAsyncClient client, String database, String so .getDatabase(database) .getContainer(RECONCILIATION_CONTAINER); - this.sink = Sinks.many().multicast().onBackpressureBuffer(100_000); - - this.subscription = sink.asFlux() - .flatMap(this::writeDoc, 50) - .subscribeOn(Schedulers.boundedElastic()) - .subscribe(); - log.info("ReconciliationWriter initialized: source={}, container={}", source, RECONCILIATION_CONTAINER); } /** * Record a produced or consumed event for reconciliation. - * Non-blocking — buffers internally and writes async. + * Blocks until the write succeeds or all retries are exhausted. 
*/ public void record(String eventId, long seqNo, String opType, String partitionKey, long lsn, boolean hasPreviousImage, long crts) { @@ -102,58 +74,35 @@ public void record(String eventId, long seqNo, String opType, doc.put("hasPreviousImage", hasPreviousImage); doc.put("crts", crts); doc.put("timestamp", Instant.now().toString()); - doc.put(REQUEUE_COUNT_FIELD, 0); - - Sinks.EmitResult result = sink.tryEmitNext(doc); - if (result.isFailure()) { - dropCount.increment(); - log.warn("Reconciliation sink full/closed, dropping event: eventId={}", eventId); - } - } - - private Mono writeDoc(ObjectNode doc) { - String eventId = doc.get("correlationId").asText(); CosmosItemRequestOptions options = new CosmosItemRequestOptions(); options.setCosmosEndToEndOperationLatencyPolicyConfig(E2E_POLICY); - return container.upsertItem(doc, new PartitionKey(eventId), options) - .doOnSuccess(r -> writeCount.increment()) - .retryWhen(reactor.util.retry.Retry.backoff(MAX_RETRIES, Duration.ofMillis(500)) - .maxBackoff(Duration.ofSeconds(2)) - .filter(this::isRetryable) - .doBeforeRetry(signal -> { - retryCount.increment(); - log.warn("Reconciliation write retry #{} for id={}: {}", - signal.totalRetries() + 1, - doc.get("id").asText(), - signal.failure().getMessage()); - })) - .doOnError(e -> { - int requeueCount = doc.has(REQUEUE_COUNT_FIELD) ? doc.get(REQUEUE_COUNT_FIELD).asInt() : 0; - if (requeueCount < MAX_REQUEUES && isRetryable(e)) { - doc.put(REQUEUE_COUNT_FIELD, requeueCount + 1); - Sinks.EmitResult result = sink.tryEmitNext(doc); - if (result.isSuccess()) { - log.warn("Requeueing (attempt {}): id={}", requeueCount + 1, doc.get("id").asText()); - } else { - errorCount.increment(); - log.error("Requeue failed (sink full/closed): id={}", doc.get("id").asText()); - } - } else { + for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + container.upsertItem(doc, new PartitionKey(eventId), options) + .block(Duration.ofSeconds(10)); + writeCount.increment(); + return; + } catch (Exception e) { + if (!isRetryable(e) || attempt == MAX_RETRIES) { errorCount.increment(); - dropCount.increment(); - log.error("Permanently dropped: id={}, requeues={}, error={}", - doc.get("id").asText(), requeueCount, e.getMessage()); + log.error("Reconciliation write failed (attempt {}): id={}, error={}", + attempt + 1, doc.get("id").asText(), e.getMessage()); + return; } - }) - .onErrorResume(e -> Mono.empty()) - .then(); + retryCount.increment(); + long backoff = RETRY_BASE_MS * (1L << attempt); + log.warn("Reconciliation write retry {} for id={}: {}", attempt + 1, + doc.get("id").asText(), e.getMessage()); + try { Thread.sleep(backoff); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); return; } + } + } } private boolean isRetryable(Throwable e) { - if (e instanceof com.azure.cosmos.CosmosException) { - int status = ((com.azure.cosmos.CosmosException) e).getStatusCode(); + if (e instanceof CosmosException) { + int status = ((CosmosException) e).getStatusCode(); return status != 404 && status != 401 && status != 403; } return true; @@ -161,17 +110,10 @@ private boolean isRetryable(Throwable e) { public long getWriteCount() { return writeCount.sum(); } public long getErrorCount() { return errorCount.sum(); } - public long getDropCount() { return dropCount.sum(); } @Override public void close() { - sink.tryEmitComplete(); - try { - Thread.sleep(10_000); - } catch (InterruptedException ignored) {} - subscription.dispose(); - // Client is NOT closed here — caller owns the lifecycle - 
log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}, drops={}", - source, writeCount.sum(), retryCount.sum(), errorCount.sum(), dropCount.sum()); + log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}", + source, writeCount.sum(), retryCount.sum(), errorCount.sum()); } } From fb6d5e11a971aa941a7989cea970287a696893f6 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 13:59:19 -0400 Subject: [PATCH 16/28] Fix ingestor bulk response correlation with IdentityHashMap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old approach matched bulk responses to metadata by scanning a List for matching docId — this failed when multiple ops targeted the same docId or when op.getId() didn't match expectations. Now uses IdentityHashMap keyed by object reference. executeBulkOperations returns the same CosmosItemOperation in the response, so reference equality gives O(1) lookup with no ambiguity. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/ingestor/Ingestor.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index f15aa295633f..b93d7c80cb6e 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -21,7 +21,9 @@ import java.time.Duration; import java.time.Instant; import java.util.ArrayList; +import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; @@ -143,16 +145,16 @@ public void run() throws InterruptedException { private Flux executeBulkBatch() { List operations = new ArrayList<>(opsPerTick); - List metas = new ArrayList<>(opsPerTick); + Map opToMeta = new IdentityHashMap<>(opsPerTick); for (int i = 0; i < opsPerTick; i++) { int roll = ThreadLocalRandom.current().nextInt(100); if (roll < 40) { - addCreate(operations, metas); + addCreate(operations, opToMeta); } else if (roll < 80) { - addUpsert(operations, metas); + addUpsert(operations, opToMeta); } else { - addDelete(operations, metas); + addDelete(operations, opToMeta); } } @@ -161,12 +163,12 @@ private Flux executeBulkBatch() { } return container.executeBulkOperations(Flux.fromIterable(operations), bulkOptions) - .doOnNext(response -> handleBulkResponse(response, metas)) + .doOnNext(response -> handleBulkResponse(response, opToMeta)) .then() .flux(); } - private void addCreate(List ops, List metas) { + private void addCreate(List ops, Map opToMeta) { String docId = UUID.randomUUID().toString(); String eventId = UUID.randomUUID().toString(); String pk = "tenant-" + ThreadLocalRandom.current().nextInt(config.logicalPartitionCount()); @@ -174,11 +176,12 @@ private void addCreate(List ops, List metas) { String ts = Instant.now().toString(); ObjectNode doc = buildDoc(docId, pk, seq, eventId, "create", ts); - ops.add(CosmosBulkOperations.getCreateItemOperation(doc, new PartitionKey(pk))); - metas.add(new OpMeta(eventId, seq, "create", pk, ts, docId)); + CosmosItemOperation op = CosmosBulkOperations.getCreateItemOperation(doc, new PartitionKey(pk)); + ops.add(op); 
+ opToMeta.put(op, new OpMeta(eventId, seq, "create", pk, ts, docId)); } - private void addUpsert(List ops, List metas) { + private void addUpsert(List ops, Map opToMeta) { String recent = getRecentId(); String docId; String pk; @@ -196,14 +199,15 @@ private void addUpsert(List ops, List metas) { String ts = Instant.now().toString(); ObjectNode doc = buildDoc(docId, pk, seq, eventId, "upsert", ts); - ops.add(CosmosBulkOperations.getUpsertItemOperation(doc, new PartitionKey(pk))); - metas.add(new OpMeta(eventId, seq, "upsert", pk, ts, docId)); + CosmosItemOperation op = CosmosBulkOperations.getUpsertItemOperation(doc, new PartitionKey(pk)); + ops.add(op); + opToMeta.put(op, new OpMeta(eventId, seq, "upsert", pk, ts, docId)); } - private void addDelete(List ops, List metas) { + private void addDelete(List ops, Map opToMeta) { String recent = getRecentId(); if (recent == null) { - addCreate(ops, metas); + addCreate(ops, opToMeta); return; } @@ -213,30 +217,16 @@ private void addDelete(List ops, List metas) { long seq = seqCounter.incrementAndGet(); String ts = Instant.now().toString(); - ops.add(CosmosBulkOperations.getDeleteItemOperation(docId, new PartitionKey(pk))); - metas.add(new OpMeta(docId, seq, "delete", pk, ts, docId)); + CosmosItemOperation op = CosmosBulkOperations.getDeleteItemOperation(docId, new PartitionKey(pk)); + ops.add(op); + opToMeta.put(op, new OpMeta(docId, seq, "delete", pk, ts, docId)); clearRecentId(recent); } - private void handleBulkResponse(CosmosBulkOperationResponse response, List metas) { - CosmosItemOperation op = response.getOperation(); - - // Find matching metadata by correlating operation index - int idx = -1; - // Use operation identity to find the matching meta - // The operations list and metas list are parallel arrays - // but executeBulkOperations may reorder responses. - // Use the operation's id to find the correct meta. - String opId = op.getId(); - for (int i = 0; i < metas.size(); i++) { - if (metas.get(i).docId.equals(opId)) { - idx = i; - break; - } - } - - if (idx < 0) return; - OpMeta meta = metas.get(idx); + private void handleBulkResponse(CosmosBulkOperationResponse response, + Map opToMeta) { + OpMeta meta = opToMeta.get(response.getOperation()); + if (meta == null) return; if (response.getResponse() != null && response.getResponse().isSuccessStatusCode()) { successCount.increment(); From 05c6fa294d0c55e19fbce5a246eea1250cb18d18 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 14:11:50 -0400 Subject: [PATCH 17/28] Fix ingestor: replace Flux.interval+concatMap with blocking loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flux.interval + concatMap causes OverflowException when bulk batch takes longer than the tick interval — concatMap requests 1 at a time but interval keeps emitting ticks that can't be buffered. Now uses a simple while loop: submit batch (blocking via toStream), sleep for remaining tick time. This guarantees rate limiting without reactive backpressure issues. 
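
The pacing pattern in isolation (a sketch; TICK_INTERVAL_MS, running,
deadline and executeBulkBatch() are the existing members touched below):

    while (running.get() && System.currentTimeMillis() < deadline) {
        long tickStart = System.currentTimeMillis();
        executeBulkBatch();   // blocks until every bulk response is handled
        long sleepMs = TICK_INTERVAL_MS - (System.currentTimeMillis() - tickStart);
        if (sleepMs > 0) {
            Thread.sleep(sleepMs);   // sleep only the remainder of the tick
        }
    }

If a batch overruns the tick, the next batch starts immediately instead
of queueing ticks — there is nothing to buffer, so the overflow the
reactive version hit cannot occur.
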
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/ingestor/Ingestor.java | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index b93d7c80cb6e..b68967d7207a 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -16,7 +16,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; -import reactor.core.scheduler.Schedulers; import java.time.Duration; import java.time.Instant; @@ -25,9 +24,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; -import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; @@ -63,7 +60,6 @@ public final class Ingestor implements AutoCloseable { private final String precomputedPayload; // Reactor subscriptions — disposed on close to prevent leaks - private volatile reactor.core.Disposable mainSubscription; private volatile reactor.core.Disposable progressSubscription; private final CosmosBulkExecutionOptions bulkOptions; @@ -103,22 +99,7 @@ public void run() throws InterruptedException { log.info("Starting bulk ingestion at {} ops/sec, duration={}", config.opsPerSec(), durationSec > 0 ? durationSec + "s" : "unlimited"); - CountDownLatch latch = new CountDownLatch(1); - - // Each tick: build a batch of operations and submit via bulk API - this.mainSubscription = Flux.interval(Duration.ofMillis(TICK_INTERVAL_MS)) - .takeWhile(tick -> running.get()) - .concatMap(tick -> executeBulkBatch()) - .doOnError(e -> log.error("Bulk ingestion error", e)) - .doOnComplete(latch::countDown) - .subscribe(); - - if (durationSec > 0) { - Schedulers.single().schedule(() -> { - log.info("Duration {}s reached, stopping ingestor...", durationSec); - running.set(false); - }, durationSec, TimeUnit.SECONDS); - } + long deadline = durationSec > 0 ? 
System.currentTimeMillis() + (durationSec * 1000L) : Long.MAX_VALUE;
 
         this.progressSubscription = Flux.interval(Duration.ofSeconds(30))
             .takeWhile(tick -> running.get())
@@ -140,10 +121,30 @@
             running.set(false);
         }));
 
-        latch.await();
+        // Main ingestion loop: submit a bulk batch, then sleep for the remaining tick interval
+        while (running.get() && System.currentTimeMillis() < deadline) {
+            long tickStart = System.currentTimeMillis();
+
+            try {
+                executeBulkBatch();
+            } catch (Exception e) {
+                log.error("Bulk batch error", e);
+            }
+
+            long elapsed = System.currentTimeMillis() - tickStart;
+            long sleepMs = TICK_INTERVAL_MS - elapsed;
+            if (sleepMs > 0) {
+                Thread.sleep(sleepMs);
+            }
+        }
+
+        if (System.currentTimeMillis() >= deadline) {
+            log.info("Duration {}s reached, stopping ingestor...", durationSec);
+        }
+        running.set(false);
     }
 
-    private Flux<Void> executeBulkBatch() {
+    private void executeBulkBatch() {
         List<CosmosItemOperation> operations = new ArrayList<>(opsPerTick);
         Map<CosmosItemOperation, OpMeta> opToMeta = new IdentityHashMap<>(opsPerTick);
 
@@ -158,14 +159,11 @@
         }
 
-        if (operations.isEmpty()) {
-            return Flux.empty();
-        }
+        if (operations.isEmpty()) return;
 
-        return container.executeBulkOperations(Flux.fromIterable(operations), bulkOptions)
-            .doOnNext(response -> handleBulkResponse(response, opToMeta))
-            .then()
-            .flux();
+        container.executeBulkOperations(Flux.fromIterable(operations), bulkOptions)
+            .toStream()
+            .forEach(response -> handleBulkResponse(response, opToMeta));
     }
 
     private void addCreate(List<CosmosItemOperation> ops, Map<CosmosItemOperation, OpMeta> opToMeta) {
@@ -284,7 +282,6 @@ public void close() {
         log.info("Closing Ingestor...");
         running.set(false);
         if (progressSubscription != null) { progressSubscription.dispose(); }
-        if (mainSubscription != null) { mainSubscription.dispose(); }
         try { eventLog.close(); } catch (Exception e) { /* ignore */ }
         reconWriter.close();
         client.close();

From 9f910d07b7e55365626216d986851e3b40adccd4 Mon Sep 17 00:00:00 2001
From: Abhijeet Mohanty
Date: Tue, 5 May 2026 14:34:57 -0400
Subject: [PATCH 18/28] Remove inline CRTS tracking from AvadReader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CRTS ordering validation belongs in the Reconciler (offline), not in
the reader. The reader should just record what it sees — the Reconciler
already has checkOrderingByCrts() for this.

Removes the per-partition ConcurrentHashMap<String, AtomicLong> that
would grow unboundedly over multi-day runs (one entry per partition
key).
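
The offline check this defers to looks roughly like the following
(hypothetical shape; RecordedEvent is a stand-in, and the authoritative
version is checkOrderingByCrts() in Reconciler.java, which reads the
recorded events back from Cosmos):

    // events for one source, sorted by LSN; CRTS must not regress per pk
    long violations = 0;
    Map<String, Long> lastCrtsByPk = new HashMap<>();
    for (RecordedEvent e : eventsSortedByLsn) {
        if (e.crts <= 0) continue;                 // -1 means CRTS not applicable
        Long prev = lastCrtsByPk.put(e.partitionKey, e.crts);
        if (prev != null && e.crts < prev) {
            violations++;                          // CRTS went backwards for this pk
        }
    }

Memory is not a concern offline: the map lives only for the duration of
one reconciliation pass.
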
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../cosmos/avadtest/reader/AvadReader.java    | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
index be2cd60ba49e..a9c3a44a671c 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
@@ -21,9 +21,7 @@
 import java.lang.management.ManagementFactory;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.LongAdder;
 
 /**
@@ -53,10 +51,6 @@ public final class AvadReader implements AutoCloseable {
     private final LongAdder totalReplaces = new LongAdder();
     private final LongAdder totalDeletes = new LongAdder();
     private final LongAdder totalCreates = new LongAdder();
-    private final LongAdder crtsViolationCount = new LongAdder();
-
-    // Per-partition CRTS tracking for ordering validation
-    private final ConcurrentHashMap<String, AtomicLong> lastCrtsByPartition = new ConcurrentHashMap<>();
 
     public AvadReader(TestConfig config) throws Exception {
         this.config = config;
@@ -174,16 +168,6 @@ private void handleChanges(List<ChangeFeedProcessorItem> items) {
             }
         }
 
-        // CRTS ordering validation per partition key
-        if (crts > 0 && !pk.isEmpty()) {
-            AtomicLong lastCrts = lastCrtsByPartition.computeIfAbsent(pk, k -> new AtomicLong(-1));
-            long prev = lastCrts.getAndSet(crts);
-            if (prev > 0 && crts < prev) {
-                crtsViolationCount.increment();
-                log.warn("⚠️ CRTS ordering violation: pk={}, prevCrts={}, currCrts={}", pk, prev, crts);
-            }
-        }
-
         eventLog.logConsumedAvad(eventId, seqNo, opType, pk, timestamp, lsn, crts);
         reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts);
     }
@@ -202,10 +186,6 @@ private void logCorrectnessReport() {
         } else {
             log.info("✅ All replace/delete events have previous image");
         }
-        long crtsViolations = crtsViolationCount.sum();
-        if (crtsViolations > 0) {
-            log.error("❌ {} CRTS ordering violations detected", crtsViolations);
-        }
     }
 
     private static String getTextOrEmpty(JsonNode node, String field) {

From 2c82dea73b01fb1431a45fcc3f8e7d2b22e86b09 Mon Sep 17 00:00:00 2001
From: Abhijeet Mohanty
Date: Tue, 5 May 2026 14:38:34 -0400
Subject: [PATCH 19/28] Propagate ReconciliationWriter failures to
 handleChanges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

record() now throws RuntimeException on permanent write failure instead
of silently returning. This causes CFP's handleChanges to fail, which
prevents the lease continuation token from advancing. CFP will retry
the batch on the next poll.

Without this, a recon write failure would silently drop the event and
advance the lease — making the gap undetectable.
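
Consumer-side effect, sketched against the handleChanges shape both
readers use (field extraction elided; the local variables come from the
change feed item as in the real readers):

    private void handleChanges(List<ChangeFeedProcessorItem> items) {
        for (ChangeFeedProcessorItem item : items) {
            // ...extract eventId/seqNo/opType/pk/lsn/hasPrevious/crts from item...
            reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts);
            // an unhandled RuntimeException from record() fails this batch,
            // so CFP re-delivers it instead of checkpointing past the event
        }
    }
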
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avadtest/reconciliation/ReconciliationWriter.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index 7cf7df67d8cb..cce8f55ad49f 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -60,6 +60,8 @@ public ReconciliationWriter(CosmosAsyncClient client, String database, String so /** * Record a produced or consumed event for reconciliation. * Blocks until the write succeeds or all retries are exhausted. + * Throws on permanent failure so the caller (CFP handleChanges) can + * fail the batch and prevent the lease continuation from advancing. */ public void record(String eventId, long seqNo, String opType, String partitionKey, long lsn, boolean hasPreviousImage, long crts) { @@ -87,9 +89,8 @@ public void record(String eventId, long seqNo, String opType, } catch (Exception e) { if (!isRetryable(e) || attempt == MAX_RETRIES) { errorCount.increment(); - log.error("Reconciliation write failed (attempt {}): id={}, error={}", - attempt + 1, doc.get("id").asText(), e.getMessage()); - return; + throw new RuntimeException( + "Reconciliation write failed after " + (attempt + 1) + " attempts: id=" + doc.get("id").asText(), e); } retryCount.increment(); long backoff = RETRY_BASE_MS * (1L << attempt); From de96e884be8792da88236bd71218ed0c513b0104 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 15:49:23 -0400 Subject: [PATCH 20/28] =?UTF-8?q?Remove=20soak-health=20container=20?= =?UTF-8?q?=E2=80=94=20use=20offline=20Reconciler=20instead?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HealthMonitor no longer writes snapshots to soak-health. It still queries the reconciliation container for gap/parity/previousImage checks and logs results, but doesn't persist them. The offline Reconciler is the authoritative validation path. soak-health was redundant and produced misleading UNHEALTHY entries due to flaky N+1 gap detection queries. Removed: healthContainer field, writeHealthSnapshot method, soak-health from setup-cosmos.sh, unused Jackson imports. 
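
The surviving checks are plain aggregates over the reconciliation
container. Illustrative query only (field names follow the writer
schema; roughly what countMissingPreviousImage() runs):

    String query = "SELECT VALUE COUNT(1) FROM c"
        + " WHERE c.source = 'cfp-avad'"
        + " AND c.opType IN ('replace', 'delete')"
        + " AND c.hasPreviousImage = false";
    // a non-zero count is logged as a failure, but no longer persisted
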
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/infra/scripts/setup-cosmos.sh | 12 ------ .../cosmos/avadtest/health/HealthMonitor.java | 37 ------------------- .../reconciliation/ReconciliationWriter.java | 2 +- 3 files changed, 1 insertion(+), 50 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh index 7bd5e33e31de..b0a9d02e98a6 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-cosmos.sh @@ -51,16 +51,4 @@ az cosmosdb sql container create \ --ttl 86400 \ --output none 2>/dev/null || echo " already exists" -# Soak health container (/runId PK, TTL 30 days) -echo "Creating soak-health container" -az cosmosdb sql container create \ - --account-name "$COSMOS_ACCOUNT" \ - --resource-group "$COSMOS_RG" \ - --database-name "$COSMOS_DB" \ - --name "soak-health" \ - --partition-key-path "/runId" \ - --throughput 400 \ - --ttl 2592000 \ - --output none 2>/dev/null || echo " already exists" - echo "=== All containers ready ===" diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java index c304f17f5596..5a8e7a90575f 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/health/HealthMonitor.java @@ -7,8 +7,6 @@ import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.PartitionKey; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.databind.node.ObjectNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Mono; @@ -37,11 +35,9 @@ public final class HealthMonitor { private static final Logger log = LoggerFactory.getLogger(HealthMonitor.class); private static final String RECONCILIATION_CONTAINER = "reconciliation"; - private static final String HEALTH_CONTAINER = "soak-health"; private final CosmosAsyncClient client; private final CosmosAsyncContainer reconContainer; - private final CosmosAsyncContainer healthContainer; private final String runId; private final int gapSlaMinutes; @@ -59,10 +55,6 @@ public HealthMonitor(TestConfig config, String runId, int gapSlaMinutes) { this.reconContainer = client .getDatabase(config.database()) .getContainer(RECONCILIATION_CONTAINER); - - this.healthContainer = client - .getDatabase(config.database()) - .getContainer(HEALTH_CONTAINER); } /** @@ -116,10 +108,6 @@ public int runChecks() { healthy = false; } - // Write health snapshot - writeHealthSnapshot(now, producedCount, avadConsumed, - lvConsumed, gapCount, parityGaps, missingPrev, healthy); - String status = healthy ? "✅ HEALTHY" : "❌ UNHEALTHY"; log.info(" Status: {}", status); return healthy ? 
0 : 1; @@ -223,31 +211,6 @@ private long countMissingPreviousImage() { } } - private void writeHealthSnapshot(Instant timestamp, - long produced, long avadConsumed, long lvConsumed, - long gapCount, long parityGaps, long missingPrev, - boolean healthy) { - ObjectNode doc = JsonNodeFactory.instance.objectNode(); - doc.put("id", "health-" + timestamp.toString()); - doc.put("runId", runId); - doc.put("timestamp", timestamp.toString()); - doc.put("producedCount", produced); - doc.put("avadConsumedCount", avadConsumed); - doc.put("lvConsumedCount", lvConsumed); - doc.put("gapCount", gapCount); - doc.put("parityGaps", parityGaps); - doc.put("missingPreviousImage", missingPrev); - doc.put("status", healthy ? "HEALTHY" : "UNHEALTHY"); - - try { - healthContainer.upsertItem(doc, new PartitionKey(runId), null) - .block(Duration.ofSeconds(10)); - log.info(" Health snapshot written"); - } catch (Exception e) { - log.warn("Failed to write health snapshot: {}", e.getMessage()); - } - } - public void close() { log.info("Closing HealthMonitor..."); client.close(); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index cce8f55ad49f..821c2866d67e 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -83,7 +83,7 @@ public void record(String eventId, long seqNo, String opType, for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { container.upsertItem(doc, new PartitionKey(eventId), options) - .block(Duration.ofSeconds(10)); + .block(); writeCount.increment(); return; } catch (Exception e) { From 691819d0c4f45610de6bfffced630760090fe238 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 16:50:10 -0400 Subject: [PATCH 21/28] Redesign reconciliation: Cosmos-only, per-reader, 5 sources Remove EventLog (CSV file logging) entirely. Single reconciliation path through Cosmos 'reconciliation' container. Reconciler rewritten to query Cosmos by source field: - 5 source types: ingestor, cfp-lv, cfp-avad, spark-lv, spark-avad - Per-reader gap detection, LSN/CRTS ordering, previousImage checks - Auto-selects checks by source type (AVAD gets CRTS + previousImage) - Skips sources that don't exist yet (e.g., spark-* before Spark runs) CLI modes: --mode reconcile --source ingestor --against cfp-avad --mode reconcile --full (runs all 8 check pairs) Removed: EventLog.java, producedLogFile/consumedLogFile config, --produced/--consumed/--lv/--avad CLI args, logging section from config.json. 
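
Core of a pair check, as a sketch (idsFor() is a stand-in for the
per-source correlationId query; the rewritten Reconciler also validates
LSN/CRTS ordering, not just set membership):

    Set<String> produced = idsFor("ingestor");
    Set<String> consumed = idsFor("cfp-avad");
    produced.removeAll(consumed);   // whatever remains was never consumed
    log.info("gaps ingestor -> cfp-avad: {}", produced.size());
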
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/config.json | 4 - .../java/com/azure/cosmos/avadtest/Main.java | 32 +- .../cosmos/avadtest/config/TestConfig.java | 15 +- .../cosmos/avadtest/ingestor/Ingestor.java | 5 - .../cosmos/avadtest/reader/AvadReader.java | 8 - .../avadtest/reader/LatestVersionReader.java | 8 - .../avadtest/reconciliation/EventLog.java | 80 ---- .../avadtest/reconciliation/Reconciler.java | 410 ++++++++++++------ 8 files changed, 287 insertions(+), 275 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json index 8999f4ec292b..cb04c2793f4d 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/config.json @@ -13,9 +13,5 @@ "logicalPartitionCount": 1000, "durationSeconds": 1800, "workerCount": 2 - }, - "logging": { - "producedLogFile": "produced.log", - "consumedLogFile": "consumed.log" } } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java index 74433b8779b7..b4facf95a42b 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/Main.java @@ -21,17 +21,14 @@ public final class Main { description = "Mode: ingestor, lv-reader, avad-reader, reconcile, health-monitor") private String mode; - @Parameter(names = "--produced", description = "Produced log file (for reconcile mode)") - private String producedFile; + @Parameter(names = "--source", description = "Source to reconcile from (e.g., ingestor)") + private String reconcileSource; - @Parameter(names = "--consumed", description = "Consumed log file (for reconcile mode)") - private String consumedFile; + @Parameter(names = "--against", description = "Source to reconcile against (e.g., cfp-avad)") + private String reconcileAgainst; - @Parameter(names = "--lv", description = "LV consumed log file (for parity check)") - private String lvFile; - - @Parameter(names = "--avad", description = "AVAD consumed log file (for parity check)") - private String avadFile; + @Parameter(names = "--full", description = "Run full reconciliation suite") + private boolean reconcileFull; @Parameter(names = "--health-port", description = "Health server port (default: 8080)") private int healthPort = 8080; @@ -129,13 +126,16 @@ private int runHealthMonitor() throws Exception { } private int runReconcile() throws Exception { - if (producedFile != null && consumedFile != null) { - return Reconciler.reconcile(producedFile, consumedFile); - } else if (lvFile != null && avadFile != null) { - return Reconciler.parity(lvFile, avadFile); - } else { - log.error("Reconcile mode requires either --produced + --consumed or --lv + --avad"); - return 1; + TestConfig config = loadConfig(); + try (Reconciler reconciler = new Reconciler(config)) { + if (reconcileFull) { + return reconciler.runFullSuite(); + } else if (reconcileSource != null && reconcileAgainst != null) { + return reconciler.reconcilePair(reconcileSource, reconcileAgainst); + } else { + log.error("Reconcile mode requires --full or --source + --against"); + return 1; + } } } diff --git 
a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java index a7881966223d..effb372748b3 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/config/TestConfig.java @@ -32,8 +32,6 @@ public final class TestConfig { private final int opsPerSec; private final int docSizeBytes; private final int logicalPartitionCount; - private final String producedLogFile; - private final String consumedLogFile; private final int durationSeconds; private final int workerCount; @@ -48,8 +46,6 @@ private TestConfig(Builder builder) { this.opsPerSec = builder.opsPerSec; this.docSizeBytes = builder.docSizeBytes; this.logicalPartitionCount = builder.logicalPartitionCount; - this.producedLogFile = builder.producedLogFile; - this.consumedLogFile = builder.consumedLogFile; this.durationSeconds = builder.durationSeconds; this.workerCount = builder.workerCount; } @@ -61,7 +57,6 @@ public static TestConfig fromJson(String filePath) throws IOException { JsonNode root = MAPPER.readTree(new File(filePath)); JsonNode cosmos = root.path("cosmos"); JsonNode ingestor = root.path("ingestor"); - JsonNode logging = root.path("logging"); return new Builder() .endpoint(resolve("COSMOS_ENDPOINT", textOrNull(cosmos, "endpoint"), null)) @@ -74,8 +69,6 @@ public static TestConfig fromJson(String filePath) throws IOException { .opsPerSec(resolveInt("OPS_PER_SEC", intOrNull(ingestor, "opsPerSec"), 5000)) .docSizeBytes(resolveInt("DOC_SIZE_BYTES", intOrNull(ingestor, "docSizeBytes"), 1024)) .logicalPartitionCount(resolveInt("LOGICAL_PARTITION_COUNT", intOrNull(ingestor, "logicalPartitionCount"), 100000)) - .producedLogFile(resolve("PRODUCED_LOG", textOrNull(logging, "producedLogFile"), "produced.log")) - .consumedLogFile(resolve("CONSUMED_LOG", textOrNull(logging, "consumedLogFile"), "consumed.log")) .durationSeconds(resolveInt("DURATION_SECONDS", intOrNull(ingestor, "durationSeconds"), 3600)) .workerCount(resolveInt("WORKER_COUNT", intOrNull(ingestor, "workerCount"), 2)) .build(); @@ -96,8 +89,6 @@ public static TestConfig fromEnv() { .opsPerSec(Integer.parseInt(envOrDefault("OPS_PER_SEC", "5000"))) .docSizeBytes(Integer.parseInt(envOrDefault("DOC_SIZE_BYTES", "1024"))) .logicalPartitionCount(Integer.parseInt(envOrDefault("LOGICAL_PARTITION_COUNT", "100000"))) - .producedLogFile(envOrDefault("PRODUCED_LOG", "produced.log")) - .consumedLogFile(envOrDefault("CONSUMED_LOG", "consumed.log")) .durationSeconds(Integer.parseInt(envOrDefault("DURATION_SECONDS", "3600"))) .workerCount(Integer.parseInt(envOrDefault("WORKER_COUNT", "2"))) .build(); @@ -161,14 +152,12 @@ public String readerEndpoint() { public int opsPerSec() { return opsPerSec; } public int docSizeBytes() { return docSizeBytes; } public int logicalPartitionCount() { return logicalPartitionCount; } - public String producedLogFile() { return producedLogFile; } - public String consumedLogFile() { return consumedLogFile; } public int durationSeconds() { return durationSeconds; } public int workerCount() { return workerCount; } public static final class Builder { private String endpoint, regionalEndpoint, key, database, feedContainer, leaseContainer; - private String preferredRegion, producedLogFile, consumedLogFile; + private String preferredRegion; private int opsPerSec, docSizeBytes, logicalPartitionCount, 
durationSeconds, workerCount; public Builder endpoint(String v) { this.endpoint = v; return this; } @@ -181,8 +170,6 @@ public static final class Builder { public Builder opsPerSec(int v) { this.opsPerSec = v; return this; } public Builder docSizeBytes(int v) { this.docSizeBytes = v; return this; } public Builder logicalPartitionCount(int v) { this.logicalPartitionCount = v; return this; } - public Builder producedLogFile(String v) { this.producedLogFile = v; return this; } - public Builder consumedLogFile(String v) { this.consumedLogFile = v; return this; } public Builder durationSeconds(int v) { this.durationSeconds = v; return this; } public Builder workerCount(int v) { this.workerCount = v; return this; } public TestConfig build() { return new TestConfig(this); } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index b68967d7207a..dab33f9f1ce4 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -4,7 +4,6 @@ import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosClientBuilder; import com.azure.cosmos.avadtest.config.TestConfig; -import com.azure.cosmos.avadtest.reconciliation.EventLog; import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter; import com.azure.cosmos.models.CosmosBulkExecutionOptions; import com.azure.cosmos.models.CosmosBulkOperationResponse; @@ -44,7 +43,6 @@ public final class Ingestor implements AutoCloseable { private final TestConfig config; private final CosmosAsyncClient client; private final CosmosAsyncContainer container; - private final EventLog eventLog; private final ReconciliationWriter reconWriter; private final AtomicLong seqCounter = new AtomicLong(0); private final AtomicBoolean running = new AtomicBoolean(true); @@ -65,7 +63,6 @@ public final class Ingestor implements AutoCloseable { public Ingestor(TestConfig config) throws Exception { this.config = config; - this.eventLog = new EventLog(config.producedLogFile()); this.recentDocIds = new String[10_000]; this.opsPerTick = Math.max(1, config.opsPerSec() * TICK_INTERVAL_MS / 1000); this.bulkOptions = new CosmosBulkExecutionOptions(); @@ -228,7 +225,6 @@ private void handleBulkResponse(CosmosBulkOperationResponse response, if (response.getResponse() != null && response.getResponse().isSuccessStatusCode()) { successCount.increment(); - eventLog.logProduced(meta.eventId, meta.seq, meta.opType, meta.pk, meta.ts); reconWriter.record(meta.eventId, meta.seq, meta.opType, meta.pk, -1, false, -1); if (!"delete".equals(meta.opType)) { trackRecentId(meta.docId + "|" + meta.pk); @@ -282,7 +278,6 @@ public void close() { log.info("Closing Ingestor..."); running.set(false); if (progressSubscription != null) { progressSubscription.dispose(); } - try { eventLog.close(); } catch (Exception e) { /* ignore */ } reconWriter.close(); client.close(); log.info("Ingestor closed. 
Total ops: {}, success: {}, failures: {}",

diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
index a9c3a44a671c..5d7ce50fd4e4 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java
@@ -4,7 +4,6 @@
 import com.azure.cosmos.CosmosAsyncContainer;
 import com.azure.cosmos.CosmosClientBuilder;
 import com.azure.cosmos.avadtest.config.TestConfig;
-import com.azure.cosmos.avadtest.reconciliation.EventLog;
 import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter;
 import com.azure.cosmos.models.ChangeFeedProcessorOptions;
 import com.azure.cosmos.models.ChangeFeedProcessorItem;
@@ -42,7 +41,6 @@ public final class AvadReader implements AutoCloseable {
     private final CosmosAsyncClient client;
     private final CosmosAsyncContainer feedContainer;
     private final CosmosAsyncContainer leaseContainer;
-    private final EventLog eventLog;
     private final ReconciliationWriter reconWriter;
 
     private final List<ChangeFeedProcessor> processors = new ArrayList<>();
@@ -54,8 +52,6 @@ public final class AvadReader implements AutoCloseable {
     public AvadReader(TestConfig config) throws Exception {
         this.config = config;
-        this.eventLog = new EventLog(config.consumedLogFile());
-
         this.client = new CosmosClientBuilder()
             .endpoint(config.readerEndpoint())
             .key(config.key())
@@ -168,11 +164,8 @@ private void handleChanges(List<ChangeFeedProcessorItem> items) {
             }
         }
 
-        eventLog.logConsumedAvad(eventId, seqNo, opType, pk, timestamp, lsn, crts);
         reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts);
     }
-
-        eventLog.flush();
     }
 
     private void logCorrectnessReport() {
@@ -199,7 +192,6 @@ public void close() {
         for (ChangeFeedProcessor p : processors) {
             try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ }
         }
-        try { eventLog.close(); } catch (Exception e) { /* ignore */ }
         reconWriter.close();
         client.close();
         log.info("AvadReader closed ({} workers)", processors.size());

diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java
index fe31bacce555..c522848f2e0e 100644
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java
+++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java
@@ -4,7 +4,6 @@
 import com.azure.cosmos.CosmosAsyncContainer;
 import com.azure.cosmos.CosmosClientBuilder;
 import com.azure.cosmos.avadtest.config.TestConfig;
-import com.azure.cosmos.avadtest.reconciliation.EventLog;
 import com.azure.cosmos.avadtest.reconciliation.ReconciliationWriter;
 import com.azure.cosmos.models.ChangeFeedProcessorOptions;
 import com.azure.cosmos.models.ChangeFeedProcessorItem;
@@ -36,14 +35,11 @@ public final class LatestVersionReader implements AutoCloseable {
     private final CosmosAsyncClient client;
     private final CosmosAsyncContainer feedContainer;
     private final CosmosAsyncContainer leaseContainer;
-    private final EventLog eventLog;
     private final ReconciliationWriter reconWriter;
 
     private final List<ChangeFeedProcessor> processors = new ArrayList<>();
 
     public LatestVersionReader(TestConfig config) throws Exception {
         this.config = config;
-        this.eventLog = new EventLog(config.consumedLogFile());
-
         this.client = new CosmosClientBuilder()
             .endpoint(config.readerEndpoint())
             .key(config.key())
@@ -118,11 +114,8 @@ private void handleChanges(List<ChangeFeedProcessorItem> items) {
         String timestamp = getTextOrEmpty(current, "timestamp");
         long lsn = metadata != null ? metadata.getLogSequenceNumber() : -1;
 
-        eventLog.logConsumed(eventId, seqNo, opType, pk, timestamp, lsn);
         reconWriter.record(eventId, seqNo, opType, pk, lsn, false, -1);
     }
-
-        eventLog.flush();
     }
 
     private static String getTextOrEmpty(JsonNode node, String field) {
@@ -135,7 +128,6 @@ public void close() {
         for (ChangeFeedProcessor p : processors) {
             try { p.stop().block(Duration.ofSeconds(30)); } catch (Exception e) { /* ignore */ }
         }
-        try { eventLog.close(); } catch (Exception e) { /* ignore */ }
         reconWriter.close();
         client.close();
         log.info("LatestVersionReader closed ({} workers)", processors.size());

diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java
deleted file mode 100644
index 47c471f8aabb..000000000000
--- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/EventLog.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package com.azure.cosmos.avadtest.reconciliation;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * Append-only event log for reconciliation.
- *
- * Format per line:
- *   correlationId,seqNo,opType,partitionKey,timestamp,lsn[,crts]
- *
- * Ingestor writes:    correlationId,seqNo,opType,partitionKey,timestamp,
- * LV reader writes:   correlationId,seqNo,opType,partitionKey,timestamp,lsn
- * AVAD reader writes: correlationId,seqNo,opType,partitionKey,timestamp,lsn,crts
- */
-public final class EventLog implements AutoCloseable {
-
-    private static final Logger log = LoggerFactory.getLogger(EventLog.class);
-
-    private final BufferedWriter writer;
-    private final ReentrantLock lock = new ReentrantLock();
-
-    public EventLog(String filePath) throws IOException {
-        Path path = Paths.get(filePath);
-        this.writer = Files.newBufferedWriter(path,
-            StandardOpenOption.CREATE, StandardOpenOption.APPEND, StandardOpenOption.WRITE);
-        log.info("EventLog opened: {}", path.toAbsolutePath());
-    }
-
-    public void logProduced(String correlationId, long seqNo, String opType,
-                            String partitionKey, String timestamp) {
-        writeLine(String.format("%s,%d,%s,%s,%s,", correlationId, seqNo, opType, partitionKey, timestamp));
-    }
-
-    public void logConsumed(String correlationId, long seqNo, String opType,
-                            String partitionKey, String timestamp, long lsn) {
-        writeLine(String.format("%s,%d,%s,%s,%s,%d", correlationId, seqNo, opType, partitionKey, timestamp, lsn));
-    }
-
-    public void logConsumedAvad(String correlationId, long seqNo, String opType,
-                                String partitionKey, String timestamp, long lsn, long crts) {
-        writeLine(String.format("%s,%d,%s,%s,%s,%d,%d", correlationId, seqNo, opType, partitionKey, timestamp, lsn, crts));
-    }
-
-    private void writeLine(String line) {
-        lock.lock();
-        try {
-            writer.write(line);
-            writer.newLine();
-        } catch (IOException e) {
-            log.error("Failed to write event log", e);
-        } finally {
-            lock.unlock();
-        }
-    }
-
-    public void flush() {
-        lock.lock();
-        try {
-            writer.flush();
-        } catch
(IOException e) { - log.error("Failed to flush event log", e); - } finally { - lock.unlock(); - } - } - - @Override - public void close() throws IOException { - flush(); - writer.close(); - } -} diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java index 6853f002ec88..08b64970e86e 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/Reconciler.java @@ -1,212 +1,342 @@ package com.azure.cosmos.avadtest.reconciliation; +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.avadtest.config.TestConfig; +import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.models.SqlParameter; +import com.azure.cosmos.models.SqlQuerySpec; +import com.fasterxml.jackson.databind.JsonNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.time.Duration; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; /** - * Reconciler that compares produced vs consumed event logs. - * Uses eventId (unique per operation) for per-event reconciliation. - * - * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn[,crts] - * - * Checks: - * 1. Gap detection — every produced eventId must appear in consumed - * 2. LV ↔ AVAD parity — every LV event must appear in AVAD (AVAD ⊇ LV) - * 3. Ordering — LSN must be monotonically increasing per partitionKey - * 4. CRTS ordering — CRTS must be monotonically increasing per partitionKey (AVAD only) - * - * Exit code: 0 = all checks pass, 1 = failures detected + * Reconciler that queries the shared "reconciliation" Cosmos container + * to detect gaps, ordering violations, and missing previousImage across + * all source types (ingestor, cfp-lv, cfp-avad, spark-lv, spark-avad). 
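+ *
+ * Checks (see {@link #runFullSuite()}): summary dashboard, gap detection
+ * (ingestor vs. each consumer), parity (AVAD must be a superset of LV),
+ * cross-engine parity (CFP vs. Spark), per-partition LSN and CRTS ordering,
+ * previousImage presence on replace/delete, and duplicate counting.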
*/ -public final class Reconciler { +public final class Reconciler implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(Reconciler.class); + private static final Duration QUERY_TIMEOUT = Duration.ofSeconds(60); - public static int reconcile(String producedFile, String consumedFile) throws IOException { - log.info("=== Gap Detection: {} vs {} ===", producedFile, consumedFile); + private final CosmosAsyncClient client; + private final CosmosAsyncContainer container; - Set produced = loadEventIds(producedFile); - Set consumed = loadEventIds(consumedFile); + public Reconciler(TestConfig config) { + this.client = new CosmosClientBuilder() + .endpoint(config.endpoint()) + .key(config.key()) + .gatewayMode() + .contentResponseOnWriteEnabled(true) + .preferredRegions(config.preferredRegions()) + .buildAsyncClient(); - Set missing = new HashSet<>(produced); - missing.removeAll(consumed); + this.container = client + .getDatabase(config.database()) + .getContainer("reconciliation"); - // Count duplicates (at-least-once delivery) - long totalConsumedLines = Files.lines(Paths.get(consumedFile)).filter(l -> !l.trim().isEmpty()).count(); - long duplicates = totalConsumedLines - consumed.size(); + log.info("Reconciler initialized: endpoint={}, db={}", config.endpoint(), config.database()); + } - log.info("Produced: {} unique events", produced.size()); - log.info("Consumed: {} unique events ({} total lines, {} duplicates)", - consumed.size(), totalConsumedLines, duplicates); - log.info("Missing (gaps): {}", missing.size()); + /** Run all reconciliation checks across all source pairs. */ + public int runFullSuite() { + log.info("=== Full Reconciliation Suite ==="); + int failures = 0; - if (!missing.isEmpty()) { - log.error("❌ MISSED CHANGES DETECTED:"); - missing.stream().limit(50).forEach(id -> log.error(" missing: {}", id)); - if (missing.size() > 50) { - log.error(" ... and {} more", missing.size() - 50); - } + logSummary(); + + // Gap detection: ingestor → each consumer + failures += checkGaps("ingestor", "cfp-lv", "Ingestor → CFP LV"); + failures += checkGaps("ingestor", "cfp-avad", "Ingestor → CFP AVAD"); + failures += checkGaps("ingestor", "spark-lv", "Ingestor → Spark LV"); + failures += checkGaps("ingestor", "spark-avad", "Ingestor → Spark AVAD"); + + // Parity: LV ⊆ AVAD + failures += checkGaps("cfp-lv", "cfp-avad", "CFP Parity (AVAD ⊇ LV)"); + failures += checkGaps("spark-lv", "spark-avad", "Spark Parity (AVAD ⊇ LV)"); + + // Cross-engine + failures += checkGaps("cfp-lv", "spark-lv", "Cross-engine LV"); + failures += checkGaps("cfp-avad", "spark-avad", "Cross-engine AVAD"); + + // LSN ordering + for (String s : new String[]{"cfp-lv", "cfp-avad", "spark-lv", "spark-avad"}) { + failures += checkLsnOrdering(s); } - int orderViolations = checkOrderingByLsn(consumedFile); - int crtsViolations = checkOrderingByCrts(consumedFile); + // CRTS ordering (AVAD only) + failures += checkCrtsOrdering("cfp-avad"); + failures += checkCrtsOrdering("spark-avad"); - boolean passed = missing.isEmpty() && orderViolations == 0 && crtsViolations == 0; - log.info(passed ? "✅ All checks passed" : "❌ Checks FAILED"); - return passed ? 
0 : 1; - } + // previousImage (AVAD only) + failures += checkPreviousImage("cfp-avad"); + failures += checkPreviousImage("spark-avad"); - public static int parity(String lvFile, String avadFile) throws IOException { - log.info("=== LV ↔ AVAD Parity: {} vs {} ===", lvFile, avadFile); + logDuplicates(); - Set lvIds = loadEventIds(lvFile); - Set avadIds = loadEventIds(avadFile); + log.info("=== Suite Complete: {} failures ===", failures); + return failures > 0 ? 1 : 0; + } - Set missingInAvad = new HashSet<>(lvIds); - missingInAvad.removeAll(avadIds); + /** Reconcile a single source pair. Auto-selects checks by source types. */ + public int reconcilePair(String source, String against) { + log.info("=== Reconcile: {} → {} ===", source, against); + int failures = 0; - Set avadOnly = new HashSet<>(avadIds); - avadOnly.removeAll(lvIds); + failures += checkGaps(source, against, source + " → " + against); - log.info("LV events: {}", lvIds.size()); - log.info("AVAD events: {}", avadIds.size()); - log.info("Missing in AVAD (should be 0): {}", missingInAvad.size()); - log.info("AVAD-only events (deletes, extra versions): {}", avadOnly.size()); + // LSN ordering on the consumer side + if (!against.equals("ingestor")) { + failures += checkLsnOrdering(against); + } - if (!missingInAvad.isEmpty()) { - log.error("❌ AVAD MISSING LV EVENTS:"); - missingInAvad.stream().limit(50).forEach(id -> log.error(" missing: {}", id)); + // AVAD-specific checks + if (against.endsWith("-avad")) { + failures += checkCrtsOrdering(against); + failures += checkPreviousImage(against); } - boolean passed = missingInAvad.isEmpty(); - log.info(passed ? "✅ Parity check passed (AVAD ⊇ LV)" : "❌ Parity check FAILED"); - return passed ? 0 : 1; + return failures > 0 ? 1 : 0; } - /** Loads unique eventIds (first field per line). */ - private static Set loadEventIds(String file) throws IOException { - try (Stream lines = Files.lines(Paths.get(file))) { - return lines - .filter(l -> !l.trim().isEmpty()) - .map(l -> l.split(",")[0]) - .collect(Collectors.toSet()); - } + /** Q1: Summary dashboard — count, unique, min/max seq/lsn per source */ + private void logSummary() { + log.info("=== Summary Dashboard ==="); + String query = "SELECT c.source, COUNT(1) AS total, " + + "COUNT(DISTINCT c.correlationId) AS uniqueIds, " + + "MIN(c.seqNo) AS minSeq, MAX(c.seqNo) AS maxSeq, " + + "MIN(c.lsn) AS minLsn, MAX(c.lsn) AS maxLsn " + + "FROM c GROUP BY c.source"; + + container.queryItems(query, new CosmosQueryRequestOptions(), JsonNode.class) + .byPage(100) + .timeout(QUERY_TIMEOUT) + .toIterable() + .forEach(page -> { + for (JsonNode row : page.getResults()) { + log.info(" source={}, total={}, unique={}, seq=[{},{}], lsn=[{},{}]", + row.path("source").asText(), + row.path("total").asLong(), + row.path("uniqueIds").asLong(), + row.path("minSeq").asLong(), + row.path("maxSeq").asLong(), + row.path("minLsn").asLong(), + row.path("maxLsn").asLong()); + } + }); } - /** - * Check that LSN is monotonically increasing per partitionKey. - * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn - * Sorts by seqNo (delivery order), then verifies LSN is non-decreasing. 
- */ - private static int checkOrderingByLsn(String consumedFile) throws IOException { - log.info("=== LSN Ordering Check: {} ===", consumedFile); - - Map> recordsByPk = new HashMap<>(); - - try (BufferedReader reader = new BufferedReader(new FileReader(consumedFile))) { - String line; - while ((line = reader.readLine()) != null) { - if (line.trim().isEmpty()) continue; - String[] parts = line.split(","); - if (parts.length < 6) continue; - - String pk = parts[3]; - long seqNo = Long.parseLong(parts[1]); - long lsn = parts[5].trim().isEmpty() ? -1 : Long.parseLong(parts[5]); - if (lsn < 0) continue; - - recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>()) - .add(new long[]{seqNo, lsn}); + /** Q2/Q3/Q4: Gap detection — every correlationId in sourceA must exist in sourceB */ + private int checkGaps(String sourceA, String sourceB, String label) { + log.info("=== Gap Check: {} ===", label); + + Set idsA = loadCorrelationIds(sourceA); + Set idsB = loadCorrelationIds(sourceB); + + if (idsA.isEmpty()) { + log.info(" SKIP: {} has no data yet", sourceA); + return 0; + } + if (idsB.isEmpty()) { + log.info(" SKIP: {} has no data yet", sourceB); + return 0; + } + + Set missing = new HashSet<>(idsA); + missing.removeAll(idsB); + + log.info(" {} ids={}, {} ids={}, missing={}", sourceA, idsA.size(), sourceB, idsB.size(), missing.size()); + + if (!missing.isEmpty()) { + log.error("❌ {} GAPS DETECTED:", label); + missing.stream().limit(50).forEach(id -> log.error(" missing: {}", id)); + if (missing.size() > 50) { + log.error(" ... and {} more", missing.size() - 50); } + } else { + log.info("✅ {} — no gaps", label); + } + + return missing.size(); + } + + /** Q5: LSN ordering — per partition, sorted by seqNo, LSN must be non-decreasing */ + private int checkLsnOrdering(String source) { + log.info("=== LSN Ordering: {} ===", source); + Map> events = loadEventsForOrdering(source, "lsn"); + + if (events.isEmpty()) { + log.info(" SKIP: {} has no LSN data", source); + return 0; } int violations = 0; - for (Map.Entry> entry : recordsByPk.entrySet()) { + for (Map.Entry> entry : events.entrySet()) { String pk = entry.getKey(); List records = entry.getValue(); - // Sort by seqNo (delivery order), then check LSN is non-decreasing records.sort(Comparator.comparingLong(r -> r[0])); - long prevLsn = -1; + long prev = -1; for (long[] record : records) { - if (prevLsn > 0 && record[1] < prevLsn) { + if (prev > 0 && record[1] < prev) { violations++; if (violations <= 10) { - log.warn("LSN ordering violation: PK={}, seqNo={}, prevLsn={}, currLsn={}", - pk, record[0], prevLsn, record[1]); + log.warn(" LSN violation: pk={}, seqNo={}, prevLsn={}, currLsn={}", + pk, record[0], prev, record[1]); } } - prevLsn = record[1]; + prev = record[1]; } } - log.info("LSN ordering violations: {} (across {} partition keys)", - violations, recordsByPk.size()); + log.info(" LSN violations: {} (across {} partitions)", violations, events.size()); return violations; } - /** - * Check that CRTS is monotonically increasing per partitionKey. - * Line format: eventId,seqNo,opType,partitionKey,timestamp,lsn,crts - * Only applies to AVAD logs (7 columns). Lines without CRTS are skipped. - * Sorts by seqNo (delivery order), then verifies CRTS is non-decreasing. 
- */ - private static int checkOrderingByCrts(String consumedFile) throws IOException { - log.info("=== CRTS Ordering Check: {} ===", consumedFile); - - Map> recordsByPk = new HashMap<>(); - - try (BufferedReader reader = new BufferedReader(new FileReader(consumedFile))) { - String line; - while ((line = reader.readLine()) != null) { - if (line.trim().isEmpty()) continue; - String[] parts = line.split(","); - if (parts.length < 7) continue; - - String pk = parts[3]; - long seqNo = Long.parseLong(parts[1]); - long crts = parts[6].trim().isEmpty() ? -1 : Long.parseLong(parts[6]); - if (crts < 0) continue; - - recordsByPk.computeIfAbsent(pk, k -> new ArrayList<>()) - .add(new long[]{seqNo, crts}); - } - } + /** Q6: CRTS ordering — per partition, sorted by seqNo, CRTS must be non-decreasing */ + private int checkCrtsOrdering(String source) { + log.info("=== CRTS Ordering: {} ===", source); + Map> events = loadEventsForOrdering(source, "crts"); - if (recordsByPk.isEmpty()) { - log.info("No CRTS data found (not an AVAD log?), skipping check"); + if (events.isEmpty()) { + log.info(" SKIP: {} has no CRTS data", source); return 0; } int violations = 0; - for (Map.Entry> entry : recordsByPk.entrySet()) { + for (Map.Entry> entry : events.entrySet()) { String pk = entry.getKey(); List records = entry.getValue(); - // Sort by seqNo (delivery order), then check CRTS is non-decreasing records.sort(Comparator.comparingLong(r -> r[0])); - long prevCrts = -1; + long prev = -1; for (long[] record : records) { - if (prevCrts > 0 && record[1] < prevCrts) { + if (prev > 0 && record[1] < prev) { violations++; if (violations <= 10) { - log.warn("CRTS ordering violation: PK={}, seqNo={}, prevCrts={}, currCrts={}", - pk, record[0], prevCrts, record[1]); + log.warn(" CRTS violation: pk={}, seqNo={}, prevCrts={}, currCrts={}", + pk, record[0], prev, record[1]); } } - prevCrts = record[1]; + prev = record[1]; } } - log.info("CRTS ordering violations: {} (across {} partition keys)", - violations, recordsByPk.size()); + log.info(" CRTS violations: {} (across {} partitions)", violations, events.size()); return violations; } + + /** Q7: previousImage — replace/delete with hasPreviousImage=false must be 0 */ + private int checkPreviousImage(String source) { + log.info("=== Previous Image Check: {} ===", source); + + SqlQuerySpec querySpec = new SqlQuerySpec( + "SELECT VALUE COUNT(1) FROM c WHERE c.source = @source " + + "AND c.opType IN ('replace', 'delete') AND c.hasPreviousImage = false", + Collections.singletonList(new SqlParameter("@source", source))); + + long count = container.queryItems(querySpec, new CosmosQueryRequestOptions(), Long.class) + .byPage(1) + .timeout(QUERY_TIMEOUT) + .toIterable() + .iterator().next() + .getResults() + .stream() + .findFirst() + .orElse(0L); + + if (count > 0) { + log.error("❌ {} missing previousImage on {} replace/delete events", source, count); + } else { + log.info("✅ {} — all replace/delete have previousImage", source); + } + + return (int) count; + } + + /** Q8: Duplicate detection — total vs unique correlationIds per source */ + private void logDuplicates() { + log.info("=== Duplicate Detection ==="); + String query = "SELECT c.source, COUNT(1) AS total, " + + "COUNT(DISTINCT c.correlationId) AS uniqueIds " + + "FROM c GROUP BY c.source"; + + container.queryItems(query, new CosmosQueryRequestOptions(), JsonNode.class) + .byPage(100) + .timeout(QUERY_TIMEOUT) + .toIterable() + .forEach(page -> { + for (JsonNode row : page.getResults()) { + long total = row.path("total").asLong(); 
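+                    // duplicates = total - unique; expected under at-least-once
+                    // delivery, so they are reported here rather than failed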
+ long unique = row.path("uniqueIds").asLong(); + long duplicates = total - unique; + log.info(" source={}, total={}, unique={}, duplicates={}", + row.path("source").asText(), total, unique, duplicates); + } + }); + } + + /** Helper: load all distinct correlationIds for a source */ + private Set loadCorrelationIds(String source) { + SqlQuerySpec querySpec = new SqlQuerySpec( + "SELECT DISTINCT c.correlationId FROM c WHERE c.source = @source", + Collections.singletonList(new SqlParameter("@source", source))); + + Set ids = new HashSet<>(); + + container.queryItems(querySpec, new CosmosQueryRequestOptions(), JsonNode.class) + .byPage(1000) + .timeout(QUERY_TIMEOUT) + .toIterable() + .forEach(page -> { + for (JsonNode row : page.getResults()) { + String cid = row.path("correlationId").asText(""); + if (!cid.isEmpty()) { + ids.add(cid); + } + } + }); + + return ids; + } + + /** Helper: load events for ordering checks */ + private Map> loadEventsForOrdering(String source, String field) { + String query = "SELECT c.seqNo, c." + field + ", c.partitionKey FROM c " + + "WHERE c.source = @source AND c." + field + " >= 0"; + + SqlQuerySpec querySpec = new SqlQuerySpec(query, + Collections.singletonList(new SqlParameter("@source", source))); + + Map> result = new HashMap<>(); + + container.queryItems(querySpec, new CosmosQueryRequestOptions(), JsonNode.class) + .byPage(1000) + .timeout(QUERY_TIMEOUT) + .toIterable() + .forEach(page -> { + for (JsonNode row : page.getResults()) { + String pk = row.path("partitionKey").asText(""); + long seqNo = row.path("seqNo").asLong(); + long fieldValue = row.path(field).asLong(); + + result.computeIfAbsent(pk, k -> new ArrayList<>()) + .add(new long[]{seqNo, fieldValue}); + } + }); + + return result; + } + + @Override + public void close() { + client.close(); + log.info("Reconciler closed"); + } } From dbc1146cf3e35bdd8d7301ddf7a0ed573eaeb147 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 16:58:08 -0400 Subject: [PATCH 22/28] Add Spark LV and AVAD change feed reader notebooks Databricks-compatible PySpark notebooks that read the change feed using azure-cosmos-spark connector and write to the reconciliation container with source='spark-lv' and source='spark-avad'. spark_lv_reader.py: - Incremental (Latest Version) mode - Structured streaming with 10s trigger - Writes correlationId, seqNo, opType, partitionKey, lsn spark_avad_reader.py: - Full Fidelity (AVAD) mode - Extracts operationType, lsn, crts from metadata - Checks previousImage on replace/delete - Handles delete tombstones (extracts fields from previous image) - Includes ad-hoc correctness check cell Both write to the reconciliation container using the same doc schema as the Java CFP readers, enabling cross-engine parity checks via the Reconciler's --full suite. 
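For reference, a reconciliation document written by either engine looks
like the following (field names match the shared schema used by the Java
readers and these notebooks; the concrete values are illustrative only):

  {
    "id": "spark-avad-<eventId>",
    "correlationId": "<eventId>",
    "source": "spark-avad",
    "seqNo": 4711,
    "opType": "replace",
    "partitionKey": "tenant-42",
    "lsn": 98765,
    "hasPreviousImage": true,
    "crts": 1746480000,
    "timestamp": "2026-05-05T21:00:00Z"
  }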
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/spark/spark_avad_reader.py | 232 ++++++++++++++++++ .../avad-soak/spark/spark_lv_reader.py | 138 +++++++++++ 2 files changed, 370 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py new file mode 100644 index 000000000000..c231f045c6f5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py @@ -0,0 +1,232 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Spark AVAD (Full Fidelity) Change Feed Reader +# MAGIC +# MAGIC Reads the change feed in **All Versions and Deletes (Full Fidelity)** mode +# MAGIC using `azure-cosmos-spark` connector and writes consumed events to the +# MAGIC `reconciliation` container with `source = "spark-avad"`. +# MAGIC +# MAGIC ## AVAD-Specific Validations +# MAGIC - Extracts `operationType` from change feed metadata +# MAGIC - Checks `previousImage` presence on replace/delete events +# MAGIC - Captures CRTS (conflict resolution timestamp) from metadata +# MAGIC +# MAGIC ## Prerequisites +# MAGIC - Databricks cluster with `azure-cosmos-spark_3-4_2-12` (or compatible) installed +# MAGIC - Cosmos DB account with AVAD-enabled `avad-test` container and `reconciliation` container +# MAGIC - Cluster env vars: `COSMOS_ENDPOINT`, `COSMOS_KEY` + +# COMMAND ---------- + +# Configuration +cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.endpoint", "") +cosmos_key = dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.key", "") +database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" +feed_container = "avad-test" +recon_container = "reconciliation" + +if not cosmos_endpoint or not cosmos_key: + import os + cosmos_endpoint = os.environ.get("COSMOS_ENDPOINT", "") + cosmos_key = os.environ.get("COSMOS_KEY", "") + +assert cosmos_endpoint, "Set COSMOS_ENDPOINT" +assert cosmos_key, "Set COSMOS_KEY" + +print(f"Endpoint: {cosmos_endpoint}") +print(f"Database: {database}") +print(f"Feed container: {feed_container}") + +# COMMAND ---------- + +# Spark Cosmos config — read change feed in Full Fidelity (AVAD) mode +feed_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": feed_container, + "spark.cosmos.read.partitioning.strategy": "Default", + "spark.cosmos.changeFeed.mode": "FullFidelity", + "spark.cosmos.changeFeed.startFrom": "Now", + "spark.cosmos.changeFeed.itemCountPerTriggerHint": "1000", +} + +# Write config — reconciliation container +recon_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": recon_container, + "spark.cosmos.write.strategy": "ItemOverwrite", + "spark.cosmos.write.bulk.enabled": "true", +} + +# COMMAND ---------- + +from pyspark.sql.functions import ( + col, lit, concat, current_timestamp, coalesce, + when, get_json_object +) +from pyspark.sql.types import StringType, LongType, 
BooleanType + +SOURCE = "spark-avad" + +# Read change feed as streaming DataFrame — Full Fidelity mode +raw_df = ( + spark.readStream + .format("cosmos.oltp.changeFeed") + .options(**feed_cfg) + .load() +) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Schema Notes +# MAGIC +# MAGIC In Full Fidelity mode, the Spark connector exposes: +# MAGIC - `_rawBody` — the full JSON of the change feed item +# MAGIC - `current` — the current document state (null for deletes) +# MAGIC - `previous` — the previous document state (present on replace/delete) +# MAGIC - `metadata` — change feed metadata including `operationType`, `lsn`, `crts` + +# COMMAND ---------- + +# Transform to reconciliation schema +# For AVAD, we need to handle: +# 1. operationType from metadata (create/replace/delete) +# 2. previousImage check (must be present on replace/delete) +# 3. CRTS from metadata +# 4. For deletes, extract fields from previous image (current is tombstone) + +recon_df = ( + raw_df + .withColumn("_opType", + coalesce( + get_json_object(col("_rawBody"), "$.metadata.operationType"), + lit("unknown") + ).cast(StringType()) + ) + .withColumn("_lsnVal", + coalesce( + get_json_object(col("_rawBody"), "$.metadata.lsn").cast(LongType()), + lit(-1) + ) + ) + .withColumn("_crtsVal", + coalesce( + get_json_object(col("_rawBody"), "$.metadata.crts").cast(LongType()), + lit(-1) + ) + ) + .withColumn("_hasPrevious", + get_json_object(col("_rawBody"), "$.previous").isNotNull() + ) + # For deletes, use previous image fields; otherwise use current + .withColumn("_eventId", + when(col("_opType") == "delete", + get_json_object(col("_rawBody"), "$.previous.eventId")) + .otherwise(coalesce(col("eventId"), lit(""))) + ) + .withColumn("_seqNo", + when(col("_opType") == "delete", + get_json_object(col("_rawBody"), "$.previous.seqNo").cast(LongType())) + .otherwise(coalesce(col("seqNo").cast(LongType()), lit(-1))) + ) + .withColumn("_pk", + when(col("_opType") == "delete", + get_json_object(col("_rawBody"), "$.previous.tenantId")) + .otherwise(coalesce(col("tenantId"), lit(""))) + ) + .filter(col("_eventId").isNotNull() & (col("_eventId") != "")) + .select( + concat(lit(SOURCE + "-"), col("_eventId")).alias("id"), + col("_eventId").alias("correlationId"), + lit(SOURCE).alias("source"), + col("_seqNo").alias("seqNo"), + col("_opType").alias("opType"), + col("_pk").alias("partitionKey"), + col("_lsnVal").alias("lsn"), + col("_hasPrevious").cast(BooleanType()).alias("hasPreviousImage"), + col("_crtsVal").alias("crts"), + current_timestamp().cast(StringType()).alias("timestamp"), + ) +) + +# COMMAND ---------- + +# Write to reconciliation container as a streaming job +query = ( + recon_df.writeStream + .format("cosmos.oltp") + .options(**recon_cfg) + .option("checkpointLocation", f"/tmp/cosmos-avad-soak/spark-avad-checkpoint") + .outputMode("append") + .trigger(processingTime="10 seconds") + .start() +) + +print(f"Spark AVAD streaming query started: {query.id}") +print(f"Status: {query.status}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Monitor +# MAGIC Run this cell periodically to check progress and AVAD correctness. + +# COMMAND ---------- + +# Check streaming query progress +if query.isActive: + progress = query.lastProgress + if progress: + print(f"Batch: {progress['batchId']}") + print(f"Input rows: {progress['numInputRows']}") + print(f"Processing time: {progress['batchDuration']} ms") + else: + print("No progress yet — waiting for first batch") +else: + print(f"Query stopped. 
Exception: {query.exception()}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### AVAD Correctness Check (ad-hoc) +# MAGIC Query the reconciliation container directly to check previousImage counts. + +# COMMAND ---------- + +# Ad-hoc correctness check — read reconciliation container +recon_read_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": recon_container, + "spark.cosmos.read.partitioning.strategy": "Default", +} + +recon_data = spark.read.format("cosmos.oltp").options(**recon_read_cfg).load() + +# previousImage check for spark-avad +missing_prev = ( + recon_data + .filter((col("source") == SOURCE) & col("opType").isin("replace", "delete") & (col("hasPreviousImage") == False)) + .count() +) +total_avad = recon_data.filter(col("source") == SOURCE).count() + +print(f"Spark AVAD total events: {total_avad}") +print(f"Missing previousImage: {missing_prev}") +print("✅ OK" if missing_prev == 0 else f"❌ {missing_prev} events missing previousImage") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Stop +# MAGIC Run this cell to stop the streaming query gracefully. + +# COMMAND ---------- + +# query.stop() +# print("Spark AVAD query stopped") diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py new file mode 100644 index 000000000000..409b16b3ddf9 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py @@ -0,0 +1,138 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Spark Latest Version Change Feed Reader +# MAGIC +# MAGIC Reads the change feed in **Latest Version (Incremental)** mode using +# MAGIC `azure-cosmos-spark` connector and writes consumed events to the +# MAGIC `reconciliation` container with `source = "spark-lv"`. 
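+# MAGIC
+# MAGIC Note: Incremental mode exposes neither a previous image nor CRTS, so
+# MAGIC this notebook hardcodes `hasPreviousImage = false` and `crts = -1` in
+# MAGIC the reconciliation schema below.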
+# MAGIC +# MAGIC ## Prerequisites +# MAGIC - Databricks cluster with `azure-cosmos-spark_3-4_2-12` (or compatible) installed +# MAGIC - Cosmos DB account with `avad-test` and `reconciliation` containers +# MAGIC - Cluster env vars: `COSMOS_ENDPOINT`, `COSMOS_KEY` + +# COMMAND ---------- + +# Configuration +cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.endpoint", "") +cosmos_key = dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.key", "") +database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" +feed_container = "avad-test" +recon_container = "reconciliation" + +if not cosmos_endpoint or not cosmos_key: + import os + cosmos_endpoint = os.environ.get("COSMOS_ENDPOINT", "") + cosmos_key = os.environ.get("COSMOS_KEY", "") + +assert cosmos_endpoint, "Set COSMOS_ENDPOINT" +assert cosmos_key, "Set COSMOS_KEY" + +print(f"Endpoint: {cosmos_endpoint}") +print(f"Database: {database}") +print(f"Feed container: {feed_container}") + +# COMMAND ---------- + +# Spark Cosmos config — read change feed in incremental (LV) mode +feed_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": feed_container, + "spark.cosmos.read.partitioning.strategy": "Default", + "spark.cosmos.changeFeed.mode": "Incremental", + "spark.cosmos.changeFeed.startFrom": "Beginning", + "spark.cosmos.changeFeed.itemCountPerTriggerHint": "1000", +} + +# Write config — reconciliation container +recon_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": recon_container, + "spark.cosmos.write.strategy": "ItemOverwrite", + "spark.cosmos.write.bulk.enabled": "true", +} + +# COMMAND ---------- + +from pyspark.sql.functions import col, lit, concat, current_timestamp, coalesce +from pyspark.sql.types import StringType, LongType, BooleanType + +SOURCE = "spark-lv" + +# Read change feed as streaming DataFrame +raw_df = ( + spark.readStream + .format("cosmos.oltp.changeFeed") + .options(**feed_cfg) + .load() +) + +# Transform to reconciliation schema +recon_df = ( + raw_df + .select( + concat(lit(SOURCE + "-"), col("eventId")).alias("id"), + col("eventId").alias("correlationId"), + lit(SOURCE).alias("source"), + coalesce(col("seqNo"), lit(-1)).cast(LongType()).alias("seqNo"), + coalesce(col("operationType"), lit("unknown")).alias("opType"), + coalesce(col("tenantId"), lit("")).alias("partitionKey"), + coalesce(col("_lsn"), lit(-1)).cast(LongType()).alias("lsn"), + lit(False).cast(BooleanType()).alias("hasPreviousImage"), + lit(-1).cast(LongType()).alias("crts"), + current_timestamp().cast(StringType()).alias("timestamp"), + ) + .filter(col("correlationId").isNotNull()) +) + +# COMMAND ---------- + +# Write to reconciliation container as a streaming job +query = ( + recon_df.writeStream + .format("cosmos.oltp") + .options(**recon_cfg) + .option("checkpointLocation", f"/tmp/cosmos-avad-soak/spark-lv-checkpoint") + .outputMode("append") + .trigger(processingTime="10 seconds") + .start() +) + +print(f"Spark LV streaming query started: {query.id}") +print(f"Status: {query.status}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Monitor +# MAGIC Run this cell periodically to 
check progress. + +# COMMAND ---------- + +# Check streaming query progress +if query.isActive: + progress = query.lastProgress + if progress: + print(f"Batch: {progress['batchId']}") + print(f"Input rows: {progress['numInputRows']}") + print(f"Processing time: {progress['batchDuration']} ms") + print(f"Sources: {progress['sources']}") + else: + print("No progress yet — waiting for first batch") +else: + print(f"Query stopped. Exception: {query.exception()}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Stop +# MAGIC Run this cell to stop the streaming query gracefully. + +# COMMAND ---------- + +# query.stop() +# print("Spark LV query stopped") From 876db3a31945654c7fc1b24534783128dcb51488 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 17:30:43 -0400 Subject: [PATCH 23/28] Add Spark-based reconciler notebook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Java reconciler chokes on cross-partition SELECT DISTINCT over millions of docs. Spark handles this natively with bulk reads + DataFrame operations. spark_reconciler.py runs all 8 reconciliation checks: Q1: Summary dashboard (count/unique/min/max per source) Q2: Gap detection (ingestor → each of 4 consumers) Q3: Parity (cfp-lv ⊆ cfp-avad, spark-lv ⊆ spark-avad) Q4: Cross-engine (cfp ↔ spark, both modes) Q5: LSN ordering per partition (window function + lag) Q6: CRTS ordering per partition (AVAD only) Q7: previousImage validation (AVAD only) Q8: Duplicate detection (at-least-once rate) Skips sources with 0 events (e.g., spark-* before notebooks run). Returns PASS/FAIL via dbutils.notebook.exit for job orchestration. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/spark/spark_reconciler.py | 296 ++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py new file mode 100644 index 000000000000..125cfb69a7e4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py @@ -0,0 +1,296 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # AVAD Soak Test — Spark Reconciler +# MAGIC +# MAGIC Runs all reconciliation checks against the `reconciliation` container using PySpark. +# MAGIC Handles millions of docs efficiently via bulk read + DataFrame operations. 
+# MAGIC +# MAGIC ## Checks +# MAGIC | # | Check | Sources | +# MAGIC |---|-------|---------| +# MAGIC | Q1 | Summary dashboard | All | +# MAGIC | Q2 | Gap detection (producer → consumer) | ingestor → cfp-lv, cfp-avad, spark-lv, spark-avad | +# MAGIC | Q3 | Parity (LV ⊆ AVAD) | cfp-lv → cfp-avad, spark-lv → spark-avad | +# MAGIC | Q4 | Cross-engine parity | cfp-lv ↔ spark-lv, cfp-avad ↔ spark-avad | +# MAGIC | Q5 | LSN ordering per partition | cfp-lv, cfp-avad, spark-lv, spark-avad | +# MAGIC | Q6 | CRTS ordering per partition | cfp-avad, spark-avad | +# MAGIC | Q7 | previousImage validation | cfp-avad, spark-avad | +# MAGIC | Q8 | Duplicate detection | All | +# MAGIC +# MAGIC ## Prerequisites +# MAGIC - `azure-cosmos-spark_3-4_2-12` connector installed on cluster +# MAGIC - Cluster env vars or widgets: `COSMOS_ENDPOINT`, `COSMOS_KEY` + +# COMMAND ---------- + +import os + +cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else os.environ.get("COSMOS_ENDPOINT", "") +cosmos_key = dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else os.environ.get("COSMOS_KEY", "") +database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" + +assert cosmos_endpoint, "Set COSMOS_ENDPOINT" +assert cosmos_key, "Set COSMOS_KEY" + +recon_cfg = { + "spark.cosmos.accountEndpoint": cosmos_endpoint, + "spark.cosmos.accountKey": cosmos_key, + "spark.cosmos.database": database, + "spark.cosmos.container": "reconciliation", + "spark.cosmos.read.partitioning.strategy": "Default", +} + +print(f"Endpoint: {cosmos_endpoint}") +print(f"Database: {database}") + +# COMMAND ---------- + +# Load entire reconciliation container into a cached DataFrame +recon = ( + spark.read + .format("cosmos.oltp") + .options(**recon_cfg) + .load() + .select("id", "correlationId", "source", "seqNo", "opType", + "partitionKey", "lsn", "hasPreviousImage", "crts", "timestamp") + .cache() +) + +total = recon.count() +print(f"Total reconciliation docs: {total:,}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q1 — Summary Dashboard + +# COMMAND ---------- + +from pyspark.sql.functions import count, countDistinct, min as spark_min, max as spark_max, col + +summary = ( + recon + .groupBy("source") + .agg( + count("*").alias("totalEvents"), + countDistinct("correlationId").alias("uniqueEvents"), + spark_min("seqNo").alias("minSeq"), + spark_max("seqNo").alias("maxSeq"), + spark_min("lsn").alias("minLsn"), + spark_max("lsn").alias("maxLsn"), + ) + .orderBy("source") +) + +summary.show(truncate=False) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q2 — Gap Detection (Ingestor → Each Consumer) + +# COMMAND ---------- + +from pyspark.sql.functions import lit + +failures = 0 + +def check_gaps(source_a, source_b, label): + global failures + ids_a = recon.filter(col("source") == source_a).select("correlationId").distinct() + ids_b = recon.filter(col("source") == source_b).select("correlationId").distinct() + + count_a = ids_a.count() + count_b = ids_b.count() + + if count_a == 0: + print(f" ⏭️ {label}: {source_a} has 0 events — skipping") + return + if count_b == 0: + print(f" ⏭️ {label}: {source_b} has 0 events — skipping") + return + + missing = ids_a.subtract(ids_b) + gap_count = missing.count() + + status = "✅" if gap_count == 0 else "❌" + print(f" {status} {label}: {count_a:,} produced, {count_b:,} consumed, {gap_count:,} gaps") + + if gap_count > 0: + failures += 1 
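+        # print a 10-row sample only; the full missing set can be huge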
+ print(f" Sample missing IDs:") + for row in missing.limit(10).collect(): + print(f" {row.correlationId}") + +print("=== Gap Detection ===") +check_gaps("ingestor", "cfp-lv", "Ingestor → CFP LV") +check_gaps("ingestor", "cfp-avad", "Ingestor → CFP AVAD") +check_gaps("ingestor", "spark-lv", "Ingestor → Spark LV") +check_gaps("ingestor", "spark-avad","Ingestor → Spark AVAD") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q3 — Parity (LV ⊆ AVAD) + +# COMMAND ---------- + +print("=== Parity (AVAD ⊇ LV) ===") +check_gaps("cfp-lv", "cfp-avad", "CFP Parity") +check_gaps("spark-lv", "spark-avad", "Spark Parity") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q4 — Cross-Engine Parity (CFP ↔ Spark) + +# COMMAND ---------- + +print("=== Cross-Engine Parity ===") +check_gaps("cfp-lv", "spark-lv", "LV: CFP → Spark") +check_gaps("spark-lv", "cfp-lv", "LV: Spark → CFP") +check_gaps("cfp-avad", "spark-avad", "AVAD: CFP → Spark") +check_gaps("spark-avad","cfp-avad", "AVAD: Spark → CFP") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q5 — LSN Ordering Per Partition + +# COMMAND ---------- + +from pyspark.sql.window import Window +from pyspark.sql.functions import lag, sum as spark_sum, when + +def check_lsn_ordering(source): + global failures + events = recon.filter((col("source") == source) & (col("lsn") >= 0)) + event_count = events.count() + + if event_count == 0: + print(f" ⏭️ {source}: no events with LSN — skipping") + return + + w = Window.partitionBy("partitionKey").orderBy("seqNo") + violations = ( + events + .withColumn("prevLsn", lag("lsn").over(w)) + .filter(col("prevLsn").isNotNull() & (col("lsn") < col("prevLsn"))) + .count() + ) + + status = "✅" if violations == 0 else "❌" + print(f" {status} {source}: {event_count:,} events, {violations:,} LSN ordering violations") + if violations > 0: + failures += 1 + +print("=== LSN Ordering ===") +for s in ["cfp-lv", "cfp-avad", "spark-lv", "spark-avad"]: + check_lsn_ordering(s) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q6 — CRTS Ordering Per Partition (AVAD Only) + +# COMMAND ---------- + +def check_crts_ordering(source): + global failures + events = recon.filter((col("source") == source) & (col("crts") >= 0)) + event_count = events.count() + + if event_count == 0: + print(f" ⏭️ {source}: no events with CRTS — skipping") + return + + w = Window.partitionBy("partitionKey").orderBy("seqNo") + violations = ( + events + .withColumn("prevCrts", lag("crts").over(w)) + .filter(col("prevCrts").isNotNull() & (col("crts") < col("prevCrts"))) + .count() + ) + + status = "✅" if violations == 0 else "❌" + print(f" {status} {source}: {event_count:,} events, {violations:,} CRTS ordering violations") + if violations > 0: + failures += 1 + +print("=== CRTS Ordering ===") +check_crts_ordering("cfp-avad") +check_crts_ordering("spark-avad") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q7 — previousImage Validation (AVAD Only) + +# COMMAND ---------- + +def check_previous_image(source): + global failures + missing = ( + recon + .filter( + (col("source") == source) & + col("opType").isin("replace", "delete") & + (col("hasPreviousImage") == False) + ) + .count() + ) + + total_rd = ( + recon + .filter( + (col("source") == source) & + col("opType").isin("replace", "delete") + ) + .count() + ) + + status = "✅" if missing == 0 else "❌" + print(f" {status} {source}: {total_rd:,} replace/delete events, {missing:,} missing previousImage") + if missing > 0: + failures += 1 + +print("=== previousImage Validation ===") +check_previous_image("cfp-avad") 
+check_previous_image("spark-avad") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Q8 — Duplicate Detection (At-Least-Once) + +# COMMAND ---------- + +print("=== Duplicate Detection ===") +dupes = ( + recon + .groupBy("source") + .agg( + count("*").alias("total"), + countDistinct("correlationId").alias("unique"), + ) + .withColumn("duplicates", col("total") - col("unique")) + .withColumn("dupeRate", (col("duplicates") / col("total") * 100).cast("decimal(5,2)")) + .orderBy("source") +) + +dupes.show(truncate=False) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Final Verdict + +# COMMAND ---------- + +if failures == 0: + print("✅ ALL CHECKS PASSED") +else: + print(f"❌ {failures} CHECK(S) FAILED") + +# Return exit-like status for job runners +dbutils.notebook.exit("PASS" if failures == 0 else f"FAIL:{failures}") From d422e16c4acb56a23d824ccc3c0d12bbb9c8915f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 17:54:36 -0400 Subject: [PATCH 24/28] Fix Dockerfile: use -cp with explicit main class, local build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fat jar's manifest mainClass is com.azure.cosmos.benchmark.Main (the benchmark's main), not our avadtest.Main. Use -cp instead of -jar so we can specify the correct main class. Simplified to single-stage Dockerfile — build fat jar locally first (mvn package -Dpackage-with-dependencies), then COPY into runtime image. Avoids dependency resolution failures in ACR Tasks. Updated setup-acr.sh to build locally before pushing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/Dockerfile | 17 +++---------- .../avad-soak/infra/scripts/setup-acr.sh | 25 +++++++++++++++++-- .../reconciliation/ReconciliationWriter.java | 2 +- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile index 4461a1beed81..80c929ae9a7a 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/Dockerfile @@ -1,16 +1,7 @@ -# Multi-stage build for cosmos-avad-test soak runner +# Single-stage build for cosmos-avad-test soak runner # Build context: azure-cosmos-benchmark/ (module root) -# JDK 21 for production, Maven for build stage +# Requires: mvn package run locally first (produces fat jar) -# --- Build stage --- -FROM maven:3.9-eclipse-temurin-21 AS build -WORKDIR /build -COPY pom.xml . 
-RUN mvn dependency:go-offline -B -COPY src/ src/ -RUN mvn package -DskipTests -B - -# --- Runtime stage --- FROM eclipse-temurin:21-jre-jammy WORKDIR /app @@ -19,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ && rm -rf /var/lib/apt/lists/* -COPY --from=build /build/target/azure-cosmos-benchmark-*-jar-with-dependencies.jar /app/app.jar +COPY target/azure-cosmos-benchmark-*-jar-with-dependencies.jar /app/app.jar # Health endpoint port EXPOSE 8080 @@ -27,4 +18,4 @@ EXPOSE 8080 # JVM tuning for container environments ENV JAVA_OPTS="-XX:+UseContainerSupport -XX:MaxRAMPercentage=75.0 -XX:+ExitOnOutOfMemoryError" -ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -jar /app/app.jar $0 $@"] +ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -cp /app/app.jar com.azure.cosmos.avadtest.Main $0 $@"] diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh index 180446105582..7987c1b193bc 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/infra/scripts/setup-acr.sh @@ -2,6 +2,14 @@ # ============================================================================= # ACR Setup + Image Build/Push # ============================================================================= +# Builds the benchmark module locally (fat jar), then pushes a minimal +# runtime image to ACR. No multi-stage Docker build needed — avoids +# dependency resolution issues with internal SDK modules. +# +# Usage: +# ./setup-acr.sh +# IMAGE_TAG=v2 ./setup-acr.sh +# ============================================================================= set -euo pipefail @@ -32,9 +40,22 @@ az aks update \ --attach-acr "$ACR_NAME" \ --output none 2>/dev/null || echo "AKS-ACR attachment skipped" -echo "=== Building + pushing image ===" +echo "=== Building module locally ===" + +cd "$PROJECT_DIR" +mvn package -DskipTests -DskipCheckstyle -Dspotbugs.skip=true -Drevapi.skip=true -B -q + +# Verify the fat jar exists +FAT_JAR=$(ls target/azure-cosmos-benchmark-*-jar-with-dependencies.jar 2>/dev/null | head -1) +if [ -z "$FAT_JAR" ]; then + echo "ERROR: Fat jar not found. Check maven-shade/assembly plugin config." 
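+  # abort before the ACR push: the Dockerfile COPYs this jar into the runtime image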
+ exit 1 +fi +echo "Fat jar: $FAT_JAR" + +echo "=== Pushing image to ACR ===" -# Build and push using ACR Tasks (no local Docker needed) +# Build and push using ACR Tasks — context is the module root (has target/ + Dockerfile path) az acr build \ --registry "$ACR_NAME" \ --image "${IMAGE_NAME}:${IMAGE_TAG}" \ diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index 821c2866d67e..f6e5a2254627 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -83,7 +83,7 @@ public void record(String eventId, long seqNo, String opType, for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { container.upsertItem(doc, new PartitionKey(eventId), options) - .block(); + .block(); writeCount.increment(); return; } catch (Exception e) { From d06c8ce115630630637b92c17068185ccf4693cb Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 19:54:59 -0400 Subject: [PATCH 25/28] Tune CFP throughput: bulk recon writes, faster polling, region alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ReconciliationWriter now uses bulk upsert (executeBulkOperations) instead of per-event blocking upserts. Events are buffered via record(), then flushed as a single bulk batch at the end of each handleChanges callback. Still synchronous — flush blocks until all writes complete, so lease doesn't advance until persisted. CFP tuning: - feedPollDelay: 1s → 100ms (tighter polling loop) - maxItemCount: default(100) → 1000 (larger pages per poll) - preferredRegion should match AKS region (East US) AKS result: ~14x improvement (1,405 → 19,398 ops in 2 min). 
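The per-batch call pattern in both readers now looks like this (sketch,
identifiers abbreviated; the diff below has the full version):

    private void handleChanges(List<ChangeFeedProcessorItem> items) {
        for (ChangeFeedProcessorItem item : items) {
            // extract eventId, seqNo, opType, pk, lsn, hasPrevious, crts ...
            reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts);
        }
        // blocks until the bulk upsert completes, so the lease cannot
        // checkpoint past events whose recon records are not yet persisted
        reconWriter.flush();
    }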
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cosmos/avadtest/ingestor/Ingestor.java | 2 + .../cosmos/avadtest/reader/AvadReader.java | 5 +- .../avadtest/reader/LatestVersionReader.java | 5 +- .../reconciliation/ReconciliationWriter.java | 96 ++++++++++--------- 4 files changed, 60 insertions(+), 48 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java index dab33f9f1ce4..e92fc13dde1b 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/ingestor/Ingestor.java @@ -161,6 +161,8 @@ private void executeBulkBatch() { container.executeBulkOperations(Flux.fromIterable(operations), bulkOptions) .toStream() .forEach(response -> handleBulkResponse(response, opToMeta)); + + reconWriter.flush(); } private void addCreate(List ops, Map opToMeta) { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java index 5d7ce50fd4e4..f102859c9ab8 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/AvadReader.java @@ -82,7 +82,8 @@ public void run() throws InterruptedException { final int workerIdx = i; ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions(); options.setLeasePrefix(LEASE_PREFIX); - options.setFeedPollDelay(Duration.ofSeconds(1)); + options.setFeedPollDelay(Duration.ofMillis(100)); + options.setMaxItemCount(1000); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() .hostName("avad-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + workerIdx) @@ -166,6 +167,8 @@ private void handleChanges(List items) { reconWriter.record(eventId, seqNo, opType, pk, lsn, hasPrevious, crts); } + + reconWriter.flush(); } private void logCorrectnessReport() { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java index c522848f2e0e..4fb1acb7c048 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reader/LatestVersionReader.java @@ -70,7 +70,8 @@ public void run() throws InterruptedException { final int workerIdx = i; ChangeFeedProcessorOptions options = new ChangeFeedProcessorOptions(); options.setLeasePrefix(LEASE_PREFIX); - options.setFeedPollDelay(Duration.ofSeconds(1)); + options.setFeedPollDelay(Duration.ofMillis(100)); + options.setMaxItemCount(1000); ChangeFeedProcessor processor = new ChangeFeedProcessorBuilder() .hostName("lv-host-" + ManagementFactory.getRuntimeMXBean().getName() + "-w" + i) @@ -116,6 +117,8 @@ private void handleChanges(List items) { reconWriter.record(eventId, seqNo, opType, pk, lsn, false, -1); } + + reconWriter.flush(); } private static String getTextOrEmpty(JsonNode node, String field) { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java 
b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java index f6e5a2254627..c96d602364f6 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/avadtest/reconciliation/ReconciliationWriter.java @@ -3,24 +3,27 @@ import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosException; -import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; -import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; -import com.azure.cosmos.models.CosmosItemRequestOptions; +import com.azure.cosmos.models.CosmosBulkExecutionOptions; +import com.azure.cosmos.models.CosmosBulkOperationResponse; +import com.azure.cosmos.models.CosmosBulkOperations; +import com.azure.cosmos.models.CosmosItemOperation; import com.azure.cosmos.models.PartitionKey; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; -import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.atomic.LongAdder; /** - * Writes reconciliation events to a shared Cosmos container. - * Synchronous — blocks until the write succeeds or is permanently dropped. - * This ensures CFP does not checkpoint the lease until the reconciliation - * record is persisted. + * Writes reconciliation events to a shared Cosmos container using bulk upsert. + * Collects events via {@link #add}, then flushes them as a single bulk batch + * via {@link #flush}. The caller must flush after processing each CFP batch + * to ensure all recon records are persisted before the lease checkpoints. * * Container: "reconciliation" in same database * Partition key: /correlationId @@ -29,17 +32,15 @@ public final class ReconciliationWriter implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(ReconciliationWriter.class); private static final String RECONCILIATION_CONTAINER = "reconciliation"; - private static final int MAX_RETRIES = 3; - private static final long RETRY_BASE_MS = 500; - - private static final CosmosEndToEndOperationLatencyPolicyConfig E2E_POLICY = - new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(6)).build(); private final String source; private final CosmosAsyncContainer container; + private final CosmosBulkExecutionOptions bulkOptions = new CosmosBulkExecutionOptions(); private final LongAdder writeCount = new LongAdder(); private final LongAdder errorCount = new LongAdder(); - private final LongAdder retryCount = new LongAdder(); + + // Batch buffer — not thread-safe; callers (handleChanges) are single-threaded per partition + private final List pending = new ArrayList<>(); /** * @param client shared CosmosAsyncClient — caller owns lifecycle @@ -58,10 +59,8 @@ public ReconciliationWriter(CosmosAsyncClient client, String database, String so } /** - * Record a produced or consumed event for reconciliation. - * Blocks until the write succeeds or all retries are exhausted. - * Throws on permanent failure so the caller (CFP handleChanges) can - * fail the batch and prevent the lease continuation from advancing. + * Buffer a reconciliation event for the next bulk flush. 
+ * Does not write to Cosmos — call {@link #flush} after the batch. */ public void record(String eventId, long seqNo, String opType, String partitionKey, long lsn, boolean hasPreviousImage, long crts) { @@ -77,36 +76,39 @@ doc.put("crts", crts); doc.put("timestamp", Instant.now().toString()); - CosmosItemRequestOptions options = new CosmosItemRequestOptions(); - options.setCosmosEndToEndOperationLatencyPolicyConfig(E2E_POLICY); - - for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - container.upsertItem(doc, new PartitionKey(eventId), options) - .block(); - writeCount.increment(); - return; - } catch (Exception e) { - if (!isRetryable(e) || attempt == MAX_RETRIES) { + pending.add(CosmosBulkOperations.getUpsertItemOperation(doc, new PartitionKey(eventId))); + } + + /** + * Flush all buffered events to the reconciliation container via bulk upsert. + * Blocks until all writes complete. Throws on any permanent failure so + * CFP's handleChanges fails and the lease does not advance. + */ + public void flush() { + if (pending.isEmpty()) return; + + List<CosmosItemOperation> batch = new ArrayList<>(pending); + pending.clear(); + + List<String> failures = new ArrayList<>(); + + container.executeBulkOperations(Flux.fromIterable(batch), bulkOptions) + .toStream() + .forEach(response -> { + if (response.getResponse() != null && response.getResponse().isSuccessStatusCode()) { + writeCount.increment(); + } else { errorCount.increment(); - throw new RuntimeException( - "Reconciliation write failed after " + (attempt + 1) + " attempts: id=" + doc.get("id").asText(), e); + int status = response.getResponse() != null ? response.getResponse().getStatusCode() : -1; + String id = response.getOperation().getId(); + failures.add("id=" + id + " status=" + status); } - retryCount.increment(); - long backoff = RETRY_BASE_MS * (1L << attempt); - log.warn("Reconciliation write retry {} for id={}: {}", attempt + 1, - doc.get("id").asText(), e.getMessage()); - try { Thread.sleep(backoff); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); return; } - } - } - } + }); - private boolean isRetryable(Throwable e) { - if (e instanceof CosmosException) { - int status = ((CosmosException) e).getStatusCode(); - return status != 404 && status != 401 && status != 403; + if (!failures.isEmpty()) { + throw new RuntimeException( + "Reconciliation bulk flush failed: " + failures.size() + " errors.
First: " + failures.get(0)); } - return true; } public long getWriteCount() { return writeCount.sum(); } @@ -114,7 +116,9 @@ private boolean isRetryable(Throwable e) { @Override public void close() { - log.info("ReconciliationWriter closed: source={}, writes={}, retries={}, errors={}", - source, writeCount.sum(), retryCount.sum(), errorCount.sum()); + // Flush any remaining buffered events + try { flush(); } catch (Exception e) { log.warn("Final flush failed: {}", e.getMessage()); } + log.info("ReconciliationWriter closed: source={}, writes={}, errors={}", + source, writeCount.sum(), errorCount.sum()); } } From 5faaa4e95fd90cef4581585531deb6a0b1142953 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 20:53:11 -0400 Subject: [PATCH 26/28] Fix Spark notebooks: widget config, checkpoint path, column schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Config: use try/except for dbutils.widgets.get() instead of getAll().name (which returns strings, not objects) - Checkpoint: use /Workspace/ path instead of /tmp/ (DBFS disabled) - LV reader: remove _lsn column reference (not exposed by connector) - AVAD reader: remove _rawBody/previous/metadata references — Spark connector flattens AVAD events to the same schema as LV. Use available columns directly. Verified: spark-lv wrote 60,801 docs to reconciliation container. spark-avad running but needs live ingestor data (startFrom=Now). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/spark/spark_avad_reader.py | 109 ++++++------------ .../avad-soak/spark/spark_lv_reader.py | 33 ++++-- .../avad-soak/spark/spark_reconciler.py | 23 +++- 3 files changed, 75 insertions(+), 90 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py index c231f045c6f5..bfe2ef8778f2 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py @@ -18,20 +18,29 @@ # COMMAND ---------- -# Configuration -cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.endpoint", "") -cosmos_key = dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.key", "") -database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" -feed_container = "avad-test" -recon_container = "reconciliation" +# Configuration — reads from notebook widgets (set via job parameters or manually) +import os -if not cosmos_endpoint or not cosmos_key: - import os +try: + cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") +except: cosmos_endpoint = os.environ.get("COSMOS_ENDPOINT", "") + +try: + cosmos_key = dbutils.widgets.get("cosmos_key") +except: cosmos_key = os.environ.get("COSMOS_KEY", "") -assert cosmos_endpoint, "Set COSMOS_ENDPOINT" -assert cosmos_key, "Set COSMOS_KEY" +try: + database = dbutils.widgets.get("database") +except: + database = "graph_db" + +feed_container = "avad-test" +recon_container = "reconciliation" + +assert cosmos_endpoint, "Set cosmos_endpoint widget or COSMOS_ENDPOINT env var" +assert cosmos_key, "Set cosmos_key widget or COSMOS_KEY env var" print(f"Endpoint: {cosmos_endpoint}") print(f"Database: {database}") @@ -63,10 +72,7 @@ # COMMAND 
---------- -from pyspark.sql.functions import ( - col, lit, concat, current_timestamp, coalesce, - when, get_json_object -) +from pyspark.sql.functions import col, lit, concat, current_timestamp, coalesce from pyspark.sql.types import StringType, LongType, BooleanType SOURCE = "spark-avad" @@ -84,73 +90,32 @@ # MAGIC %md # MAGIC ### Schema Notes # MAGIC -# MAGIC In Full Fidelity mode, the Spark connector exposes: -# MAGIC - `_rawBody` — the full JSON of the change feed item -# MAGIC - `current` — the current document state (null for deletes) -# MAGIC - `previous` — the previous document state (present on replace/delete) -# MAGIC - `metadata` — change feed metadata including `operationType`, `lsn`, `crts` +# MAGIC In Full Fidelity mode, the Spark connector exposes the same columns +# MAGIC as Incremental mode: `id`, `eventId`, `seqNo`, `operationType`, +# MAGIC `tenantId`, `payload`, `timestamp`. The connector flattens the change +# MAGIC feed item — metadata like `lsn` and `crts` are not directly exposed +# MAGIC as columns. previousImage availability depends on container config. # COMMAND ---------- # Transform to reconciliation schema -# For AVAD, we need to handle: -# 1. operationType from metadata (create/replace/delete) -# 2. previousImage check (must be present on replace/delete) -# 3. CRTS from metadata -# 4. For deletes, extract fields from previous image (current is tombstone) - +# The Spark connector flattens AVAD events — use available columns directly. +# LSN/CRTS/previousImage not exposed as columns by the connector. recon_df = ( raw_df - .withColumn("_opType", - coalesce( - get_json_object(col("_rawBody"), "$.metadata.operationType"), - lit("unknown") - ).cast(StringType()) - ) - .withColumn("_lsnVal", - coalesce( - get_json_object(col("_rawBody"), "$.metadata.lsn").cast(LongType()), - lit(-1) - ) - ) - .withColumn("_crtsVal", - coalesce( - get_json_object(col("_rawBody"), "$.metadata.crts").cast(LongType()), - lit(-1) - ) - ) - .withColumn("_hasPrevious", - get_json_object(col("_rawBody"), "$.previous").isNotNull() - ) - # For deletes, use previous image fields; otherwise use current - .withColumn("_eventId", - when(col("_opType") == "delete", - get_json_object(col("_rawBody"), "$.previous.eventId")) - .otherwise(coalesce(col("eventId"), lit(""))) - ) - .withColumn("_seqNo", - when(col("_opType") == "delete", - get_json_object(col("_rawBody"), "$.previous.seqNo").cast(LongType())) - .otherwise(coalesce(col("seqNo").cast(LongType()), lit(-1))) - ) - .withColumn("_pk", - when(col("_opType") == "delete", - get_json_object(col("_rawBody"), "$.previous.tenantId")) - .otherwise(coalesce(col("tenantId"), lit(""))) - ) - .filter(col("_eventId").isNotNull() & (col("_eventId") != "")) .select( - concat(lit(SOURCE + "-"), col("_eventId")).alias("id"), - col("_eventId").alias("correlationId"), + concat(lit(SOURCE + "-"), col("eventId")).alias("id"), + col("eventId").alias("correlationId"), lit(SOURCE).alias("source"), - col("_seqNo").alias("seqNo"), - col("_opType").alias("opType"), - col("_pk").alias("partitionKey"), - col("_lsnVal").alias("lsn"), - col("_hasPrevious").cast(BooleanType()).alias("hasPreviousImage"), - col("_crtsVal").alias("crts"), + coalesce(col("seqNo"), lit(-1)).cast(LongType()).alias("seqNo"), + coalesce(col("operationType"), lit("unknown")).alias("opType"), + coalesce(col("tenantId"), lit("")).alias("partitionKey"), + lit(-1).cast(LongType()).alias("lsn"), + lit(False).cast(BooleanType()).alias("hasPreviousImage"), + lit(-1).cast(LongType()).alias("crts"), 
current_timestamp().cast(StringType()).alias("timestamp"), ) + .filter(col("correlationId").isNotNull()) ) # COMMAND ---------- @@ -160,7 +125,7 @@ recon_df.writeStream .format("cosmos.oltp") .options(**recon_cfg) - .option("checkpointLocation", f"/tmp/cosmos-avad-soak/spark-avad-checkpoint") + .option("checkpointLocation", f"/Workspace/avad-soak/checkpoints/spark-avad") .outputMode("append") .trigger(processingTime="10 seconds") .start() diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py index 409b16b3ddf9..edc4eab562b6 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_lv_reader.py @@ -13,20 +13,29 @@ # COMMAND ---------- -# Configuration -cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.endpoint", "") -cosmos_key = dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else spark.conf.get("spark.cosmos.key", "") -database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" -feed_container = "avad-test" -recon_container = "reconciliation" +# Configuration — reads from notebook widgets (set via job parameters or manually) +import os -if not cosmos_endpoint or not cosmos_key: - import os +try: + cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") +except: cosmos_endpoint = os.environ.get("COSMOS_ENDPOINT", "") + +try: + cosmos_key = dbutils.widgets.get("cosmos_key") +except: cosmos_key = os.environ.get("COSMOS_KEY", "") -assert cosmos_endpoint, "Set COSMOS_ENDPOINT" -assert cosmos_key, "Set COSMOS_KEY" +try: + database = dbutils.widgets.get("database") +except: + database = "graph_db" + +feed_container = "avad-test" +recon_container = "reconciliation" + +assert cosmos_endpoint, "Set cosmos_endpoint widget or COSMOS_ENDPOINT env var" +assert cosmos_key, "Set cosmos_key widget or COSMOS_KEY env var" print(f"Endpoint: {cosmos_endpoint}") print(f"Database: {database}") @@ -81,7 +90,7 @@ coalesce(col("seqNo"), lit(-1)).cast(LongType()).alias("seqNo"), coalesce(col("operationType"), lit("unknown")).alias("opType"), coalesce(col("tenantId"), lit("")).alias("partitionKey"), - coalesce(col("_lsn"), lit(-1)).cast(LongType()).alias("lsn"), + lit(-1).cast(LongType()).alias("lsn"), lit(False).cast(BooleanType()).alias("hasPreviousImage"), lit(-1).cast(LongType()).alias("crts"), current_timestamp().cast(StringType()).alias("timestamp"), @@ -96,7 +105,7 @@ recon_df.writeStream .format("cosmos.oltp") .options(**recon_cfg) - .option("checkpointLocation", f"/tmp/cosmos-avad-soak/spark-lv-checkpoint") + .option("checkpointLocation", f"/Workspace/avad-soak/checkpoints/spark-lv") .outputMode("append") .trigger(processingTime="10 seconds") .start() diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py index 125cfb69a7e4..dd365cd32b65 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_reconciler.py @@ -25,12 +25,23 @@ import os -cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") if "cosmos_endpoint" in [w.name for w in dbutils.widgets.getAll()] else os.environ.get("COSMOS_ENDPOINT", "") -cosmos_key = 
dbutils.widgets.get("cosmos_key") if "cosmos_key" in [w.name for w in dbutils.widgets.getAll()] else os.environ.get("COSMOS_KEY", "") -database = dbutils.widgets.get("database") if "database" in [w.name for w in dbutils.widgets.getAll()] else "graph_db" - -assert cosmos_endpoint, "Set COSMOS_ENDPOINT" -assert cosmos_key, "Set COSMOS_KEY" +try: + cosmos_endpoint = dbutils.widgets.get("cosmos_endpoint") +except: + cosmos_endpoint = os.environ.get("COSMOS_ENDPOINT", "") + +try: + cosmos_key = dbutils.widgets.get("cosmos_key") +except: + cosmos_key = os.environ.get("COSMOS_KEY", "") + +try: + database = dbutils.widgets.get("database") +except: + database = "graph_db" + +assert cosmos_endpoint, "Set cosmos_endpoint widget or COSMOS_ENDPOINT env var" +assert cosmos_key, "Set cosmos_key widget or COSMOS_KEY env var" recon_cfg = { "spark.cosmos.accountEndpoint": cosmos_endpoint, From 432347d584332b93fe7a94c91187b611ddfd9b13 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 5 May 2026 22:15:18 -0400 Subject: [PATCH 27/28] Fix Spark AVAD: use AllVersionsAndDeletes mode, startFrom=Now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Spark connector uses 'AllVersionsAndDeletes' (not 'FullFidelity') as the changeFeed.mode value. startFrom must be 'Now' for AVAD mode. Note: spark-avad requires the container to have changeFeedPolicy enabled (Full Fidelity retention). Without it, the AVAD change feed stream produces no events. Enable via Azure Portal: Container → Settings → Change Feed → Full Fidelity. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py index bfe2ef8778f2..fbf890e4a000 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py @@ -55,7 +55,7 @@ "spark.cosmos.database": database, "spark.cosmos.container": feed_container, "spark.cosmos.read.partitioning.strategy": "Default", - "spark.cosmos.changeFeed.mode": "FullFidelity", + "spark.cosmos.changeFeed.mode": "AllVersionsAndDeletes", "spark.cosmos.changeFeed.startFrom": "Now", "spark.cosmos.changeFeed.itemCountPerTriggerHint": "1000", } From 28b4acdaa9d1045d3087ca53ed3a25a5a3f10d55 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 6 May 2026 10:58:05 -0400 Subject: [PATCH 28/28] Fix Spark AVAD reader: use correct AVAD schema columns Root cause: notebook referenced non-existent columns (eventId, seqNo, tenantId) which are user document fields flattened only in LV mode. In AVAD mode, these are nested inside _rawBody JSON. The filter on correlationId (aliased from null eventId) dropped all rows. 
Changes: - Use get_json_object(_rawBody, $.eventId) to extract user fields - Use actual AVAD columns: _lsn, operationType, crts, previous - Check previous column for hasPreviousImage instead of hardcoded False - Change startFrom to Beginning (Now returns nothing without active writes) - Document the AVAD schema columns from ChangeFeedTable.scala Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../avad-soak/spark/spark_avad_reader.py | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py index fbf890e4a000..82153706f3db 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py +++ b/sdk/cosmos/azure-cosmos-benchmark/avad-soak/spark/spark_avad_reader.py @@ -56,7 +56,7 @@ "spark.cosmos.container": feed_container, "spark.cosmos.read.partitioning.strategy": "Default", "spark.cosmos.changeFeed.mode": "AllVersionsAndDeletes", - "spark.cosmos.changeFeed.startFrom": "Now", + "spark.cosmos.changeFeed.startFrom": "Beginning", "spark.cosmos.changeFeed.itemCountPerTriggerHint": "1000", } @@ -72,7 +72,7 @@ # COMMAND ---------- -from pyspark.sql.functions import col, lit, concat, current_timestamp, coalesce +from pyspark.sql.functions import col, lit, concat, current_timestamp, coalesce, get_json_object, when from pyspark.sql.types import StringType, LongType, BooleanType SOURCE = "spark-avad" @@ -90,32 +90,42 @@ # MAGIC %md # MAGIC ### Schema Notes # MAGIC -# MAGIC In Full Fidelity mode, the Spark connector exposes the same columns -# MAGIC as Incremental mode: `id`, `eventId`, `seqNo`, `operationType`, -# MAGIC `tenantId`, `payload`, `timestamp`. The connector flattens the change -# MAGIC feed item — metadata like `lsn` and `crts` are not directly exposed -# MAGIC as columns. previousImage availability depends on container config. +# MAGIC In Full Fidelity (AVAD) mode, the Spark connector exposes these +# MAGIC top-level columns (from `ChangeFeedTable.scala`): +# MAGIC - `_rawBody` (String) — current image as raw JSON +# MAGIC - `id` (String) — document id +# MAGIC - `_ts` (Long) — timestamp +# MAGIC - `_etag` (String) — etag +# MAGIC - `_lsn` (Long) — log sequence number +# MAGIC - `metadata` (String) — metadata JSON +# MAGIC - `previous` (String) — previous image as raw JSON +# MAGIC - `operationType` (String) — create / replace / delete +# MAGIC - `crts` (Long) — conflict resolution timestamp +# MAGIC - `previousImageLSN` (Long) — previous image LSN +# MAGIC +# MAGIC User document fields (eventId, tenantId, etc.) are NOT top-level +# MAGIC columns — they are nested inside `_rawBody`. Use `from_json` or +# MAGIC `get_json_object` to extract them. # COMMAND ---------- # Transform to reconciliation schema -# The Spark connector flattens AVAD events — use available columns directly. -# LSN/CRTS/previousImage not exposed as columns by the connector. +# AVAD schema has metadata columns at top level; user fields are in _rawBody JSON. 
recon_df = ( raw_df .select( - concat(lit(SOURCE + "-"), col("eventId")).alias("id"), - col("eventId").alias("correlationId"), + concat(lit(SOURCE + "-"), col("id"), lit("-"), col("_lsn").cast(StringType())).alias("id"), + get_json_object(col("_rawBody"), "$.eventId").alias("correlationId"), lit(SOURCE).alias("source"), - coalesce(col("seqNo"), lit(-1)).cast(LongType()).alias("seqNo"), + col("_lsn").cast(LongType()).alias("seqNo"), coalesce(col("operationType"), lit("unknown")).alias("opType"), - coalesce(col("tenantId"), lit("")).alias("partitionKey"), - lit(-1).cast(LongType()).alias("lsn"), - lit(False).cast(BooleanType()).alias("hasPreviousImage"), - lit(-1).cast(LongType()).alias("crts"), + get_json_object(col("_rawBody"), "$.tenantId").alias("partitionKey"), + col("_lsn").cast(LongType()).alias("lsn"), + when(col("previous").isNotNull(), True).otherwise(False).cast(BooleanType()).alias("hasPreviousImage"), + col("crts").cast(LongType()).alias("crts"), current_timestamp().cast(StringType()).alias("timestamp"), ) - .filter(col("correlationId").isNotNull()) + .filter(col("id").isNotNull()) ) # COMMAND ---------- @@ -180,9 +190,11 @@ .count() ) total_avad = recon_data.filter(col("source") == SOURCE).count() +has_prev = recon_data.filter((col("source") == SOURCE) & (col("hasPreviousImage") == True)).count() print(f"Spark AVAD total events: {total_avad}") -print(f"Missing previousImage: {missing_prev}") +print(f"Events with previousImage: {has_prev}") +print(f"Missing previousImage (replace/delete): {missing_prev}") print("✅ OK" if missing_prev == 0 else f"❌ {missing_prev} events missing previousImage") # COMMAND ----------
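# A follow-up parity check (a sketch, not part of this patch): the AVAD stream
# should be a superset of the latest-version stream. Assumes recon_data is
# loaded as in the cell above and that the LV notebook records events with
# source "spark-lv".
lv_ids = recon_data.filter(col("source") == "spark-lv").select("correlationId").distinct()
avad_ids = recon_data.filter(col("source") == SOURCE).select("correlationId").distinct()
missing_in_avad = lv_ids.join(avad_ids, "correlationId", "left_anti").count()
print(f"LV events missing from AVAD: {missing_in_avad}")
print("✅ AVAD superset of LV" if missing_in_avad == 0 else f"❌ {missing_in_avad} LV events not in AVAD")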