diff --git a/.github/workflows/run-performance-tests.yml b/.github/workflows/run-performance-tests.yml index 3d9519966ae2..6f2ad7f492b5 100644 --- a/.github/workflows/run-performance-tests.yml +++ b/.github/workflows/run-performance-tests.yml @@ -1,7 +1,7 @@ # Performance test workflow to compare a baseline against a candidate DHIS2 version # You can run the workflow using the GitHub CLI like so # gh workflow run run-performance-tests.yml \ -# --field simulation_class="org.hisp.dhis.test.tracker.EnrollmentsTest" \ +# --field simulation_class="org.hisp.dhis.test.tracker.TrackerTest" \ # --field dhis2_image_baseline="dhis2/core:2.42.1" \ # --field dhis2_image_candidate="dhis2/core-dev:latest" # @@ -16,7 +16,7 @@ on: workflow_dispatch: inputs: simulation_class: - description: 'Fully qualified Gatling simulation class to run (e.g., org.hisp.dhis.test.EnrollmentsTest)' + description: 'Fully qualified Gatling simulation class to run (e.g., org.hisp.dhis.test.tracker.TrackerTest)' required: true type: string mvn_args: @@ -74,9 +74,10 @@ jobs: - name: Run performance tests - baseline run: | + # Skip baseline assertion failures to allow tightening performance thresholds based on candidate improvements DHIS2_IMAGE="${{ inputs.dhis2_image_baseline }}" \ SIMULATION_CLASS="${{ inputs.simulation_class }}" \ - MVN_ARGS="${{ inputs.mvn_args }}" \ + MVN_ARGS="${{ inputs.mvn_args }} -Dgatling.failOnError=false" \ DHIS2_DB_DUMP_URL="${{ inputs.dhis2_db_dump_url }}" \ DHIS2_DB_IMAGE_SUFFIX="${{ inputs.dhis2_db_image_suffix }}" \ ./run-simulation.sh @@ -97,12 +98,13 @@ jobs: # binary simulation.log into a simulation.csv. CLI releases can be downloaded from # https://github.com/dhis2/gatling/releases. The CLI is installed on the self-hosted runner. 
- name: Convert binary simulation.log to simulation.csv + id: convert-logs if: always() run: | glog --config ./src/test/resources/gatling.conf --scan-subdirs target/gatling - name: Upload Gatling report - if: always() # reports should always be uploaded as tests can fail due to our performance assertions + if: always() && steps.convert-logs.outcome == 'success' # always() is required: without it an implicit success() skips the upload when performance assertions fail the run uses: actions/upload-artifact@v4 with: name: gatling-report-${{ github.run_id }}-${{ github.sha }} diff --git a/dhis-2/dhis-test-e2e/pom.xml b/dhis-2/dhis-test-e2e/pom.xml index a1941e2eb8aa..854a141094f1 100644 --- a/dhis-2/dhis-test-e2e/pom.xml +++ b/dhis-2/dhis-test-e2e/pom.xml @@ -21,7 +21,7 @@ 2.20 33.5.0-jre 1.8.1 - 5.4.0 + 5.3.0 1.0.2 5.12.0 1.0.12 diff --git a/dhis-2/dhis-test-performance/README.md b/dhis-2/dhis-test-performance/README.md new file mode 100644 index 000000000000..6410d0f9e1b0 --- /dev/null +++ b/dhis-2/dhis-test-performance/README.md @@ -0,0 +1,43 @@ +# DHIS2 Performance Tests + +Run Gatling performance tests against DHIS2 Docker instances locally and in CI. + +## Quick Start + +```sh +DHIS2_IMAGE=dhis2/core-dev:latest \ +SIMULATION_CLASS=org.hisp.dhis.test.tracker.TrackerTest \ +./run-simulation.sh +``` + +Run `./run-simulation.sh` for full usage including profiling and database options. + +## Results + +Test results are saved to `target/gatling/-/`: + +* `index.html` - Gatling HTML report +* `simulation.log` - Binary response times +* `simulation.csv` - Response times (automated in CI only, [see below](#simulationcsv)) +* `simulation-run.txt` - Run metadata +* `profile.html` - Flamegraph (when profiling enabled) +* `profile.jfr` - JFR (Java flight recorder) profiling data (when profiling enabled) + +### simulation.csv + +If `index.html` doesn't provide the analysis you need, convert `simulation.log` to `simulation.csv` +for advanced analysis with [gatling-statistics](https://github.com/dhis2/gatling-statistics). + +Since Gatling 3.12, test results are written in binary format. 
Use +[glog](https://github.com/dhis2/gatling/releases) (a CLI from our Gatling fork) to convert: + +```sh +glog --config ./src/test/resources/gatling.conf --scan-subdirs target/gatling +``` + +## CI Usage + +`./run-simulation.sh` is used in +[`../../.github/workflows/run-performance-tests.yml`](../../.github/workflows/run-performance-tests.yml) +to compare performance between baseline and candidate DHIS2 versions. + diff --git a/dhis-2/dhis-test-performance/docker-compose.profile.yml b/dhis-2/dhis-test-performance/docker-compose.profile.yml new file mode 100644 index 000000000000..122dec642a07 --- /dev/null +++ b/dhis-2/dhis-test-performance/docker-compose.profile.yml @@ -0,0 +1,49 @@ +services: + web: + # Required for async-profiler to access perf_events + # See: https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilingInContainer.md + cap_add: + - SYS_ADMIN # Access to performance counters + security_opt: + - seccomp:unconfined # Allow profiling syscalls + environment: + JAVA_OPTS: + "-Dlog4j2.configurationFile=/opt/dhis2/log4j2.xml \ + -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:8081 \ + -XX:+UnlockDiagnosticVMOptions \ + -XX:+DebugNonSafepoints" # Accurate profiling at all code locations + volumes: + - async-profiler:/usr/local + - async-profiler-output:/profiler-output + ports: + - "127.0.0.1:8081:8081" # Debugger: connect using commandline flag -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:8081 + depends_on: + async-profiler-setup: + condition: service_completed_successfully + + async-profiler-setup: + image: busybox + command: | + sh -c ' + if [ ! -f /usr/local/bin/asprof ]; then + echo "Downloading async-profiler..." 
+ wget -O /tmp/async-profiler.tar.gz https://github.com/async-profiler/async-profiler/releases/download/v4.0/async-profiler-4.0-linux-x64.tar.gz && + mkdir -p /usr/local/bin /usr/local/lib && + tar -xzf /tmp/async-profiler.tar.gz -C /tmp && + cp /tmp/async-profiler-4.0-linux-x64/bin/* /usr/local/bin/ && + cp /tmp/async-profiler-4.0-linux-x64/lib/* /usr/local/lib/ && + chmod +x /usr/local/bin/asprof && + chmod +x /usr/local/bin/jfrconv && + echo "async-profiler installed to /usr/local" + else + echo "async-profiler already installed" + fi && + chmod 777 /profiler-output + ' + volumes: + - async-profiler:/usr/local + - async-profiler-output:/profiler-output + +volumes: + async-profiler: {} + async-profiler-output: {} diff --git a/dhis-2/dhis-test-performance/pom.xml b/dhis-2/dhis-test-performance/pom.xml index 1185de690447..ed5deca44ee0 100644 --- a/dhis-2/dhis-test-performance/pom.xml +++ b/dhis-2/dhis-test-performance/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.dhis - performance-tests-gatling + dhis-test-performance 1.0 Performance tests for DHIS2 using Gatling framework @@ -16,6 +16,7 @@ 3.14.3 4.19.0 2.46.1 + 3.14.0 @@ -33,6 +34,15 @@ + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + 17 + UTF-8 + + io.gatling gatling-maven-plugin @@ -73,5 +83,6 @@ + src/test diff --git a/dhis-2/dhis-test-performance/run-simulation.sh b/dhis-2/dhis-test-performance/run-simulation.sh index 3808f96b9e87..6d39247d4e92 100755 --- a/dhis-2/dhis-test-performance/run-simulation.sh +++ b/dhis-2/dhis-test-performance/run-simulation.sh @@ -1,16 +1,39 @@ #!/bin/bash # Run Gatling simulations against a DHIS2 instance running in Docker -# -# Usage: DHIS2_IMAGE= SIMULATION_CLASS= [DHIS2_DB_DUMP_URL=] [MVN_ARGS=] ./run-simulation.sh -# Example: DHIS2_IMAGE=dhis2/core-dev:local SIMULATION_CLASS=org.hisp.dhis.test.EnrollmentsTest MVN_ARGS="-DpageSize=100" ./run-simulation.sh -# Available Docker image tags: 
https://github.com/dhis2/dhis2-core/blob/master/docker/DOCKERHUB.md set -euo pipefail show_usage() { - echo "Usage: DHIS2_IMAGE= SIMULATION_CLASS= [DHIS2_DB_DUMP_URL=] $0" - echo "Example: DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.EnrollmentsTest $0" - echo "Optional: DHIS2_DB_DUMP_URL defaults to https://databases.dhis2.org/sierra-leone/dev/dhis2-db-sierra-leone.sql.gz" - echo "Available Docker image tags: https://github.com/dhis2/dhis2-core/blob/master/docker/DOCKERHUB.md" + echo "" + echo "USAGE:" + echo " DHIS2_IMAGE= SIMULATION_CLASS= [OPTIONS] $0" + echo "" + echo "REQUIRED:" + echo " DHIS2_IMAGE Docker image tag for DHIS2" + echo " Available tags: https://github.com/dhis2/dhis2-core/blob/master/docker/DOCKERHUB.md" + echo " SIMULATION_CLASS Fully qualified Gatling Simulation class name" + echo "" + echo "OPTIONS:" + echo " DHIS2_DB_DUMP_URL Database dump URL" + echo " Available database dumps: https://databases.dhis2.org" + echo " Default: https://databases.dhis2.org/sierra-leone/dev/dhis2-db-sierra-leone.sql.gz" + echo " DHIS2_DB_IMAGE_SUFFIX Docker image suffix for DB (default: sierra-leone-dev)" + echo " WARNING: Must match the version in DHIS2_DB_DUMP_URL" + echo " PROF_ARGS Async-profiler arguments (enables profiling)" + echo " Options: https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilerOptions.md" + echo " MVN_ARGS Additional Maven arguments passed to mvn gatling:test" + echo " HEALTHCHECK_TIMEOUT Max wait time for DHIS2 startup in seconds (default: 300)" + echo " HEALTHCHECK_INTERVAL Check interval for DHIS2 startup in seconds (default: 10)" + echo "" + echo "EXAMPLES:" + echo " # Basic test run" + echo " DHIS2_IMAGE=dhis2/core-dev:latest \\" + echo " SIMULATION_CLASS=org.hisp.dhis.test.tracker.TrackerTest $0" + echo "" + echo " # With CPU profiling" + echo " PROF_ARGS=\"-e cpu\" \\" + echo " DHIS2_IMAGE=dhis2/core-dev:latest \\" + echo " SIMULATION_CLASS=org.hisp.dhis.test.tracker.TrackerTest $0" + 
echo "" } if [ -z "${DHIS2_IMAGE:-}" ]; then @@ -30,15 +53,51 @@ DHIS2_DB_DUMP_URL=${DHIS2_DB_DUMP_URL:-"https://databases.dhis2.org/sierra-leone DHIS2_DB_IMAGE_SUFFIX=${DHIS2_DB_IMAGE_SUFFIX:-"sierra-leone-dev"} HEALTHCHECK_TIMEOUT=${HEALTHCHECK_TIMEOUT:-300} # default of 5min HEALTHCHECK_INTERVAL=${HEALTHCHECK_INTERVAL:-10} # default of 10s +PROF_ARGS=${PROF_ARGS:-""} + +parse_prof_args() { + if [ -z "$PROF_ARGS" ]; then + EVENT_FLAG="" + THREAD_FLAG="" + return 0 + fi + + [[ $PROF_ARGS =~ -e[[:space:]]+([^[:space:]]+) ]] || { echo "Error: PROF_ARGS must select an event with '-e EVENT' (got: $PROF_ARGS)" >&2; return 1; } + EVENT_FLAG="${BASH_REMATCH[1]}" + + if [[ " $PROF_ARGS " =~ [[:space:]](-t|--threads)[[:space:]] ]]; then + THREAD_FLAG="threads" + else + THREAD_FLAG="" + fi +} + +parse_prof_args cleanup() { echo "" echo "Cleaning up..." - docker compose down --volumes + if [ -n "$PROF_ARGS" ]; then + docker compose -f docker-compose.yml -f docker-compose.profile.yml down --volumes + else + docker compose down --volumes + fi } trap cleanup EXIT INT +start_containers() { + echo "Testing with image: $DHIS2_IMAGE" + + if [ -n "$PROF_ARGS" ]; then + docker compose -f docker-compose.yml -f docker-compose.profile.yml down --volumes + docker compose -f docker-compose.yml -f docker-compose.profile.yml up --detach + else + docker compose down --volumes + docker compose up --detach + fi +} + wait_for_health() { echo "Waiting for DHIS2 to start..." local start_time @@ -55,43 +114,110 @@ wait_for_health() { echo "DHIS2 is ready! (took $(($(date +%s) - start_time))s)" } -echo "Testing with image: $DHIS2_IMAGE" +save_profiler_data() { + local gatling_dir="$1" + + if [ -z "$PROF_ARGS" ]; then + return 0 + fi + + echo "Saving profiler data..." + + docker compose cp web:/profiler-output/. "$gatling_dir/" + + echo "Profiler data saved to $gatling_dir" +} + +post_process_profiler_data() { + local gatling_dir="$1" -docker compose down --volumes -docker compose up --detach + if [ -z "$PROF_ARGS" ]; then + return 0 + fi + + echo "Post-processing profiler data..." 
+ + local jfrconv_flags=("--${EVENT_FLAG}") + if [[ -n "$THREAD_FLAG" ]]; then + jfrconv_flags+=("--${THREAD_FLAG}") + fi + if [[ "$EVENT_FLAG" == "alloc" || "$EVENT_FLAG" == "lock" ]]; then + jfrconv_flags+=("--total") + fi + + local title="$SIMULATION_CLASS on $DHIS2_IMAGE (async-profiler $PROF_ARGS)" + # generate flamegraph and collapsed stack traces using jfrconv from async-profiler; flags are an array so each one is passed as its own argument + docker compose exec --workdir /profiler-output web \ + jfrconv "${jfrconv_flags[@]}" --dot --title "$title" profile.jfr profile.html + docker compose exec --workdir /profiler-output web \ + jfrconv "${jfrconv_flags[@]}" --dot profile.jfr profile.collapsed + + docker compose cp web:/profiler-output/. "$gatling_dir/" + + echo "Post-processing profiler data complete. Files saved to $gatling_dir" +} + +prepare_database() { + echo "Preparing database..." + docker compose exec db psql -U dhis -c 'VACUUM;' +} + +start_profiler() { + if [ -n "$PROF_ARGS" ]; then + docker compose exec --workdir /profiler-output web asprof start $PROF_ARGS -f profile.jfr 1 > /dev/null + fi +} + +run_simulation() { + echo "Running $SIMULATION_CLASS..." + mvn gatling:test \ + -Dgatling.simulationClass="$SIMULATION_CLASS" \ + $MVN_ARGS +} + +stop_profiler() { + if [ -n "$PROF_ARGS" ]; then + echo "Stopping profiler..." + docker compose exec web asprof stop 1 > /dev/null + fi +} + +generate_metadata() { + local gatling_run_dir="$1" + local simulation_run_file="$gatling_run_dir/simulation-run.txt" + + echo "Generating run metadata..." 
+ { + echo "RUN_DIR=$gatling_run_dir" + echo "COMMAND=DHIS2_IMAGE=$DHIS2_IMAGE DHIS2_DB_DUMP_URL=$DHIS2_DB_DUMP_URL SIMULATION_CLASS=$SIMULATION_CLASS${MVN_ARGS:+ MVN_ARGS=$MVN_ARGS}${HEALTHCHECK_TIMEOUT:+ HEALTHCHECK_TIMEOUT=$HEALTHCHECK_TIMEOUT}${HEALTHCHECK_INTERVAL:+ HEALTHCHECK_INTERVAL=$HEALTHCHECK_INTERVAL} $0" + echo "SCRIPT_NAME=$0" + echo "SCRIPT_ARGS=${*:2}" + echo "DHIS2_IMAGE=$DHIS2_IMAGE" + echo "DHIS2_DB_DUMP_URL=$DHIS2_DB_DUMP_URL" + echo "DHIS2_DB_IMAGE_SUFFIX=$DHIS2_DB_IMAGE_SUFFIX" + echo "SIMULATION_CLASS=$SIMULATION_CLASS" + echo "MVN_ARGS=$MVN_ARGS" + echo "HEALTHCHECK_TIMEOUT=$HEALTHCHECK_TIMEOUT" + echo "HEALTHCHECK_INTERVAL=$HEALTHCHECK_INTERVAL" + echo "GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'unknown')" + echo "GIT_COMMIT=$(git rev-parse HEAD 2>/dev/null || echo 'unknown')" + echo "GIT_DIRTY=$([ -n "$(git status --porcelain 2>/dev/null)" ] && echo 'true' || echo 'false')" + } > "$simulation_run_file" +} +start_containers wait_for_health +prepare_database +start_profiler +run_simulation +stop_profiler -# vacuum to get up to date PostgreSQL statistics -docker compose exec db psql -U dhis -c 'VACUUM;' - -echo "Running $SIMULATION_CLASS..." 
-mvn gatling:test \ - -Dgatling.simulationClass="$SIMULATION_CLASS" \ - $MVN_ARGS - -gatling_run_dir="target/gatling/$(cat target/gatling/lastRun.txt)" - -# Create simulation run metadata file in key=value format -simulation_run_file="$gatling_run_dir/simulation-run.txt" -{ - echo "RUN_DIR=$gatling_run_dir" - echo "COMMAND=DHIS2_IMAGE=$DHIS2_IMAGE DHIS2_DB_DUMP_URL=$DHIS2_DB_DUMP_URL SIMULATION_CLASS=$SIMULATION_CLASS${MVN_ARGS:+ MVN_ARGS=$MVN_ARGS}${HEALTHCHECK_TIMEOUT:+ HEALTHCHECK_TIMEOUT=$HEALTHCHECK_TIMEOUT}${HEALTHCHECK_INTERVAL:+ HEALTHCHECK_INTERVAL=$HEALTHCHECK_INTERVAL} $0" - echo "SCRIPT_NAME=$0" - echo "SCRIPT_ARGS=$*" - echo "DHIS2_IMAGE=$DHIS2_IMAGE" - echo "DHIS2_DB_DUMP_URL=$DHIS2_DB_DUMP_URL" - echo "DHIS2_DB_IMAGE_SUFFIX=$DHIS2_DB_IMAGE_SUFFIX" - echo "SIMULATION_CLASS=$SIMULATION_CLASS" - echo "MVN_ARGS=$MVN_ARGS" - echo "HEALTHCHECK_TIMEOUT=$HEALTHCHECK_TIMEOUT" - echo "HEALTHCHECK_INTERVAL=$HEALTHCHECK_INTERVAL" - echo "GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'unknown')" - echo "GIT_COMMIT=$(git rev-parse HEAD 2>/dev/null || echo 'unknown')" - echo "GIT_DIRTY=$([ -n "$(git status --porcelain 2>/dev/null)" ] && echo 'true' || echo 'false')" -} > "$simulation_run_file" +gatling_run_dir="target/gatling/$(head -n 1 target/gatling/lastRun.txt)" +save_profiler_data "$gatling_run_dir" +post_process_profiler_data "$gatling_run_dir" +generate_metadata "$gatling_run_dir" "$@" echo "Completed test for $DHIS2_IMAGE" echo "Gatling test results are in: $gatling_run_dir" -echo "Gatling run metadata is in: $simulation_run_file" +echo "Gatling run metadata is in: $gatling_run_dir/simulation-run.txt"