diff --git a/.github/rulesets/branches/default-branch.json b/.github/rulesets/branches/default-branch.json index f6d1bce..89e7f05 100644 --- a/.github/rulesets/branches/default-branch.json +++ b/.github/rulesets/branches/default-branch.json @@ -25,6 +25,7 @@ "parameters": { "required_approving_review_count": 1, "dismiss_stale_reviews_on_push": true, + "required_reviewers": [], "require_code_owner_review": false, "require_last_push_approval": false, "required_review_thread_resolution": true, @@ -32,6 +33,19 @@ "merge" ] } + }, + { + "type": "required_status_checks", + "parameters": { + "strict_required_status_checks_policy": true, + "do_not_enforce_on_create": false, + "required_status_checks": [ + { + "context": "crucible-ci-complete", + "integration_id": 15368 + } + ] + } } ], "bypass_actors": [] diff --git a/.github/workflows/crucible-ci.yaml b/.github/workflows/crucible-ci.yaml new file mode 100644 index 0000000..406e469 --- /dev/null +++ b/.github/workflows/crucible-ci.yaml @@ -0,0 +1,58 @@ +name: crucible-ci + +on: + pull_request: + branches: [ main ] + workflow_dispatch: + +concurrency: + group: ${{ github.ref }}/crucible-ci + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + outputs: + only-docs: ${{ steps.filter.outputs.only_changed }} + steps: + - uses: actions/checkout@v4 + - id: filter + uses: tj-actions/changed-files@v47 + with: + files: | + LICENSE + *.md + **/*.md + .github/rulesets/** + .github/workflows/run-crucible-tracking.yaml + .github/workflows/crucible-ci.yaml + docs/** + + call-real-tool-crucible-ci: + needs: changes + if: ${{ github.event_name == 'workflow_dispatch' || needs.changes.outputs.only-docs != 'true' }} + uses: perftool-incubator/crucible-ci/.github/workflows/tool-crucible-ci.yaml@main + with: + ci_target: "forkstat" + ci_target_branch: "${{ github.ref }}" + github_workspace: "$GITHUB_WORKSPACE" + secrets: + ci_registry_auth: ${{ secrets.CRUCIBLE_CI_ENGINES_REGISTRY_AUTH }} + quay_oauth_token: ${{ 
secrets.CRUCIBLE_QUAYIO_OAUTH_TOKEN }} + + call-faux-tool-crucible-ci: + needs: changes + if: ${{ github.event_name != 'workflow_dispatch' && needs.changes.outputs.only-docs == 'true' }} + uses: perftool-incubator/crucible-ci/.github/workflows/faux-tool-crucible-ci.yaml@main + + crucible-ci-complete: + needs: [ call-real-tool-crucible-ci, call-faux-tool-crucible-ci ] + if: always() + runs-on: ubuntu-latest + steps: + - name: Check Results + if: >- + contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + run: exit 1 + - run: echo "crucible-ci complete" diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..54937ff --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,22 @@ +# Forkstat Tool + +## Purpose +Monitors process fork(), exec(), and exit() activity during benchmark execution using the upstream forkstat utility. + +## Languages +- Bash: collection scripts (`forkstat-start`, `forkstat-stop`) +- Perl: post-processor (`forkstat-post-process`) + +## Key Files +| File | Purpose | +|------|---------| +| `forkstat-start` | Launches forkstat with configurable `--events` parameter (default: `all`) | +| `forkstat-stop` | Kills forkstat, compresses output with xz | +| `forkstat-post-process` | Converts raw forkstat output to crucible metrics (uses `toolbox::metrics`, `toolbox::json`, `toolbox::cpu` from `$TOOLBOX_HOME/perl`) | +| `rickshaw.json` | Rickshaw integration: endpoint allow/block lists, file deployment, post-process script | +| `workshop.json` | Engine image build: compiles forkstat from source | + +## Conventions +- Primary branch is `main` +- Runs as a profiler tool on master/worker roles, blocked on client/server +- Standard Bash/Perl modelines and 4-space indentation diff --git a/README.md b/README.md index 8577a2f..aef6e90 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,13 @@ # tool-forkstat -forkstat tool logs process fork(), exec(), and exit() activity +[![CI Actions 
#!/usr/bin/env python3
# -*- mode: python; indent-tabs-mode: nil; python-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=python
"""Convert raw forkstat output into per-second event-count samples.

Inputs, read from the current working directory:

* ``forkstat-date.txt``         -- the YYYY-MM-DD date written by
  ``forkstat-start`` with ``date -u`` (i.e. a UTC date).
* ``forkstat-stderrout.txt.xz`` -- xz-compressed forkstat output whose event
  lines start with ``HH:MM:SS <event>``.

For every distinct second that contains at least one recognised event, one
sample per event type seen in that second is handed to
``toolbox.metrics.log_sample``; ``finish_samples`` is called once at the end.
"""
import sys
import os
import lzma
import re
import math
from datetime import datetime, timedelta, timezone
from pathlib import Path

TOOLBOX_HOME = os.environ.get('TOOLBOX_HOME')
if TOOLBOX_HOME is None:
    print("This script requires libraries that are provided by the toolbox project.")
    print("Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and")
    print("then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.")
    # sys.exit rather than the site-provided exit() builtin, which is meant
    # for interactive use and is absent when Python runs without `site`.
    sys.exit(1)
else:
    p = Path(TOOLBOX_HOME) / 'python'
    if not p.exists() or not p.is_dir():
        # Name the variable in the message; a bare "/python" reads like a
        # path at the filesystem root.
        print("ERROR: $TOOLBOX_HOME/python ('%s') does not exist!" % (p))
        sys.exit(2)
    sys.path.append(str(p))
from toolbox.metrics import log_sample
from toolbox.metrics import finish_samples

# Event names recognised at the start of a forkstat output line.
event_types = ('fork', 'exec', 'exit', 'clone')
# group(1) = HH:MM:SS wall-clock time, group(2) = event name.
event_pattern = re.compile(r'^(\d{2}:\d{2}:\d{2})\s+(' + '|'.join(event_types) + r')\s+')


def emit_samples(file_id, end_ts, counts):
    """Log one sample per event type for a single one-second bucket.

    Args:
        file_id: Identifier passed through unchanged to ``log_sample``.
        end_ts: Bucket timestamp in epoch milliseconds, stored as the
            sample's ``end``.
        counts: Mapping of event type name -> number of occurrences observed
            in that bucket.
    """
    for event_type, count in counts.items():
        desc = {'source': 'forkstat', 'type': event_type, 'class': 'throughput'}
        sample = {'end': end_ts, 'value': count}
        log_sample(file_id, desc, {}, sample)


def main():
    """Parse the forkstat capture and emit per-second event counts.

    Returns:
        0 on success, 1 when an input file is missing or the recorded date
        cannot be parsed.
    """
    print('forkstat-post-process')

    date_file = 'forkstat-date.txt'
    if not os.path.exists(date_file):
        print("ERROR: %s not found, cannot determine date for timestamps" % date_file)
        return 1

    with open(date_file, 'r') as f:
        date_str = f.read().strip()

    # Parse the date once, up front: it is loop-invariant, and an empty or
    # malformed file should produce a clear error instead of a traceback
    # from inside the event loop.
    try:
        base_date = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print("ERROR: %s does not contain a valid YYYY-MM-DD date ('%s')" % (date_file, date_str))
        return 1
    # forkstat-start records this date with `date -u`, so anchor it to UTC
    # explicitly. A naive datetime would make .timestamp() depend on the
    # timezone of whichever host runs the post-processing.
    # NOTE(review): this assumes forkstat's HH:MM:SS stamps are UTC as well
    # (i.e. the collecting engine runs with a UTC clock) -- confirm.
    base_date = base_date.replace(tzinfo=timezone.utc)

    data_file = 'forkstat-stderrout.txt.xz'
    if not os.path.exists(data_file):
        print("ERROR: %s not found" % data_file)
        return 1

    file_id = '0'
    prev_hour = None
    day_offset = 0
    current_ts = None
    counts = {}

    with lzma.open(data_file, 'rt') as fh:
        for line in fh:
            match = event_pattern.match(line)
            if not match:
                continue

            time_str = match.group(1)
            event_type = match.group(2)

            hours, minutes, seconds = (int(field) for field in time_str.split(':'))
            if hours > 23 or minutes > 59 or seconds > 59:
                # The regex only guarantees two digits per field; skip lines
                # that are not a real time of day rather than aborting.
                continue

            # forkstat prints only the time of day, so an hour that goes
            # backwards is taken to mean the capture crossed midnight.
            if prev_hour is not None and hours < prev_hour:
                day_offset += 1
            prev_hour = hours

            dt = base_date + timedelta(days=day_offset, hours=hours,
                                       minutes=minutes, seconds=seconds)
            end_ts = int(math.floor(dt.timestamp() * 1000))

            # A new second begins: flush the bucket accumulated so far.
            if current_ts is not None and end_ts != current_ts:
                emit_samples(file_id, current_ts, counts)
                counts = {}

            current_ts = end_ts
            counts[event_type] = counts.get(event_type, 0) + 1

    # Flush the final, still-open bucket.
    if current_ts is not None and counts:
        emit_samples(file_id, current_ts, counts)

    finish_samples()
    return 0


if __name__ == "__main__":
    sys.exit(main())