Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ agentpay-backend/
- [Billing units and settlement semantics](docs/billing-units.md) explains
stroops, `priceStroops`, `billedStroops`, `/api/v1/billing/*`, and why
`POST /api/v1/settle` drains backend counters without moving funds.
- [Metrics](docs/metrics.md) documents Prometheus gauges, HTTP counters,
duration histograms, and error counters exposed by `GET /api/v1/metrics`.

## Quickstart

Expand Down
25 changes: 25 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Metrics

`GET /api/v1/metrics` exposes Prometheus text format (`text/plain; version=0.0.4`).

The endpoint keeps the existing operational gauges:

- `agentpay_services_total`
- `agentpay_api_keys_total`
- `agentpay_usage_requests_total`
- `agentpay_paused`

It also records HTTP traffic metrics:

- `agentpay_http_requests_total{method,route,status}` counts completed
responses.
- `agentpay_http_request_duration_seconds{method,route,status}` is a histogram
with bucket, sum, and count samples.
- `agentpay_http_errors_total{type}` counts requests that reached the terminal
Express error handler.

`route` uses the matched Express route pattern, such as
`/api/v1/usage/:agent/:serviceId`, rather than raw request paths. Unmatched
routes and parser failures use `route="unmatched"` so agent IDs, service IDs,
and other user-controlled path segments do not create high-cardinality metric
labels.
95 changes: 95 additions & 0 deletions src/metrics.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { beforeEach, describe, it } from "node:test";
import assert from "node:assert";
import request from "supertest";
import { createApp } from "./index.js";
import { eventLog } from "./events.js";
import { resetHttpMetrics } from "./metrics.js";
import {
apiKeyStore,
pauseState,
rateBuckets,
servicesDisabled,
servicesMetadata,
servicesStore,
usageStore,
webhookStore,
} from "./store/state.js";

// Every test starts from empty shared state so assertions on exact metric
// counts are not affected by requests made in earlier tests.
beforeEach(() => {
  const clearableStores = [
    apiKeyStore,
    rateBuckets,
    servicesDisabled,
    servicesMetadata,
    servicesStore,
    usageStore,
    webhookStore,
  ];
  for (const store of clearableStores) {
    store.clear();
  }

  // Truncate the event log in place and lift any pause left behind.
  eventLog.length = 0;
  pauseState.paused = false;

  // Drop recorded HTTP samples and error counts.
  resetHttpMetrics();
});

// End-to-end checks of the HTTP metrics rendered by GET /api/v1/metrics.
// Each test builds a fresh app, issues real requests through supertest, and
// then matches the exposition text returned by the metrics endpoint.
void describe("Prometheus HTTP metrics", () => {
// One POST and one GET should each produce a counter sample and histogram
// samples labelled with the route pattern and the response status.
void it("records request counters and duration histograms by method, route, and status", async () => {
const app = createApp();

const write = await request(app)
.post("/api/v1/usage")
.send({ agent: "agent-metrics", serviceId: "svc-metrics", requests: 3 });
assert.strictEqual(write.status, 201);

const read = await request(app).get("/api/v1/usage/agent-metrics/svc-metrics");
assert.strictEqual(read.status, 200);

const metrics = await request(app).get("/api/v1/metrics");
assert.strictEqual(metrics.status, 200);
assert.match(
metrics.text,
/agentpay_http_requests_total\{method="POST",route="\/api\/v1\/usage",status="201"\} 1/
);
// The GET is labelled with the parameterised pattern, not the concrete path.
assert.match(
metrics.text,
/agentpay_http_requests_total\{method="GET",route="\/api\/v1\/usage\/:agent\/:serviceId",status="200"\} 1/
);
assert.match(
metrics.text,
/agentpay_http_request_duration_seconds_count\{method="POST",route="\/api\/v1\/usage",status="201"\} 1/
);
// Expects the POST to have completed within 0.5 seconds, so the le="0.5"
// bucket already contains it.
assert.match(
metrics.text,
/agentpay_http_request_duration_seconds_bucket\{method="POST",route="\/api\/v1\/usage",status="201",le="0\.5"\} 1/
);
});

// User-controlled path segments must never appear in the rendered labels.
void it("uses bounded route-pattern labels instead of raw request paths", async () => {
const app = createApp();

await request(app).get("/api/v1/usage/raw-agent/raw-service");

const metrics = await request(app).get("/api/v1/metrics");
assert.strictEqual(metrics.status, 200);
assert.ok(metrics.text.includes('route="/api/v1/usage/:agent/:serviceId"'));
assert.ok(!metrics.text.includes("raw-agent"));
assert.ok(!metrics.text.includes("raw-service"));
});

// A malformed JSON body is expected to end in the terminal error handler.
void it("increments the terminal error counter when the error handler runs", async () => {
const app = createApp();

const bad = await request(app)
.post("/api/v1/usage")
.set("Content-Type", "application/json")
.send("{not-json");
assert.strictEqual(bad.status, 500);

const metrics = await request(app).get("/api/v1/metrics");
assert.strictEqual(metrics.status, 200);
assert.match(
metrics.text,
/agentpay_http_errors_total\{type="entity\.parse\.failed"\} 1/
);
// The failed request is counted under route="unmatched" rather than under
// the usage route pattern.
assert.match(
metrics.text,
/agentpay_http_requests_total\{method="POST",route="unmatched",status="500"\} 1/
);
});
});
155 changes: 155 additions & 0 deletions src/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import type { Request } from "express";

/** Label dimensions that identify one HTTP request metric series. */
type HttpMetricLabels = {
method: string;
route: string;
status: string;
};

/** Accumulated counter and histogram state for one label combination. */
type HttpMetricSample = HttpMetricLabels & {
// Number of responses recorded for this label combination.
count: number;
// Running total of the recorded durations, in seconds.
sumSeconds: number;
// Bucket upper bound (seconds) -> number of recorded durations at or below it.
buckets: Map<number, number>;
};

// Histogram bucket upper bounds, in seconds, listed in ascending order.
const DURATION_BUCKETS_SECONDS = [
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10,
];
// Request samples keyed by the string that metricKey() builds from the labels.
const httpSamples = new Map<string, HttpMetricSample>();
// Error-handler invocation counts keyed by error type.
const httpErrorCounts = new Map<string, number>();

/**
 * Folds one finished HTTP response into the in-memory request counter and
 * duration histogram.
 *
 * @param req - Request that was served; supplies the method and the route label.
 * @param statusCode - Status code of the response.
 * @param durationSeconds - Handling time of the request, in seconds.
 */
export function recordHttpRequest(
  req: Request,
  statusCode: number,
  durationSeconds: number
): void {
  const method = req.method.toUpperCase();
  const route = routePattern(req);
  const status = String(statusCode);
  const seriesKey = metricKey({ method, route, status });

  let series = httpSamples.get(seriesKey);
  if (series === undefined) {
    // First observation for this label combination: start every bucket at zero.
    const emptyBuckets = new Map<number, number>();
    for (const upperBound of DURATION_BUCKETS_SECONDS) {
      emptyBuckets.set(upperBound, 0);
    }
    series = { method, route, status, count: 0, sumSeconds: 0, buckets: emptyBuckets };
    httpSamples.set(seriesKey, series);
  }

  series.count += 1;
  series.sumSeconds += durationSeconds;

  // Buckets are cumulative: the observation is counted in every bucket whose
  // upper bound is at or above the measured duration.
  for (const [upperBound, seen] of series.buckets) {
    if (durationSeconds <= upperBound) {
      series.buckets.set(upperBound, seen + 1);
    }
  }
}

/**
 * Counts one invocation of the terminal Express error handler.
 *
 * @param type - Error type label under which the invocation is counted.
 */
export function recordHttpError(type: string): void {
  const seenSoFar = httpErrorCounts.get(type) ?? 0;
  httpErrorCounts.set(type, seenSoFar + 1);
}

/**
 * Renders the HTTP request counter, request duration histogram, and error
 * counter families as Prometheus text-format lines.
 *
 * Request series are listed in the order produced by `sortedSamples()`, and
 * error series are listed in ascending order of their `type` label, so the
 * output is stable between scrapes.
 *
 * @returns A new array of exposition lines without line terminators; the
 *   caller is responsible for joining them.
 */
export function renderHttpMetrics(): string[] {
  // Sort once: the counter family and the histogram family list the same series.
  const samples = sortedSamples();

  const lines = [
    "# HELP agentpay_http_requests_total HTTP responses by method, route, and status.",
    "# TYPE agentpay_http_requests_total counter",
  ];
  for (const sample of samples) {
    lines.push(`agentpay_http_requests_total${labelSet(sample)} ${sample.count}`);
  }

  lines.push(
    "# HELP agentpay_http_request_duration_seconds HTTP response duration in seconds.",
    "# TYPE agentpay_http_request_duration_seconds histogram"
  );
  for (const sample of samples) {
    for (const bucket of DURATION_BUCKETS_SECONDS) {
      lines.push(
        `agentpay_http_request_duration_seconds_bucket${labelSet({
          ...sample,
          le: String(bucket),
        })} ${sample.buckets.get(bucket) ?? 0}`
      );
    }
    // The +Inf bucket holds every observation, so it equals the sample count.
    lines.push(
      `agentpay_http_request_duration_seconds_bucket${labelSet({
        ...sample,
        le: "+Inf",
      })} ${sample.count}`
    );
    lines.push(
      `agentpay_http_request_duration_seconds_sum${labelSet(sample)} ${formatNumber(
        sample.sumSeconds
      )}`
    );
    lines.push(
      `agentpay_http_request_duration_seconds_count${labelSet(sample)} ${sample.count}`
    );
  }

  lines.push(
    "# HELP agentpay_http_errors_total Terminal error-handler invocations by error type.",
    "# TYPE agentpay_http_errors_total counter"
  );
  // Order by the error type alone. A comparator-less sort() would compare the
  // stringified [type, count] pairs, which lets the count and the joining
  // comma influence the order.
  const errorEntries = Array.from(httpErrorCounts.entries()).sort(
    ([leftType], [rightType]) => {
      if (leftType < rightType) return -1;
      if (leftType > rightType) return 1;
      return 0;
    }
  );
  for (const [type, count] of errorEntries) {
    lines.push(`agentpay_http_errors_total${labelSet({ type })} ${count}`);
  }

  return lines;
}

/** Discards every recorded request sample and every error count. */
export function resetHttpMetrics(): void {
  for (const store of [httpSamples, httpErrorCounts]) {
    store.clear();
  }
}

/**
 * Returns a snapshot of the recorded request samples, ordered by comparing
 * their metric keys with `localeCompare`.
 */
function sortedSamples(): HttpMetricSample[] {
  const ordered = [...httpSamples.values()];
  ordered.sort((left, right) => metricKey(left).localeCompare(metricKey(right)));
  return ordered;
}

/**
 * Builds the lookup key for one label combination by joining method, route,
 * and status with newline separators.
 */
function metricKey(labels: HttpMetricLabels): string {
  const { method, route, status } = labels;
  return method + "\n" + route + "\n" + status;
}

/**
 * Derives the `route` label for a request.
 *
 * When the request carries a route whose path is a string, the label is the
 * request's base URL followed by that path. Anything else (no route, or a
 * non-string path) collapses to the fixed label "unmatched".
 */
function routePattern(req: Request): string {
  const matchedPath: unknown = req.route?.path;
  return typeof matchedPath === "string" ? `${req.baseUrl}${matchedPath}` : "unmatched";
}

/**
 * Formats a Prometheus label set such as `{method="GET",route="/x"}`.
 *
 * Only the keys `method`, `route`, `status`, `le`, and `type` are rendered;
 * every other property of `labels` is ignored. Labels appear in the property
 * order of the input object, and each value is escaped before being quoted.
 */
function labelSet(labels: Record<string, unknown>): string {
  const allowedNames = ["method", "route", "status", "le", "type"];
  const rendered: string[] = [];
  for (const [name, raw] of Object.entries(labels)) {
    if (!allowedNames.includes(name)) {
      continue;
    }
    rendered.push(`${name}="${escapeLabelValue(labelValue(raw))}"`);
  }
  return `{${rendered.join(",")}}`;
}

/**
 * Converts a raw label value to text: strings pass through, numbers are
 * stringified, and any other kind of value becomes the empty string.
 */
function labelValue(value: unknown): string {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
      return String(value);
    default:
      return "";
  }
}

/**
 * Escapes a label value for use inside double quotes: a backslash becomes
 * `\\`, a line feed becomes `\n`, and a double quote becomes `\"`.
 */
function escapeLabelValue(value: string): string {
  // One pass over the three special characters; each maps independently, so
  // backslashes added by the escaping are never escaped again.
  return value.replace(/[\\\n"]/g, (ch) => (ch === "\n" ? "\\n" : `\\${ch}`));
}

/**
 * Formats a sample value for the exposition text. Integers are printed as-is;
 * other values are fixed to six decimal places and then stripped of trailing
 * zeros and of a dangling decimal point.
 */
function formatNumber(value: number): string {
  if (Number.isInteger(value)) {
    return String(value);
  }
  const sixPlaces = value.toFixed(6);
  const withoutTrailingZeros = sixPlaces.replace(/0+$/, "");
  return withoutTrailingZeros.replace(/\.$/, "");
}
4 changes: 3 additions & 1 deletion src/middleware/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
RATE_LIMIT_PER_WINDOW,
RATE_LIMIT_WINDOW_MS,
} from "../store/state.js";
import { recordHttpRequest } from "../metrics.js";
import type { AgentPayRequest } from "../types.js";

/**
Expand All @@ -20,6 +21,7 @@ import type { AgentPayRequest } from "../types.js";
*/
export function installPreRouteMiddleware(app: Application): void {
app.use(createCorsMiddleware());
app.use(requestTimerMiddleware);
app.use(express.json({ limit: "100kb" }));
app.use(securityHeadersMiddleware);
app.use(requestIdMiddleware);
Expand All @@ -33,7 +35,6 @@ export function installRequestStateMiddleware(app: Application): void {
app.use(apiKeyRecognitionMiddleware);
app.use(pauseGuardMiddleware);
app.use(rateLimitMiddleware);
app.use(requestTimerMiddleware);
}

/**
Expand Down Expand Up @@ -147,6 +148,7 @@ function requestTimerMiddleware(req: Request, res: Response, next: NextFunction)
const startNs = process.hrtime.bigint();
res.on("finish", () => {
const ms = Number(process.hrtime.bigint() - startNs) / 1_000_000;
recordHttpRequest(req, res.statusCode, ms / 1000);
if (!res.headersSent) {
res.setHeader("Server-Timing", `app;dur=${ms.toFixed(1)}`);
}
Expand Down
15 changes: 15 additions & 0 deletions src/routes/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
type Request,
type Response,
} from "express";
import { recordHttpError } from "../metrics.js";
import { getRequestId } from "../types.js";

/**
Expand All @@ -19,6 +20,7 @@ export function installErrorHandlers(app: Application): void {
});

app.use((err: unknown, req: Request, res: Response, _next: NextFunction) => {
recordHttpError(errorType(err));
if (
err &&
typeof err === "object" &&
Expand All @@ -42,3 +44,16 @@ export function installErrorHandlers(app: Application): void {
});
});
}

/**
 * Picks the label under which a thrown value is counted.
 *
 * Preference order: a non-empty string `type` property on an object value,
 * then the non-empty `name` of an `Error` instance, then the literal
 * "unknown".
 */
function errorType(err: unknown): string {
  const declaredType: unknown =
    typeof err === "object" && err !== null && "type" in err
      ? (err as { type: unknown }).type
      : undefined;
  if (typeof declaredType === "string" && declaredType !== "") {
    return declaredType;
  }
  return err instanceof Error && err.name.length > 0 ? err.name : "unknown";
}
2 changes: 2 additions & 0 deletions src/routes/metrics.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Router, type Response } from "express";
import { renderHttpMetrics } from "../metrics.js";
import { apiKeyStore, pauseState, servicesStore, usageStore } from "../store/state.js";

/**
Expand All @@ -23,6 +24,7 @@ export function createMetricsRouter(): Router {
"# HELP agentpay_paused 1 if the backend is paused, 0 otherwise.",
"# TYPE agentpay_paused gauge",
`agentpay_paused ${pauseState.paused ? 1 : 0}`,
...renderHttpMetrics(),
];
res.setHeader("Content-Type", "text/plain; version=0.0.4");
res.send(lines.join("\n") + "\n");
Expand Down