From 68c93862f6da3b001bbc3512426a9240712f2800 Mon Sep 17 00:00:00 2001 From: James Martinez Date: Sun, 22 Feb 2026 19:48:53 -0600 Subject: [PATCH] Add Prometheus metrics endpoint to dashboard --- compose.yaml | 7 ++ docker/prometheus/prometheus.yml | 8 ++ package-lock.json | 80 ++++++++++++- packages/dashboard/package.json | 1 + .../dashboard/src/lib/metrics.server.test.ts | 85 ++++++++++++++ packages/dashboard/src/lib/metrics.server.ts | 50 ++++++++ packages/dashboard/src/routeTree.gen.ts | 24 +++- packages/dashboard/src/routes/metrics.ts | 12 ++ packages/dashboard/vite.config.ts | 4 + packages/docs/docs.json | 3 +- packages/docs/docs/dashboard.mdx | 5 + packages/docs/docs/production.mdx | 6 + packages/docs/docs/prometheus.mdx | 105 +++++++++++++++++ packages/openworkflow/backend.test.ts | 50 ++++++++ packages/openworkflow/backend.testsuite.ts | 107 ++++++++++++++++++ packages/openworkflow/backend.ts | 40 ++++++- packages/openworkflow/postgres/backend.ts | 15 +++ packages/openworkflow/sqlite/backend.ts | 18 +++ 18 files changed, 609 insertions(+), 11 deletions(-) create mode 100644 docker/prometheus/prometheus.yml create mode 100644 packages/dashboard/src/lib/metrics.server.test.ts create mode 100644 packages/dashboard/src/lib/metrics.server.ts create mode 100644 packages/dashboard/src/routes/metrics.ts create mode 100644 packages/docs/docs/prometheus.mdx create mode 100644 packages/openworkflow/backend.test.ts diff --git a/compose.yaml b/compose.yaml index 8eadc845..37ac8ac7 100644 --- a/compose.yaml +++ b/compose.yaml @@ -9,3 +9,10 @@ services: postgres -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all + + prometheus: + image: prom/prometheus + ports: + - 9090:9090 + volumes: + - ./docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml new file mode 100644 index 00000000..b9d2d75f --- /dev/null +++ b/docker/prometheus/prometheus.yml @@ -0,0 
+1,8 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: openworkflow_dashboard + metrics_path: /metrics + static_configs: + - targets: ["host.docker.internal:3000"] diff --git a/package-lock.json b/package-lock.json index d05f997f..017aedbc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -222,6 +222,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz", "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -899,6 +900,7 @@ "integrity": "sha512-lf6d+BdMkJIFCxx2FpajLpqVGGyaGUNFU6jhEM6QUPeGuoA5et2kJXrL0NSY2uWLOVyYYc/FPjzlbe8trA9tBQ==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=20" } @@ -967,7 +969,8 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-css/-/dict-css-4.0.19.tgz", "integrity": "sha512-VYHtPnZt/Zd/ATbW3rtexWpBnHUohUrQOHff/2JBhsVgxOrksAxJnLAO43Q1ayLJBJUUwNVo+RU0sx0aaysZfg==", "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@cspell/dict-dart": { "version": "2.3.2", @@ -1107,14 +1110,16 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-html/-/dict-html-4.0.14.tgz", "integrity": "sha512-2bf7n+kS92g+cMKV0wr9o/Oq9n8JzU7CcrB96gIh2GHgnF+0xDOqO2W/1KeFAqOfqosoOVE48t+4dnEMkkoJ2Q==", "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@cspell/dict-html-symbol-entities": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/@cspell/dict-html-symbol-entities/-/dict-html-symbol-entities-4.0.5.tgz", "integrity": "sha512-429alTD4cE0FIwpMucvSN35Ld87HCyuM8mF731KU5Rm4Je2SG6hmVx7nkBsLyrmH3sQukTcr1GaiZsiEg8svPA==", "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@cspell/dict-java": { "version": "5.0.12", @@ -1312,7 +1317,8 @@ "resolved": 
"https://registry.npmjs.org/@cspell/dict-typescript/-/dict-typescript-3.2.3.tgz", "integrity": "sha512-zXh1wYsNljQZfWWdSPYwQhpwiuW0KPW1dSd8idjMRvSD0aSvWWHoWlrMsmZeRl4qM4QCEAjua8+cjflm41cQBg==", "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@cspell/dict-vue": { "version": "3.0.5", @@ -1489,6 +1495,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=20.19.0" }, @@ -1529,6 +1536,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=20.19.0" } @@ -2772,6 +2780,7 @@ "resolved": "https://registry.npmjs.org/@noble/ciphers/-/ciphers-1.3.0.tgz", "integrity": "sha512-2I0gnIVPtfnMw9ee9h1dJG7tp81+8Ob3OJb3Mv37rx5L40/b0i7djjCVvGOVqc9AEIQyvyu1i6ypKdFw8R8gQw==", "license": "MIT", + "peer": true, "engines": { "node": "^14.21.3 || >=16" }, @@ -4835,6 +4844,7 @@ "resolved": "https://registry.npmjs.org/@tanstack/react-router/-/react-router-1.161.3.tgz", "integrity": "sha512-evYPrkuFt4T6E0WVyBGGq83lWHJjsYy3E5SpPpfPY/uRnEgmgwfr6Xl570msRnWYMj7DIkYg8ZWFFwzqKrSlBw==", "license": "MIT", + "peer": true, "dependencies": { "@tanstack/history": "1.154.14", "@tanstack/react-store": "^0.9.1", @@ -4977,6 +4987,7 @@ "resolved": "https://registry.npmjs.org/@tanstack/router-core/-/router-core-1.161.3.tgz", "integrity": "sha512-8EuaGXLUjugQE9Rsb8VrWSy+wImcs/DZ9JORqUJYCmiiWnJzbat8KedQItq/9LCjMJyx4vTLCt8NnZCL+j1Ayg==", "license": "MIT", + "peer": true, "dependencies": { "@tanstack/history": "1.154.14", "@tanstack/store": "^0.9.1", @@ -5303,6 +5314,7 @@ "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -5351,6 +5363,7 @@ "integrity": "sha512-3DgfkukFyC/sE/VuYjaUUWoFfuVjPK55vOFDsxD56XXynFMCZDYFogH2l/hDfOsQAm1myoU/1xByJ3tWqtulXA==", "dev": true, "license": "Apache-2.0", + "peer": true, "dependencies": { "@babel/generator": "^7.28.0", "@babel/parser": 
"^7.28.0", @@ -5531,6 +5544,7 @@ "integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.18.0" } @@ -5540,6 +5554,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -5549,6 +5564,7 @@ "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz", "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -5607,6 +5623,7 @@ "integrity": "sha512-4z2nCSBfVIMnbuu8uinj+f0o4qOeggYJLbjpPHka3KH1om7e+H9yLKTYgksTaHcGco+NClhhY2vyO3HsMH1RGw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.55.0", "@typescript-eslint/types": "8.55.0", @@ -6260,6 +6277,7 @@ "node_modules/acorn": { "version": "8.15.0", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -6720,6 +6738,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/bintrees": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz", + "integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==", + "license": "MIT" + }, "node_modules/blamer": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/blamer/-/blamer-1.0.7.tgz", @@ -6926,6 +6950,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.19", "caniuse-lite": "^1.0.30001751", @@ -7645,6 +7670,7 @@ "resolved": "https://registry.npmjs.org/crossws/-/crossws-0.4.4.tgz", "integrity": 
"sha512-w6c4OdpRNnudVmcgr7brb/+/HmYjMQvYToO/oTrprTwxRUiom3LYWU1PMWuD006okbUWpII1Ea9/+kwpUfmyRg==", "license": "MIT", + "peer": true, "peerDependencies": { "srvx": ">=0.7.1" }, @@ -7875,7 +7901,8 @@ "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/data-uri-to-buffer": { "version": "4.0.1", @@ -7969,6 +7996,7 @@ "resolved": "https://registry.npmjs.org/db0/-/db0-0.3.4.tgz", "integrity": "sha512-RiXXi4WaNzPTHEOu8UPQKMooIbqOEyqA1t7Z6MsdxSCeb8iUC9ko3LcmsLmeUt2SM5bctfArZKkRQggKZz7JNw==", "license": "MIT", + "peer": true, "peerDependencies": { "@electric-sql/pglite": "*", "@libsql/client": "*", @@ -8642,6 +8670,7 @@ "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -8876,6 +8905,7 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -9336,6 +9366,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -10188,6 +10219,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.9.tgz", "integrity": "sha512-Eaw2YTGM6WOxA6CXbckaEvslr2Ne4NFsKrvc0v97JD5awbmeBLO5w9Ho9L9kmKonrwF9RJlW6BxT1PVv/agBHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -11817,6 +11849,7 @@ "resolved": 
"https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", "license": "ISC", + "peer": true, "dependencies": { "yallist": "^3.0.2" } @@ -12553,7 +12586,8 @@ "version": "2.0.0-alpha.3", "resolved": "https://registry.npmjs.org/ofetch/-/ofetch-2.0.0-alpha.3.tgz", "integrity": "sha512-zpYTCs2byOuft65vI3z43Dd6iSdFbOZZLb9/d21aCpx2rGastVU9dOCv0lu4ykc1Ur1anAYjDi3SUvR0vq50JA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/ohash": { "version": "2.0.11", @@ -13099,6 +13133,7 @@ "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz", "integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==", "license": "MIT", + "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -13533,6 +13568,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13542,6 +13578,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -13827,6 +13864,7 @@ "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.53.5.tgz", "integrity": "sha512-iTNAbFSlRpcHeeWu73ywU/8KuU/LZmNCSxp6fjQkJBD3ivUb8tpDrXhIxEzA05HlYMEwmtaUnb3RP+YNv162OQ==", "license": "MIT", + "peer": true, "dependencies": { "@types/estree": "1.0.8" }, @@ -14070,6 +14108,7 @@ "resolved": "https://registry.npmjs.org/seroval/-/seroval-1.5.0.tgz", "integrity": "sha512-OE4cvmJ1uSPrKorFIH9/w/Qwuvi/IMcGbv5RKgcJ/zjA/IohDLU6SVaxFN9FwajbP7nsX0dQqMDes1whk3y+yw==", "license": "MIT", + "peer": true, "engines": { "node": ">=10" } @@ 
-14402,6 +14441,7 @@ "resolved": "https://registry.npmjs.org/solid-js/-/solid-js-1.9.11.tgz", "integrity": "sha512-WEJtcc5mkh/BnHA6Yrg4whlF8g6QwpmXXRg4P2ztPmcKeHHlH4+djYecBLhSpecZY2RRECXYUwIc/C2r3yzQ4Q==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.1.0", "seroval": "~1.5.0", @@ -14490,6 +14530,7 @@ "resolved": "https://registry.npmjs.org/srvx/-/srvx-0.10.1.tgz", "integrity": "sha512-A//xtfak4eESMWWydSRFUVvCTQbSwivnGCEf8YGPe2eHU0+Z6znfUTCPF0a7oV3sObSOcrXHlL6Bs9vVctfXdg==", "license": "MIT", + "peer": true, "bin": { "srvx": "bin/srvx.mjs" }, @@ -14815,6 +14856,15 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/tdigest": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz", + "integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==", + "license": "MIT", + "dependencies": { + "bintrees": "1.0.2" + } + }, "node_modules/tiny-case": { "version": "1.0.3", "license": "MIT" @@ -15291,6 +15341,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -15425,6 +15476,7 @@ "dev": true, "hasInstallScript": true, "license": "MIT", + "peer": true, "dependencies": { "napi-postinstall": "^0.3.0" }, @@ -15548,6 +15600,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -15656,6 +15709,7 @@ "integrity": "sha512-hOQuK7h0FGKgBAas7v0mSAsnvrIgAvWmRFjmzpJ7SwFHH3g1k2u37JtYwOwmEKhK6ZO3v9ggDBBm0La1LCK4uQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/expect": "4.0.18", "@vitest/mocker": "4.0.18", @@ -16259,6 +16313,7 @@ "resolved": 
"https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -16339,6 +16394,7 @@ "clsx": "^2.1.1", "nitro": "^3.0.1-alpha.2", "openworkflow": "*", + "prom-client": "^14.2.0", "react": "^19.2.4", "react-dom": "^19.2.4", "shadcn": "^3.8.5", @@ -16418,6 +16474,18 @@ } } }, + "packages/dashboard/node_modules/prom-client": { + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-14.2.0.tgz", + "integrity": "sha512-sF308EhTenb/pDRPakm+WgiN+VdM/T1RaHj1x+MvAuT8UiQP8JmOEbxVqtkbfR4LrvOg5n7ic01kRBDGXjYikA==", + "license": "Apache-2.0", + "dependencies": { + "tdigest": "^0.1.1" + }, + "engines": { + "node": ">=10" + } + }, "packages/dashboard/node_modules/tailwindcss": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.0.tgz", diff --git a/packages/dashboard/package.json b/packages/dashboard/package.json index b5733b17..8c2abf17 100644 --- a/packages/dashboard/package.json +++ b/packages/dashboard/package.json @@ -32,6 +32,7 @@ "clsx": "^2.1.1", "nitro": "^3.0.1-alpha.2", "openworkflow": "*", + "prom-client": "^14.2.0", "react": "^19.2.4", "react-dom": "^19.2.4", "shadcn": "^3.8.5", diff --git a/packages/dashboard/src/lib/metrics.server.test.ts b/packages/dashboard/src/lib/metrics.server.test.ts new file mode 100644 index 00000000..873f99dd --- /dev/null +++ b/packages/dashboard/src/lib/metrics.server.test.ts @@ -0,0 +1,85 @@ +import { getBackend } from "./backend"; +import { getMetricsResponse } from "./metrics.server"; +import type { Backend, WorkflowRunCounts } from "openworkflow/internal"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("./backend", () => ({ + getBackend: vi.fn(), +})); + +const mockedGetBackend = vi.mocked(getBackend); + +const ZERO_COUNTS: 
WorkflowRunCounts = { + pending: 0, + running: 0, + sleeping: 0, + completed: 0, + failed: 0, + canceled: 0, +}; + +describe("getMetricsResponse()", () => { + beforeEach(() => { + mockedGetBackend.mockReset(); + }); + + it("returns Prometheus exposition format with expected metric labels", async () => { + const counts: WorkflowRunCounts = { + ...ZERO_COUNTS, + pending: 3, + running: 1, + completed: 4, + failed: 2, + }; + + const backend: Pick = { + countWorkflowRuns: vi.fn().mockResolvedValue(counts), + }; + mockedGetBackend.mockResolvedValue(backend); + + const response = await getMetricsResponse(); + const body = await response.text(); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe( + "text/plain; version=0.0.4; charset=utf-8", + ); + expect(body).toContain("# HELP openworkflow_workflow_runs"); + expect(body).toContain("# TYPE openworkflow_workflow_runs gauge"); + expect(body).toContain('openworkflow_workflow_runs{status="pending"} 3'); + expect(body).toContain('openworkflow_workflow_runs{status="running"} 1'); + expect(body).toContain('openworkflow_workflow_runs{status="sleeping"} 0'); + expect(body).toContain('openworkflow_workflow_runs{status="completed"} 4'); + expect(body).toContain('openworkflow_workflow_runs{status="failed"} 2'); + expect(body).toContain('openworkflow_workflow_runs{status="canceled"} 0'); + }); + + it("calls backend.countWorkflowRuns() on every scrape", async () => { + const backend: Pick = { + countWorkflowRuns: vi.fn().mockResolvedValue(ZERO_COUNTS), + }; + mockedGetBackend.mockResolvedValue(backend); + + await getMetricsResponse(); + await getMetricsResponse(); + + expect(mockedGetBackend).toHaveBeenCalledTimes(2); + expect(backend.countWorkflowRuns).toHaveBeenCalledTimes(2); + }); + + it("returns 500 when backend aggregation fails", async () => { + const backend: Pick = { + countWorkflowRuns: vi + .fn() + .mockRejectedValue(new Error("failed to aggregate")), + }; + 
/**
 * Build the HTTP response served by the dashboard's `GET /metrics` route.
 *
 * Each call asks the backend for the current workflow run counts and renders
 * them through a request-scoped prom-client registry, so nothing is cached
 * between scrapes.
 * @returns A 200 response in Prometheus exposition format, or a 500
 * plain-text response when the counts could not be collected
 */
export async function getMetricsResponse(): Promise<Response> {
  try {
    const backend = await getBackend();
    const workflowRunCounts = await backend.countWorkflowRuns();

    // The gauge is registered on this local registry rather than on
    // prom-client's global default registry.
    const registry = new Registry();
    registerWorkflowRunCounts(registry, workflowRunCounts);

    return new Response(await registry.metrics(), {
      status: 200,
      headers: {
        "content-type": registry.contentType,
      },
    });
  } catch (error) {
    // Keep the response body generic, but leave a trace for operators:
    // without this a failing scrape is undiagnosable from the server side.
    console.error("failed to collect metrics", error);

    return new Response("failed to collect metrics\n", {
      status: 500,
      headers: {
        "content-type": "text/plain; charset=utf-8",
      },
    });
  }
}

/**
 * Register the `openworkflow_workflow_runs` gauge on `registry` and set one
 * sample per status.
 * @param registry - Registry the gauge is attached to
 * @param workflowRunCounts - Current run counts keyed by status
 */
function registerWorkflowRunCounts(
  registry: Registry,
  workflowRunCounts: WorkflowRunCounts,
): void {
  const workflowRunsGauge = new Gauge({
    name: "openworkflow_workflow_runs",
    help: "Current count of workflow runs in each status.",
    labelNames: ["status"] as const,
    registers: [registry],
  });

  // Object.entries yields key and value together, so no `as keyof` cast is
  // needed to read the count back out of the object.
  for (const [status, count] of Object.entries(workflowRunCounts)) {
    workflowRunsGauge.set({ status }, count);
  }
}
+9,15 @@ // Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified. import { Route as rootRouteImport } from './routes/__root' +import { Route as MetricsRouteImport } from './routes/metrics' import { Route as IndexRouteImport } from './routes/index' import { Route as RunsRunIdRouteImport } from './routes/runs/$runId' +const MetricsRoute = MetricsRouteImport.update({ + id: '/metrics', + path: '/metrics', + getParentRoute: () => rootRouteImport, +} as any) const IndexRoute = IndexRouteImport.update({ id: '/', path: '/', @@ -25,32 +31,43 @@ const RunsRunIdRoute = RunsRunIdRouteImport.update({ export interface FileRoutesByFullPath { '/': typeof IndexRoute + '/metrics': typeof MetricsRoute '/runs/$runId': typeof RunsRunIdRoute } export interface FileRoutesByTo { '/': typeof IndexRoute + '/metrics': typeof MetricsRoute '/runs/$runId': typeof RunsRunIdRoute } export interface FileRoutesById { __root__: typeof rootRouteImport '/': typeof IndexRoute + '/metrics': typeof MetricsRoute '/runs/$runId': typeof RunsRunIdRoute } export interface FileRouteTypes { fileRoutesByFullPath: FileRoutesByFullPath - fullPaths: '/' | '/runs/$runId' + fullPaths: '/' | '/metrics' | '/runs/$runId' fileRoutesByTo: FileRoutesByTo - to: '/' | '/runs/$runId' - id: '__root__' | '/' | '/runs/$runId' + to: '/' | '/metrics' | '/runs/$runId' + id: '__root__' | '/' | '/metrics' | '/runs/$runId' fileRoutesById: FileRoutesById } export interface RootRouteChildren { IndexRoute: typeof IndexRoute + MetricsRoute: typeof MetricsRoute RunsRunIdRoute: typeof RunsRunIdRoute } declare module '@tanstack/react-router' { interface FileRoutesByPath { + '/metrics': { + id: '/metrics' + path: '/metrics' + fullPath: '/metrics' + preLoaderRoute: typeof MetricsRouteImport + parentRoute: typeof rootRouteImport + } '/': { id: '/' path: '/' @@ -70,6 +87,7 @@ declare module '@tanstack/react-router' { const rootRouteChildren: RootRouteChildren = { 
IndexRoute: IndexRoute, + MetricsRoute: MetricsRoute, RunsRunIdRoute: RunsRunIdRoute, } export const routeTree = rootRouteImport diff --git a/packages/dashboard/src/routes/metrics.ts b/packages/dashboard/src/routes/metrics.ts new file mode 100644 index 00000000..f87c589f --- /dev/null +++ b/packages/dashboard/src/routes/metrics.ts @@ -0,0 +1,12 @@ +import { getMetricsResponse } from "@/lib/metrics.server"; +import { createFileRoute } from "@tanstack/react-router"; + +export const Route = createFileRoute("/metrics")({ + server: { + handlers: { + GET: async () => { + return await getMetricsResponse(); + }, + }, + }, +}); diff --git a/packages/dashboard/vite.config.ts b/packages/dashboard/vite.config.ts index e53f6bb5..340aa4c1 100644 --- a/packages/dashboard/vite.config.ts +++ b/packages/dashboard/vite.config.ts @@ -18,6 +18,10 @@ const config = defineConfig({ tanstackStart(), viteReact(), ], + server: { + host: true, + allowedHosts: ["host.docker.internal"], + }, }); export default config; diff --git a/packages/docs/docs.json b/packages/docs/docs.json index 05b972f9..3b37864e 100644 --- a/packages/docs/docs.json +++ b/packages/docs/docs.json @@ -45,7 +45,8 @@ "docs/type-safety", "docs/versioning", "docs/canceling", - "docs/production" + "docs/production", + "docs/prometheus" ] }, { diff --git a/packages/docs/docs/dashboard.mdx b/packages/docs/docs/dashboard.mdx index e401863f..e3f9844a 100644 --- a/packages/docs/docs/dashboard.mdx +++ b/packages/docs/docs/dashboard.mdx @@ -36,6 +36,11 @@ The dashboard requires: The dashboard connects to the same backend as your workers. It shows data for the namespace configured in your config file. +## Prometheus Metrics + +The dashboard serves a Prometheus endpoint at `GET /metrics`. See the +[Prometheus guide](/docs/prometheus) for setup and alert examples. + The dashboard does not include built-in authentication. For production use, deploy it behind a VPN, private network, or reverse proxy with Basic Auth. 
diff --git a/packages/docs/docs/production.mdx b/packages/docs/docs/production.mdx index 75b169bf..f98c2141 100644 --- a/packages/docs/docs/production.mdx +++ b/packages/docs/docs/production.mdx @@ -162,3 +162,9 @@ The dashboard shows: The dashboard does not include built-in authentication. We recommend deploying it behind a VPN, private network, or Basic Auth proxy to prevent unauthorized access. + +### Prometheus Metrics + +The dashboard process exposes a Prometheus-compatible `GET /metrics` endpoint. +See the [Prometheus guide](/docs/prometheus) for scrape configuration and alert +examples. diff --git a/packages/docs/docs/prometheus.mdx b/packages/docs/docs/prometheus.mdx new file mode 100644 index 00000000..966dcdbf --- /dev/null +++ b/packages/docs/docs/prometheus.mdx @@ -0,0 +1,105 @@ +--- +title: Prometheus Metrics +description: Monitor workflow health with Prometheus +--- + +The OpenWorkflow dashboard exposes a `GET /metrics` endpoint that serves +workflow run counts in Prometheus exposition format. Use it to track failures, +detect stuck backlogs, and alert on throughput drops — with no extra services +to deploy. + +## Setup + +### 1. Start the dashboard + + +```bash npm +npx @openworkflow/cli dashboard +``` + +```bash pnpm +pnpx @openworkflow/cli dashboard +``` + +```bash bun +bunx @openworkflow/cli dashboard +``` + + + +### 2. Verify the endpoint + +```bash +curl -s http://localhost:3000/metrics +``` + +You should see output like: + +```text +# HELP openworkflow_workflow_runs Current count of workflow runs in each status. +# TYPE openworkflow_workflow_runs gauge +openworkflow_workflow_runs{status="pending"} 12 +openworkflow_workflow_runs{status="running"} 3 +openworkflow_workflow_runs{status="sleeping"} 1 +openworkflow_workflow_runs{status="completed"} 847 +openworkflow_workflow_runs{status="failed"} 2 +openworkflow_workflow_runs{status="canceled"} 0 +``` + +### 3. 
Configure Prometheus to scrape the dashboard + +Add a scrape target to your `prometheus.yml`: + +```yaml +scrape_configs: + - job_name: openworkflow + scrape_interval: 15s + static_configs: + - targets: ["localhost:3000"] +``` + +Replace `localhost:3000` with the address where your dashboard is running. + +## Metrics reference + +### `openworkflow_workflow_runs` + +| Field | Value | +| ------ | ------------------------------------------------------------------- | +| Type | Gauge | +| Labels | `status` | +| Values | `pending`, `running`, `sleeping`, `completed`, `failed`, `canceled` | + +Current count of workflow runs in each status. One query is executed per +scrape — there is no caching. + +- **Active statuses** (`pending`, `running`, `sleeping`) — Is work piling up? + Are runs stuck? +- **Terminal statuses** (`completed`, `failed`, `canceled`) — Are failures + increasing? Is throughput steady? + +## Alert examples + +### Failures detected in the last 5 minutes + +```promql +clamp_min(delta(openworkflow_workflow_runs{status="failed"}[5m]), 0) > 0 +``` + +### Failure rate elevated over the last hour + +```promql +clamp_min(delta(openworkflow_workflow_runs{status="failed"}[1h]), 0) > 10 +``` + +### Pending backlog is growing + +```promql +delta(openworkflow_workflow_runs{status="pending"}[10m]) > 0 +``` + +### No completed runs in the last hour + +```promql +clamp_min(delta(openworkflow_workflow_runs{status="completed"}[1h]), 0) == 0 +``` diff --git a/packages/openworkflow/backend.test.ts b/packages/openworkflow/backend.test.ts new file mode 100644 index 00000000..cb735acc --- /dev/null +++ b/packages/openworkflow/backend.test.ts @@ -0,0 +1,50 @@ +import { toWorkflowRunCounts } from "./backend.js"; +import { describe, expect, test } from "vitest"; + +describe("toWorkflowRunCounts", () => { + test("folds legacy succeeded rows into completed regardless of order", () => { + const counts = toWorkflowRunCounts([ + { status: "completed", count: 2 }, + { status: 
"succeeded", count: 3 }, + ]); + + expect(counts).toEqual({ + pending: 0, + running: 0, + sleeping: 0, + completed: 5, + failed: 0, + canceled: 0, + }); + + const reversedCounts = toWorkflowRunCounts([ + { status: "succeeded", count: 3 }, + { status: "completed", count: 2 }, + ]); + + expect(reversedCounts).toEqual({ + pending: 0, + running: 0, + sleeping: 0, + completed: 5, + failed: 0, + canceled: 0, + }); + }); + + test("ignores unknown statuses", () => { + const counts = toWorkflowRunCounts([ + { status: "pending", count: 1 }, + { status: "unknown_status", count: 99 }, + ]); + + expect(counts).toEqual({ + pending: 1, + running: 0, + sleeping: 0, + completed: 0, + failed: 0, + canceled: 0, + }); + }); +}); diff --git a/packages/openworkflow/backend.testsuite.ts b/packages/openworkflow/backend.testsuite.ts index 41dfd9bf..ab951ec2 100644 --- a/packages/openworkflow/backend.testsuite.ts +++ b/packages/openworkflow/backend.testsuite.ts @@ -607,6 +607,113 @@ export function testBackend(options: TestBackendOptions): void { }); }); + describe("countWorkflowRuns()", () => { + test("returns zero counts for an empty backend", async () => { + const backend = await setup(); + + expect(await backend.countWorkflowRuns()).toEqual({ + pending: 0, + running: 0, + sleeping: 0, + completed: 0, + failed: 0, + canceled: 0, + }); + + await teardown(backend); + }); + + test("returns counts grouped by workflow run status", async () => { + const backend = await setup(); + + const runningRun = await createClaimedWorkflowRun(backend); + const runningWorkerId = runningRun.workerId; + if (!runningWorkerId) throw new Error("Expected workerId"); + + const sleepingRun = await createClaimedWorkflowRun(backend); + const sleepingWorkerId = sleepingRun.workerId; + if (!sleepingWorkerId) throw new Error("Expected workerId"); + await backend.sleepWorkflowRun({ + workflowRunId: sleepingRun.id, + workerId: sleepingWorkerId, + availableAt: new Date(Date.now() + 60_000), + }); + + const completedRun = 
await createClaimedWorkflowRun(backend); + const completedWorkerId = completedRun.workerId; + if (!completedWorkerId) throw new Error("Expected workerId"); + await backend.completeWorkflowRun({ + workflowRunId: completedRun.id, + workerId: completedWorkerId, + output: null, + }); + + const failedRun = await createClaimedWorkflowRun(backend); + const failedWorkerId = failedRun.workerId; + if (!failedWorkerId) throw new Error("Expected workerId"); + await backend.failWorkflowRun({ + workflowRunId: failedRun.id, + workerId: failedWorkerId, + error: { message: "failed run" }, + retryPolicy: { + ...DEFAULT_WORKFLOW_RETRY_POLICY, + maximumAttempts: 1, + }, + }); + + const canceledRun = await createPendingWorkflowRun(backend); + await backend.cancelWorkflowRun({ workflowRunId: canceledRun.id }); + + await createPendingWorkflowRun(backend); + + expect(await backend.countWorkflowRuns()).toEqual({ + pending: 1, + running: 1, + sleeping: 1, + completed: 1, + failed: 1, + canceled: 1, + }); + + await teardown(backend); + }); + + test("updates counts when workflow runs transition statuses", async () => { + const backend = await setup(); + + const pendingRun = await createPendingWorkflowRun(backend); + expect(await backend.countWorkflowRuns()).toMatchObject({ + pending: 1, + running: 0, + }); + + const workerId = randomUUID(); + const claimed = await backend.claimWorkflowRun({ + workerId, + leaseDurationMs: 60_000, + }); + expect(claimed?.id).toBe(pendingRun.id); + + expect(await backend.countWorkflowRuns()).toMatchObject({ + pending: 0, + running: 1, + }); + + await backend.completeWorkflowRun({ + workflowRunId: pendingRun.id, + workerId, + output: null, + }); + + expect(await backend.countWorkflowRuns()).toMatchObject({ + running: 0, + completed: 1, + }); + + await teardown(backend); + }); + }); + describe("claimWorkflowRun()", () => { // because claims involve timing and leases, we create and teardown a new // namespaced backend instance for each test diff --git 
/**
 * Number of workflow runs per status. The deprecated `succeeded` status has
 * no key of its own; {@link toWorkflowRunCounts} adds it to `completed`.
 */
export type WorkflowRunCounts = Omit<
  Record<WorkflowRunStatus, number>,
  "succeeded"
>;

/**
 * Convert status-count rows from a `GROUP BY "status"` query into a
 * typed {@link WorkflowRunCounts} object.
 *
 * Every known status is present in the result (defaulting to 0). Rows with
 * the deprecated `succeeded` status are added to `completed`; rows with any
 * other unrecognised status are ignored. Counts may arrive as numbers or as
 * numeric strings and are converted with `Number()`.
 * @param rows - Rows from the database query
 * @returns Workflow run counts keyed by status
 */
export function toWorkflowRunCounts(
  rows: readonly { status: string; count: number | string }[],
): WorkflowRunCounts {
  const counts: WorkflowRunCounts = {
    pending: 0,
    running: 0,
    sleeping: 0,
    completed: 0,
    failed: 0,
    canceled: 0,
  };

  for (const row of rows) {
    // 'succeeded' status is deprecated, fold into 'completed'
    const status = row.status === "succeeded" ? "completed" : row.status;

    // Own-property check (not `in`) so inherited names such as "toString"
    // are never treated as a status.
    if (!Object.hasOwn(counts, status)) continue;

    counts[status as keyof WorkflowRunCounts] += Number(row.count);
  }

  return counts;
}
/**
 * Count this namespace's workflow runs, grouped by status.
 *
 * The query runs synchronously; the method is declared `async` (hence the
 * lint suppression below) so the result is delivered as a promise, matching
 * the `Backend` interface.
 * @returns Workflow run counts keyed by status
 */
// eslint-disable-next-line @typescript-eslint/require-await
async countWorkflowRuns(): Promise<WorkflowRunCounts> {
  const stmt = this.db.prepare(`
    SELECT "status", COUNT(*) AS "count"
    FROM "workflow_runs"
    WHERE "namespace_id" = ?
    GROUP BY "status"
  `);

  // One row per status that has at least one run; statuses with no runs
  // are filled in as 0 by toWorkflowRunCounts.
  const rows = stmt.all(this.namespaceId) as {
    status: string;
    count: number;
  }[];
  return toWorkflowRunCounts(rows);
}