From 2325eee2167aad162bbf87928df4a93d18ae8123 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Mon, 9 Mar 2026 09:20:22 -0400 Subject: [PATCH] Store job state in MongoDB instead of PVC - Replace file-based state storage with MongoDB collection (job_state) - Remove PVC configuration from cronjobs.yml - Make shouldRun() async to support database queries This fixes the bi-weekly timing issue where the last-run state was lost when the ephemeral PVC was wiped between job runs. --- github-metrics/check-last-run.js | 89 ++++++++++++++++++++------------ github-metrics/cronjobs.yml | 9 +--- github-metrics/index.js | 2 +- 3 files changed, 58 insertions(+), 42 deletions(-) diff --git a/github-metrics/check-last-run.js b/github-metrics/check-last-run.js index 104e157..b6a8424 100644 --- a/github-metrics/check-last-run.js +++ b/github-metrics/check-last-run.js @@ -1,30 +1,43 @@ -import fs from 'fs'; -import { writeFile, mkdir } from 'fs/promises'; -import path from 'path'; - -// Path to the state file (mounted from persistent volume) -// Can be overridden via STATE_FILE_PATH environment variable -const STATE_FILE_PATH = process.env.STATE_FILE_PATH || '/data/last-run.json'; +import { MongoClient } from 'mongodb'; // Minimum days between runs (13 days to account for timing variations with weekly Monday runs) // Can be overridden via MIN_DAYS_BETWEEN_RUNS environment variable const MIN_DAYS_BETWEEN_RUNS = parseInt(process.env.MIN_DAYS_BETWEEN_RUNS || '13', 10); +// State document identifier +const STATE_DOC_ID = 'last-run-state'; + +/** + * Get the MongoDB collection for storing state + * @param {MongoClient} client - MongoDB client + * @returns {Collection} The state collection + */ +function getStateCollection(client) { + const database = client.db('github_metrics'); + return database.collection('job_state'); +} + /** * Check if enough time has passed since the last run - * @returns {boolean} true if should run, false if should skip + * @returns {Promise<boolean>} true if should run, 
false if should skip */ -export function shouldRun() { +export async function shouldRun() { + const uri = process.env.ATLAS_CONNECTION_STRING; + const client = new MongoClient(uri); + try { - // Check if state file exists - if (!fs.existsSync(STATE_FILE_PATH)) { + await client.connect(); + const collection = getStateCollection(client); + + // Find the last run state document + const stateDoc = await collection.findOne({ _id: STATE_DOC_ID }); + + if (!stateDoc) { console.log('No previous run found. Running for the first time.'); return true; } - // Read the last run timestamp - const stateData = JSON.parse(fs.readFileSync(STATE_FILE_PATH, 'utf8')); - const lastRunTime = new Date(stateData.lastRun); + const lastRunTime = new Date(stateDoc.lastRun); const now = new Date(); // Calculate days since last run @@ -35,44 +48,54 @@ export function shouldRun() { console.log(`Minimum days required: ${MIN_DAYS_BETWEEN_RUNS}`); if (daysSinceLastRun < MIN_DAYS_BETWEEN_RUNS) { - console.log(`⏭️ Skipping run - only ${daysSinceLastRun.toFixed(2)} days since last run (need ${MIN_DAYS_BETWEEN_RUNS})`); + console.log(`Skipping run - only ${daysSinceLastRun.toFixed(2)} days since last run (need ${MIN_DAYS_BETWEEN_RUNS})`); return false; } - console.log(`✅ Proceeding with run - ${daysSinceLastRun.toFixed(2)} days since last run`); + console.log(`Proceeding with run - ${daysSinceLastRun.toFixed(2)} days since last run`); return true; } catch (error) { console.error('Error checking last run time:', error.message); - console.log('Proceeding with run due to error reading state file'); - return true; // Run if we can't read the state file + console.log('Proceeding with run due to error reading state from database'); + return true; // Run if we can't read the state + } finally { + await client.close(); } } /** - * Update the state file with the current timestamp + * Update the state in MongoDB with the current timestamp */ export async function updateLastRun() { + const uri = 
process.env.ATLAS_CONNECTION_STRING; + const client = new MongoClient(uri); + try { + await client.connect(); + const collection = getStateCollection(client); const now = new Date(); - const stateData = { - lastRun: now.toISOString(), - timestamp: now.getTime() - }; - - // Ensure the directory exists - const dir = path.dirname(STATE_FILE_PATH); - if (!fs.existsSync(dir)) { - await mkdir(dir, { recursive: true }); - } - // Write the state file - await writeFile(STATE_FILE_PATH, JSON.stringify(stateData, null, 2), 'utf8'); - console.log(`✅ Updated last run timestamp: ${now.toISOString()}`); + // Upsert the state document + await collection.updateOne( + { _id: STATE_DOC_ID }, + { + $set: { + lastRun: now.toISOString(), + timestamp: now.getTime(), + updatedAt: now + } + }, + { upsert: true } + ); + + console.log(`Updated last run timestamp in database: ${now.toISOString()}`); } catch (error) { console.error('Error updating last run time:', error.message); - // Don't throw - we don't want to fail the job just because we can't write the state file + // Don't throw - we don't want to fail the job just because we can't write the state + } finally { + await client.close(); } } diff --git a/github-metrics/cronjobs.yml b/github-metrics/cronjobs.yml index d43dfb4..ba7cb15 100644 --- a/github-metrics/cronjobs.yml +++ b/github-metrics/cronjobs.yml @@ -39,11 +39,4 @@ cronJobs: runAsUser: 1001 runAsGroup: 1001 fsGroup: 1001 - # Persistent volume to store last run timestamp - persistence: - enabled: true - name: github-metrics-state - storageClass: "kops-csi-1-21" - accessMode: ReadWriteOnce - size: 1Gi - mountPath: /data + # Last run state is now stored in MongoDB instead of a PVC diff --git a/github-metrics/index.js b/github-metrics/index.js index 5026de0..b7ec472 100644 --- a/github-metrics/index.js +++ b/github-metrics/index.js @@ -50,7 +50,7 @@ async function main() { try { // Check if enough time has passed since last run - if (!shouldRun()) { + if (!await shouldRun()) { 
console.log('Exiting - not enough time has passed since last run'); await sendSlackNotification({ skipped: true }); process.exit(0);