From a666252864669e69ad3a674106d0e65826061f49 Mon Sep 17 00:00:00 2001 From: Kris Zyp Date: Thu, 21 May 2026 23:01:36 -0600 Subject: [PATCH 1/2] feat(analytics): make retention windows configurable Add analytics.rawRetentionMs and analytics.aggregateRetentionMs config params to let operators age out hdb_analytics data faster than the hardcoded defaults (1h raw / 1y aggregate). Defaults are unchanged so existing deployments are unaffected. Raw retention is clamped to at least the aggregation period to prevent raw records from being deleted before they can be rolled up (avoids silent data loss when the value is set below the aggregation cadence). Closes: HarperFast/harper#566 Jira: CORE-3074 Co-Authored-By: Claude Sonnet 4.6 --- config-root.schema.json | 10 ++++++++++ resources/analytics/write.ts | 8 ++++++-- utility/hdbTerms.ts | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/config-root.schema.json b/config-root.schema.json index 4e603f659..d4f89c1b6 100644 --- a/config-root.schema.json +++ b/config-root.schema.json @@ -577,6 +577,16 @@ "type": "number", "description": "Number of aggregation cycles between node storage measurements. 0 to disable. Default: 10" }, + "rawRetentionMs": { + "type": "number", + "minimum": 0, + "description": "Milliseconds to retain raw (pre-aggregation) analytics. Clamped to at least aggregatePeriod to prevent data loss. Default: 3600000 (1 hour)" + }, + "aggregateRetentionMs": { + "type": "number", + "minimum": 0, + "description": "Milliseconds to retain aggregated analytics. Default: 31536000000 (1 year)" + }, "logging": { "$ref": "#/definitions/loggerConfig" } } } diff --git a/resources/analytics/write.ts b/resources/analytics/write.ts index 6fc509a6f..afd2b5965 100644 --- a/resources/analytics/write.ts +++ b/resources/analytics/write.ts @@ -715,11 +715,15 @@ function startScheduledTasks() { nodeStorageInterval = envGet(CONFIG_PARAMS.ANALYTICS_STORAGEINTERVAL) ?? DEFAULT_STORAGE_INTERVAL; const AGGREGATE_PERIOD = envGet(CONFIG_PARAMS.ANALYTICS_AGGREGATEPERIOD) * 1000; if (AGGREGATE_PERIOD) { + // Clamp raw retention to at least one full aggregation period so raw records + // are never deleted before they can be rolled up. + const rawRetentionMs = Math.max(envGet(CONFIG_PARAMS.ANALYTICS_RAWRETENTIONMS) ?? RAW_EXPIRATION, AGGREGATE_PERIOD); + const aggregateRetentionMs = envGet(CONFIG_PARAMS.ANALYTICS_AGGREGATERETENTIONMS) ?? AGGREGATE_EXPIRATION; setInterval( async () => { await aggregation(analyticsDelay, AGGREGATE_PERIOD); - await cleanup(getRawAnalyticsTable(), RAW_EXPIRATION); - await cleanup(getAnalyticsTable(), AGGREGATE_EXPIRATION); + await cleanup(getRawAnalyticsTable(), rawRetentionMs); + await cleanup(getAnalyticsTable(), aggregateRetentionMs); }, Math.min(AGGREGATE_PERIOD / 2, 0x7fffffff) ).unref(); diff --git a/utility/hdbTerms.ts b/utility/hdbTerms.ts index 26f2b9e55..b7ce6bf0b 100644 --- a/utility/hdbTerms.ts +++ b/utility/hdbTerms.ts @@ -426,6 +426,8 @@ export const LEGACY_CONFIG_PARAMS = { */ export const CONFIG_PARAMS = { ANALYTICS_AGGREGATEPERIOD: 'analytics_aggregatePeriod', + ANALYTICS_AGGREGATERETENTIONMS: 'analytics_aggregateRetentionMs', + ANALYTICS_RAWRETENTIONMS: 'analytics_rawRetentionMs', ANALYTICS_REPLICATE: 'analytics_replicate', ANALYTICS_STORAGEINTERVAL: 'analytics_storageInterval', AUTHENTICATION_AUTHORIZELOCAL: 'authentication_authorizeLocal', From fe8f8b036384da0aca556ebfb80f20d6130b145d Mon Sep 17 00:00:00 2001 From: Kris Zyp Date: Thu, 21 May 2026 23:09:21 -0600 Subject: [PATCH 2/2] fix(analytics): treat aggregateRetentionMs=0 as disabled, not mass-delete Setting aggregateRetentionMs to 0 would pass Date.now()-0=Date.now() to cleanup(), deleting every aggregate record. Treat 0 as "keep forever" to match the storageInterval: 0 = disabled convention. Co-Authored-By: Claude Sonnet 4.6 --- config-root.schema.json | 2 +- resources/analytics/write.ts | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/config-root.schema.json b/config-root.schema.json index d4f89c1b6..73c2c5226 100644 --- a/config-root.schema.json +++ b/config-root.schema.json @@ -585,7 +585,7 @@ "aggregateRetentionMs": { "type": "number", "minimum": 0, - "description": "Milliseconds to retain aggregated analytics. Default: 31536000000 (1 year)" + "description": "Milliseconds to retain aggregated analytics. 0 disables cleanup (keep forever). Default: 31536000000 (1 year)" }, "logging": { "$ref": "#/definitions/loggerConfig" } } diff --git a/resources/analytics/write.ts b/resources/analytics/write.ts index afd2b5965..cad92fc14 100644 --- a/resources/analytics/write.ts +++ b/resources/analytics/write.ts @@ -723,7 +723,8 @@ function startScheduledTasks() { async () => { await aggregation(analyticsDelay, AGGREGATE_PERIOD); await cleanup(getRawAnalyticsTable(), rawRetentionMs); - await cleanup(getAnalyticsTable(), aggregateRetentionMs); + // 0 means "keep forever" — skip aggregate cleanup, matching storageInterval: 0 convention + if (aggregateRetentionMs) await cleanup(getAnalyticsTable(), aggregateRetentionMs); }, Math.min(AGGREGATE_PERIOD / 2, 0x7fffffff) ).unref();