From fe5e1f40b22cb6183b864af5dd0e3977c5e59af1 Mon Sep 17 00:00:00 2001 From: Kenny Gutierrez Date: Sun, 31 May 2026 15:21:02 -0500 Subject: [PATCH 1/3] Batch the reporting-data purge so it survives large tables (#197) On sites that have been collecting CSP reports for a while, SecurityReportTo can hold millions of rows. The purge job's single unbounded DELETE exceeds the SqlCommand timeout and rolls back, so the table never shrinks. Changes: - Add ReportingOptions.PurgeBatchSize (default 5000; <= 0 restores the original single-DELETE behavior). - IReportingRepository.PurgeReporingData gains an optional int? batchSize. Source-compatible with existing callers. - SQL provider switches to DELETE TOP (@BatchSize) when a batch size is supplied; ElasticSearch ignores it since DeleteByQuery already handles bulk deletes. - PurgeReporintgDataJob loops batch-by-batch, reports progress via OnStatusChanged, and is now IsStoppable. Co-authored-by: Kenny Gutierrez --- .../Configuration/ReportingOptions.cs | 7 ++++ .../SqlDatabaseReportingRepository.cs | 20 +++++++++-- .../ElasticSearchReportingRepository.cs | 7 +++- .../Reporting/IReportingRepository.cs | 10 +++++- .../Reporting/Jobs/PurgeReporintgDataJob.cs | 36 +++++++++++++++++-- 5 files changed, 73 insertions(+), 7 deletions(-) diff --git a/src/Jhoose.Security/Configuration/ReportingOptions.cs b/src/Jhoose.Security/Configuration/ReportingOptions.cs index fed581cb..16b4ad1e 100644 --- a/src/Jhoose.Security/Configuration/ReportingOptions.cs +++ b/src/Jhoose.Security/Configuration/ReportingOptions.cs @@ -6,6 +6,13 @@ public class ReportingOptions { public int RetainDays { get; set; } = 30; + /// <summary> + /// Maximum rows the purge job deletes per batch. Set to 0 or a negative value to disable batching + /// (issue a single unbounded DELETE, matching pre-batching behavior). + /// Only the SQL provider honors this; ElasticSearch handles bulk deletes natively. 
+ /// </summary> + public int PurgeBatchSize { get; set; } = 5000; + public string UseProvider { get; set; } = string.Empty; public string ConnectionString { get; set; } = string.Empty; diff --git a/src/Jhoose.Security/Features/Reporting/Database/SqlDatabaseReportingRepository.cs b/src/Jhoose.Security/Features/Reporting/Database/SqlDatabaseReportingRepository.cs index b1f71f13..1116eb1e 100644 --- a/src/Jhoose.Security/Features/Reporting/Database/SqlDatabaseReportingRepository.cs +++ b/src/Jhoose.Security/Features/Reporting/Database/SqlDatabaseReportingRepository.cs @@ -100,13 +100,27 @@ await sqlHelper.ExecuteStoredProcedure("GetSecurityReportSummary", parameters, ( } } - public async Task PurgeReporingData(DateTime beforeDate) + public async Task PurgeReporingData(DateTime beforeDate, int? batchSize = null) { try { - var sqlCommand = "DELETE FROM SecurityReportTo WHERE RecievedAt < @BeforeDate"; + // Unbatched path preserves prior behavior for callers that pass no batch size. + if (!batchSize.HasValue || batchSize.Value <= 0) + { + var sqlCommand = "DELETE FROM SecurityReportTo WHERE RecievedAt < @BeforeDate"; + + return await sqlHelper.ExecuteNonQuery( + sqlCommand, + sqlHelper.CreateParameter("BeforeDate", SqlDbType.DateTime, beforeDate)); + } + + // Single batch: caller is expected to loop until rows == 0. Keeps each + // statement's transaction small enough to commit before the SqlCommand + // timeout, instead of rolling back a multi-hour DELETE. 
+ var batchedSql = "DELETE TOP (@BatchSize) FROM SecurityReportTo WHERE RecievedAt < @BeforeDate"; return await sqlHelper.ExecuteNonQuery( - sqlCommand, + batchedSql, + sqlHelper.CreateParameter("BatchSize", SqlDbType.Int, batchSize.Value), sqlHelper.CreateParameter("BeforeDate", SqlDbType.DateTime, beforeDate)); } catch (Exception ex) diff --git a/src/Jhoose.Security/Features/Reporting/ElasticSearch/ElasticSearchReportingRepository.cs b/src/Jhoose.Security/Features/Reporting/ElasticSearch/ElasticSearchReportingRepository.cs index 6221d7a7..6c788d72 100644 --- a/src/Jhoose.Security/Features/Reporting/ElasticSearch/ElasticSearchReportingRepository.cs +++ b/src/Jhoose.Security/Features/Reporting/ElasticSearch/ElasticSearchReportingRepository.cs @@ -119,8 +119,13 @@ public async Task GetDashboardSummary(DashboardSummary summary return summary; } - public async Task PurgeReporingData(DateTime beforeDate) + public async Task PurgeReporingData(DateTime beforeDate, int? batchSize = null) { + // batchSize is ignored: Elasticsearch DeleteByQuery handles large deletions + // server-side without the rollback risk SQL has, so the caller's batching + // loop is unnecessary here. The parameter exists only to satisfy the interface. + _ = batchSize; + var response = await this.client.Value.DeleteByQueryAsync>(d => d.Query(query => query.Bool(b => b.Must(m => m.Range(r => r.DateRange(dr => dr.Field(f => f.RecievedAt).Gte(beforeDate))))))); var deletedCount = response.Deleted ?? 
0; diff --git a/src/Jhoose.Security/Features/Reporting/IReportingRepository.cs b/src/Jhoose.Security/Features/Reporting/IReportingRepository.cs index e6f3985c..cdf523df 100644 --- a/src/Jhoose.Security/Features/Reporting/IReportingRepository.cs +++ b/src/Jhoose.Security/Features/Reporting/IReportingRepository.cs @@ -18,7 +18,15 @@ public interface IReportingRepository Task GetDashboardSummary(DashboardSummary summary); - Task PurgeReporingData(DateTime beforeDate); + /// <summary> + /// Deletes reporting rows older than <paramref name="beforeDate"/>. + /// When <paramref name="batchSize"/> is provided and greater than zero, providers + /// that support it should delete at most that many rows per call so callers can + /// loop and bound the work each query does (avoiding command-timeout rollbacks on + /// large tables). When null or not greater than zero, the call behaves as before and deletes everything in + /// one statement. + /// </summary> + Task PurgeReporingData(DateTime beforeDate, int? batchSize = null); Task Search(CspSearchParams searchParams); } \ No newline at end of file diff --git a/src/Jhoose.Security/Features/Reporting/Jobs/PurgeReporintgDataJob.cs b/src/Jhoose.Security/Features/Reporting/Jobs/PurgeReporintgDataJob.cs index 87185028..e6e3e86a 100644 --- a/src/Jhoose.Security/Features/Reporting/Jobs/PurgeReporintgDataJob.cs +++ b/src/Jhoose.Security/Features/Reporting/Jobs/PurgeReporintgDataJob.cs @@ -16,6 +16,7 @@ public class PurgeReporintgDataJob : ScheduledJobBase private readonly IReportingRepositoryFactory reportingRepositoryFactory; private readonly IOptions options; private readonly ILogger logger; + private bool stopSignaled; public PurgeReporintgDataJob(IReportingRepositoryFactory reportingRepositoryFactory, IOptions options, @@ -24,8 +25,11 @@ public PurgeReporintgDataJob(IReportingRepositoryFactory reportingRepositoryFact this.reportingRepositoryFactory = reportingRepositoryFactory; this.options = options; this.logger = logger; + this.IsStoppable = true; } + public override void Stop() => stopSignaled = true; + public override string 
Execute() { var reportingRepository = reportingRepositoryFactory.GetReportingRepository(); @@ -42,9 +46,37 @@ public override string Execute() } var beforeDate = DateTime.UtcNow.AddDays(options.Value.RetainDays * -1); - var purged = reportingRepository.PurgeReporingData(beforeDate).Result; + var batchSize = options.Value.PurgeBatchSize; + + // PurgeBatchSize <= 0 disables batching and preserves the original + // single-DELETE behavior for anyone who relied on it. + if (batchSize <= 0) + { + var purgedOnce = reportingRepository.PurgeReporingData(beforeDate).Result; + return $"Purged {purgedOnce} records, from before {beforeDate}"; + } + + var totalPurged = 0; + var batches = 0; + + OnStatusChanged($"Purging reporting rows older than {beforeDate:u} in batches of {batchSize}..."); + + while (!stopSignaled) + { + var purgedInBatch = reportingRepository.PurgeReporingData(beforeDate, batchSize).Result; + if (purgedInBatch <= 0) + { + break; + } + + totalPurged += purgedInBatch; + batches++; + OnStatusChanged($"Batch {batches}: purged {purgedInBatch} (total {totalPurged})."); + } - return $"Purged {purged} records, from before {beforeDate}"; + return stopSignaled + ? $"Stopped. Purged {totalPurged} records, from before {beforeDate} across {batches} batches." 
+ : $"Purged {totalPurged} records, from before {beforeDate} across {batches} batches."; } catch (Exception ex) { From ddc633e47a5b7d22c603c461e64c911126a15617 Mon Sep 17 00:00:00 2001 From: Andy Markham Date: Tue, 2 Jun 2026 19:41:29 +0100 Subject: [PATCH 2/3] Bump version to 3.2.0 and update dependencies for CMS13 support --- .github/workflows/build-jhoose-security-cms13.yml | 4 ++-- .github/workflows/build-jhoose-security.yml | 4 ++-- README.md | 15 +++++++++++++-- src/Jhoose.Security/Jhoose.Security.csproj | 5 +++-- src/Sample/CMS13/alloy13preview.csproj | 8 ++++---- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-jhoose-security-cms13.yml b/.github/workflows/build-jhoose-security-cms13.yml index e46b7118..dc41e430 100644 --- a/.github/workflows/build-jhoose-security-cms13.yml +++ b/.github/workflows/build-jhoose-security-cms13.yml @@ -8,8 +8,8 @@ on: workflow_dispatch: env: - BUILD_NO: 3.1.0.${{ github.run_number }} - BUILD_NO_PRE: 3.1.0-rc.${{ github.run_number }} + BUILD_NO: 3.2.0.${{ github.run_number }} + BUILD_NO_PRE: 3.2.0-rc.${{ github.run_number }} jobs: build: diff --git a/.github/workflows/build-jhoose-security.yml b/.github/workflows/build-jhoose-security.yml index 95f28421..6102d63d 100644 --- a/.github/workflows/build-jhoose-security.yml +++ b/.github/workflows/build-jhoose-security.yml @@ -8,8 +8,8 @@ on: workflow_dispatch: env: - BUILD_NO: 3.1.0.${{ github.run_number }} - BUILD_NO_PRE: 3.1.0-rc.${{ github.run_number }} + BUILD_NO: 3.2.0.${{ github.run_number }} + BUILD_NO_PRE: 3.2.0-rc.${{ github.run_number }} jobs: build: diff --git a/README.md b/README.md index d0858902..b473f6b3 100644 --- a/README.md +++ b/README.md @@ -305,5 +305,16 @@ X-API-Key: ... |2.6.2|Fixed another bug with (CRLF in header values)| |2.6.3|Fixed performance issues with the reporting API.
Fixed a race condition that caused the nonce to leak across requests under high load.| |3.0.0|Added multisite support, CSP and Permissions Policy, Security Headers can now be configured per site. | - |3.0.4|Fixed issue with the files not being copied to the output directory when building the project, this was causing the module to not work when installed from NuGet.| - |3.1.0| Added CMS13 Support.| \ No newline at end of file + |3.0.4 |Fixed issue with the files not being copied to the output directory when building the project, this was causing the module to not work when installed from NuGet.| + |3.1.0 | Added CMS13 Support.| + |3.2.0 | Updated Purge scheduled job to run in batches: thanks @kennygutierrez| + + + --- + ## Contributors + +https://github.com/Doom-83 +https://github.com/neorth +https://github.com/kennygutierrez + +Thanks for all the support, suggestions, features and bugfixes. \ No newline at end of file diff --git a/src/Jhoose.Security/Jhoose.Security.csproj b/src/Jhoose.Security/Jhoose.Security.csproj index eb97d26c..c1ffc2d3 100644 --- a/src/Jhoose.Security/Jhoose.Security.csproj +++ b/src/Jhoose.Security/Jhoose.Security.csproj @@ -36,7 +36,7 @@ https://github.com/andrewmarkham/contentsecuritypolicy https://github.com/andrewmarkham/contentsecuritypolicy README.md - 3.1.0.0 + 3.2.0.0 Andrew Markham Interface to manage Content Security Policy, Permissions Policy and OWASP Recommended response headers Jhoose Security @@ -81,6 +81,7 @@ 3.0.0 - Added multisite support, CSP and Permissions Policy, Security Headers can now be configured per site. 3.0.4 - Fixed issue with the files not being copied to the output directory when building the project, this was causing the module to not work when installed from NuGet. 3.1.0 - Added support for CMS13. 
+ 3.2.0 - Updated Purge scheduled job to run in batches : thanks @kennygutierrez true @@ -93,7 +94,7 @@ https://nuget.pkg.github.com/andrewmarkham/index.json Debug;Release;PreRelease - 3.1.0.0 + 3.2.0.0 1587, 1591 diff --git a/src/Sample/CMS13/alloy13preview.csproj b/src/Sample/CMS13/alloy13preview.csproj index 7feb27a8..40e148e7 100644 --- a/src/Sample/CMS13/alloy13preview.csproj +++ b/src/Sample/CMS13/alloy13preview.csproj @@ -13,10 +13,10 @@ - - - - + + + + From 15bfadf9076cd4f284d03d32c61976b105bee29c Mon Sep 17 00:00:00 2001 From: Andy Markham Date: Tue, 2 Jun 2026 19:55:58 +0100 Subject: [PATCH 3/3] remove secrets --- src/Sample/CMS13/appsettings.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Sample/CMS13/appsettings.json b/src/Sample/CMS13/appsettings.json index dcab11b7..f0d65b18 100644 --- a/src/Sample/CMS13/appsettings.json +++ b/src/Sample/CMS13/appsettings.json @@ -12,9 +12,9 @@ "ContentGraph": { "GatewayAddress": "https://cg.optimizely.com", "AllowSendingLog": "true", - "SingleKey": "CpnTa2chhSPeojBE3CVx1gyYIzfPWRkLYp4U3cxxAOUCjsfr", - "AppKey": "DgbIzCsKMBRSL4BbIfbcDBaLUPBra4IlxkJX1FtGNyDTIteU", - "Secret": "19ee4CmJyWlCjwS88fSkIVjrlKPaHFfoeANX86UWMyHu6QHw4eWhbPEB3m4sWXXL" + "SingleKey": "...", + "AppKey": "...", + "Secret": "..." } } } \ No newline at end of file