From eebeb52070cca7e9b2510e65f543079d0d0b751e Mon Sep 17 00:00:00 2001 From: nscuro Date: Thu, 4 Jun 2026 14:09:01 +0200 Subject: [PATCH] v4-migrator: Add completion logs and annotate row-count diffs in verify phase * Adds explicit logs after completion of each stage, including how long they took to run. * Annotates verify output with delta explanations. This effectively repeats information from the upgrade guide so it's kept brief. Note that by design, there can't be any delta in the load phase, because it uses plain `INSERT ... SELECT ...` statements with no `ON CONFLICT DO NOTHING` clause. So there is no way to drop rows there. Rows dropped during the transform phase are all intentional, i.e. for de-duplication. Trying to figure out which row reduction is intentional and which is accidental would blow up the complexity of this tool without need. Signed-off-by: nscuro --- .../v4migrator/cli/RunCommand.java | 6 ++ .../v4migrator/extract/ExtractPhase.java | 11 ++- .../v4migrator/load/LoadPhase.java | 19 +++- .../v4migrator/transform/TransformPhase.java | 11 ++- .../v4migrator/verify/RowCountNotes.java | 63 ++++++++++++ .../v4migrator/verify/VerifyPhase.java | 66 ++++++++++++- .../dependencytrack/v4migrator/VerifyIT.java | 38 +++++++- .../v4migrator/verify/VerifyNoteTest.java | 97 +++++++++++++++++++ 8 files changed, 300 insertions(+), 11 deletions(-) create mode 100644 support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/RowCountNotes.java create mode 100644 support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/verify/VerifyNoteTest.java diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/cli/RunCommand.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/cli/RunCommand.java index 24325988fe..dc56480c9e 100644 --- a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/cli/RunCommand.java +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/cli/RunCommand.java @@ -26,6 +26,8 @@ import org.dependencytrack.v4migrator.load.LoadPhase; import org.dependencytrack.v4migrator.transform.TransformPhase; import org.jdbi.v3.core.Jdbi; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine.Command; import picocli.CommandLine.Mixin; import picocli.CommandLine.Option; @@ -33,6 +35,8 @@ @Command(name = "run", description = "extract + transform + load in one go.") public final class RunCommand extends AbstractMigratorCommand { + private static final Logger LOGGER = LoggerFactory.getLogger(RunCommand.class); + @Mixin SourceOptions sourceOpts = new SourceOptions(); @@ -58,6 +62,8 @@ protected int execute(final Jdbi target) throws Exception { metricsOpts.metricsRetentionDays).run(); new TransformPhase(global, target).run(); new LoadPhase(global, target, dropStaging).run(); + LOGGER.info("Migration completed: extract + transform + load finished. " + + "Run 'verify' to review row counts and probes."); return ExitCode.OK; } diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/extract/ExtractPhase.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/extract/ExtractPhase.java index bca964abe7..b1727f9998 100644 --- a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/extract/ExtractPhase.java +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/extract/ExtractPhase.java @@ -70,9 +70,15 @@ public void run() throws Exception { } }); + final long start = System.nanoTime(); + long totalRows = 0; + int tableCount = 0; for (final TableMigration t : TableRegistry.extracted()) { - extractOne(t, extractor); + totalRows += extractOne(t, extractor); + tableCount++; } + final long ms = (System.nanoTime() - start) / 1_000_000; + LOGGER.info("Extract phase completed: {} table(s), {} row(s) in {} ms", tableCount, totalRows, ms); } /** @@ -110,7 +116,7 @@ private void dropTablesMatching(final org.jdbi.v3.core.Handle h, final String pa } } - private void extractOne(final TableMigration t, final SourceExtractor extractor) throws Exception { + private long extractOne(final TableMigration t, final SourceExtractor extractor) throws Exception { LOGGER.info("Extracting {}", t.name()); final long start = System.nanoTime(); markState(t.name(), "IN_PROGRESS", 0); @@ -119,6 +125,7 @@ private void extractOne(final TableMigration t, final SourceExtractor extractor) markState(t.name(), "COMPLETED", rows); final long ms = (System.nanoTime() - start) / 1_000_000; LOGGER.info(" -> {} rows in {} ms", rows, ms); + return rows; } catch (final Exception e) { markState(t.name(), "FAILED", 0); throw e; diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/load/LoadPhase.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/load/LoadPhase.java index 56ed775824..d6879ec0ac 100644 --- a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/load/LoadPhase.java +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/load/LoadPhase.java @@ -71,10 +71,14 @@ public void run() { final StagingSchema staging = new StagingSchema(target, options.stagingSchema); staging.ensure(); + final long start = System.nanoTime(); + long totalRows = 0; + int tableCount = 0; preLoad(); try { for (final TableMigration t : TableRegistry.loaded()) { - loadOne(t); + totalRows += loadOne(t); + tableCount++; } } finally { postLoad(); @@ -84,6 +88,9 @@ public void run() { LOGGER.info("Dropping staging schema (--drop-staging set)."); staging.drop(); } + + final long ms = (System.nanoTime() - start) / 1_000_000; + LOGGER.info("Load phase completed: {} table(s), {} row(s) in {} ms", tableCount, totalRows, ms); } private void preLoad() { @@ -106,6 +113,7 @@ private void preLoad() { } private void postLoad() { + LOGGER.info("Finalizing load: re-enabling triggers and resetting identity sequences"); target.useHandle(h -> { h.execute("ALTER TABLE \"PROJECT\" ENABLE TRIGGER USER"); h.execute("ALTER TABLE \"PROJECT_ACCESS_USERS\" ENABLE TRIGGER USER"); @@ -123,13 +131,17 @@ SELECT setval(pg_get_serial_sequence('"%1$s"', 'ID'), } }); // ANALYZE every loaded table for fresh planner statistics. + final List loaded = TableRegistry.loaded(); + LOGGER.info("Analyzing {} loaded table(s)", loaded.size()); target.useHandle(h -> { - for (final TableMigration t : TableRegistry.loaded()) { + for (final TableMigration t : loaded) { h.execute("ANALYZE \"%s\"".formatted(t.name())); } }); // Refresh PORTFOLIOMETRICS_GLOBAL after PROJECTMETRICS is in place. + LOGGER.info("Refreshing PORTFOLIOMETRICS_GLOBAL materialized view"); target.useHandle(h -> h.execute("REFRESH MATERIALIZED VIEW \"PORTFOLIOMETRICS_GLOBAL\"")); + LOGGER.info("Applying v5.7.0 cleanup deletes"); replayV570CleanupDeletes(); } @@ -275,7 +287,7 @@ private void replayV570CleanupDeletes() { }); } - private void loadOne(final TableMigration t) { + private long loadOne(final TableMigration t) { LOGGER.info("Loading {} into v5", t.name()); markState(t.name(), "IN_PROGRESS", 0); final long expected = expectedRows(t.name()); @@ -285,6 +297,7 @@ private void loadOne(final TableMigration t) { final int inserted = target.inTransaction(h -> h.execute(t.loadSql().formatted(options.stagingSchema))); markState(t.name(), "COMPLETED", inserted); reporter.done(inserted); + return inserted; } catch (final RuntimeException e) { reporter.fail(); markState(t.name(), "FAILED", 0); diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/transform/TransformPhase.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/transform/TransformPhase.java index 7da9ceba6b..ad85080a2d 100644 --- a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/transform/TransformPhase.java +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/transform/TransformPhase.java @@ -59,12 +59,18 @@ public void run() { h.execute("ANALYZE \"%s\".src_%s".formatted(options.stagingSchema, t.name())); } }); + final long start = System.nanoTime(); + long totalRows = 0; + int tableCount = 0; for (final TableMigration t : TableRegistry.transformed()) { - transformOne(t); + totalRows += transformOne(t); + tableCount++; } + final long ms = (System.nanoTime() - start) / 1_000_000; + LOGGER.info("Transform phase completed: {} table(s), {} row(s) in {} ms", tableCount, totalRows, ms); } - private void transformOne(final TableMigration t) { + private long transformOne(final TableMigration t) { LOGGER.info("Transforming {}", t.name()); final long start = System.nanoTime(); markState(t.name(), "IN_PROGRESS"); @@ -86,6 +92,7 @@ private void transformOne(final TableMigration t) { } else { LOGGER.info(" -> done in {} ms", ms); } + return rows; } catch (final RuntimeException e) { markState(t.name(), "FAILED"); throw e; diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/RowCountNotes.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/RowCountNotes.java new file mode 100644 index 0000000000..a7109460f2 --- /dev/null +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/RowCountNotes.java @@ -0,0 +1,63 @@ +/* + * This file is part of Dependency-Track. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) OWASP Foundation. All Rights Reserved. + */ +package org.dependencytrack.v4migrator.verify; + +import java.util.Map; +import java.util.Optional; + +/** + * Operator-facing explanations for tables whose v5 row count legitimately differs from the v4 + * source. These are the documented lossy transforms (dedup, filtering, retention) applied during + * the transform phase; the reasons mirror the per-table Javadoc in + * {@code org.dependencytrack.v4migrator.TableRegistry}. + * + *

Kept here (rather than as a field on {@code TableMigration}) so the explanations live next to + * the verify reporting that surfaces them, without threading text through every registry record. + * The {@code [Probes]} section already itemizes UUID/user/case-collision drops, so those are not + * duplicated here. + */ +final class RowCountNotes { + + private static final Map NOTES = Map.ofEntries( + Map.entry("TEAM", "dedup by NAME"), + Map.entry("TAG", "dedup by NAME"), + Map.entry("OIDCGROUP", "dedup by NAME"), + Map.entry("PROJECT", "dedup by (NAME, VERSION); invalid-UUID rows dropped"), + Map.entry("PROJECT_METADATA", "one row per PROJECT_ID (latest by ID)"), + Map.entry("DEPENDENCYMETRICS", "latest snapshot per key; retention cutoff applied"), + Map.entry("PROJECTMETRICS", "latest snapshot per key; retention cutoff applied"), + Map.entry("FINDINGATTRIBUTION", "one attribution per (component, vulnerability, analyzer)"), + Map.entry("VULNERABLESOFTWARE", "dropped rows without vulnerability refs or with invalid UUID"), + Map.entry("USER", "consolidated from MANAGED/LDAP/OIDC users; invalid rows skipped"), + // Permission join tables: v4-only permissions are dropped during the rename remap, while + // PORTFOLIO_ACCESS_CONTROL_BYPASS is fanned out for ACCESS_MANAGEMENT holders. The net delta + // is a deterministic function of the remap, not a loss indicator either way. + Map.entry("TEAMS_PERMISSIONS", "permissions remapped (v4-only dropped); BYPASS fan-out added; net delta expected"), + Map.entry("USERS_PERMISSIONS", "permissions remapped (v4-only dropped); BYPASS fan-out added; net delta expected"), + Map.entry("PROJECT_ACCESS_TEAMS", "dropped rows with NULL TEAM_ID; dedup on (PROJECT_ID, TEAM_ID)"), + Map.entry("PROJECT_ACCESS_USERS", "derived from PROJECT_ACCESS_TEAMS join USERS_TEAMS; dedup on (PROJECT_ID, USER_ID)") + ); + + private RowCountNotes() { + } + + static Optional reasonFor(final String table) { + return Optional.ofNullable(NOTES.get(table)); + } +} diff --git a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/VerifyPhase.java b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/VerifyPhase.java index 3b8b7793c3..7e7bf2fdeb 100644 --- a/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/VerifyPhase.java +++ b/support/v4-migrator/src/main/java/org/dependencytrack/v4migrator/verify/VerifyPhase.java @@ -27,9 +27,11 @@ import org.slf4j.LoggerFactory; import java.io.PrintStream; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; /** * Pipeline ยง6. Advisory post-load checks emitted as human-readable stdout. @@ -62,6 +64,8 @@ public void run() { reportProbes(); out.println(); checkConstraintsSmoke(); + out.println(); + out.println("== verify complete =="); } private void checkFlywayHead() { @@ -88,15 +92,71 @@ private void checkFlywayHead() { private void reportRowCounts() { out.println("[Row counts]"); - out.printf(" %-24s %12s %12s %12s%n", "Table", "Source", "Staging", "v5"); + out.printf(" %-24s %12s %12s %12s %s%n", "Table", "Source", "Staging", "v5", "Note"); + final Set probed = probedTables(); for (final TableMigration t : TableRegistry.all()) { final Long src = t.hasExtract() ? countOptional(qualified("src_" + t.name())) : null; final Long tgt = t.hasTransform() ? countOptional(qualified("tgt_" + t.name())) : null; final Long v5 = t.hasLoad() ? countOptional("\"" + t.name() + "\"") : null; - out.printf(" %-24s %12s %12s %12s%n", + out.printf(" %-24s %12s %12s %12s %s%n", t.name(), - fmt(src), fmt(tgt), fmt(v5)); + fmt(src), fmt(tgt), fmt(v5), note(t.name(), src, tgt, v5, probed)); + } + } + + /** + * Explains a row-count reduction across the populated stages (source -> staging -> v5). + * Requires at least two non-null stages to compute a delta, so a freshly bootstrapped target + * (no staging schema, only seeded v5 rows) emits nothing. + * + *

Reductions are expected, not necessarily a sign of a problem: the load step copies the + * {@code tgt_*} staging tables verbatim, so reductions originate in the transform (deduplication, + * filtering, retention) and are intentional by design. (The sole exception is the derived + * {@code PROJECT_ACCESS_USERS} load, which uses {@code ON CONFLICT DO NOTHING} against an + * already-deduplicated staging table.) The note's job is to attribute each reduction so an + * operator can tell an accounted-for drop from an unexplained one: documented transforms render + * as {@code expected: }; drops already itemized by the {@code [Probes]} section render as + * {@code see [Probes]}; anything else renders as a neutral {@code reduction (-N), see migration + * guide} pointer. ASCII-only to stay automation-friendly. + */ + static String note(final String table, final Long src, final Long tgt, final Long v5, + final Set probed) { + final Long first = src != null ? src : tgt; + final Long last = v5 != null ? v5 : tgt; + // Need a baseline and a distinct later stage to speak of a reduction. + final int populated = (src != null ? 1 : 0) + (tgt != null ? 1 : 0) + (v5 != null ? 1 : 0); + if (populated < 2 || first == null || last == null || last >= first) { + return ""; + } + final long delta = first - last; + final Optional reason = RowCountNotes.reasonFor(table); + if (reason.isPresent()) { + return "expected: " + reason.get() + " (-" + delta + ")"; + } + if (probed.contains(table)) { + return "see [Probes] (-" + delta + ")"; + } + return "reduction (-" + delta + "), see migration guide"; + } + + /** + * Set of table names that appear in any probe (invalid UUIDs, skipped users, case collisions), + * i.e. tables whose row-count drop is already explained in the {@code [Probes]} section. Empty + * when the staging schema is absent (e.g. verify run straight after bootstrap). + */ + private Set probedTables() { + if (!stagingSchemaExists()) { + return Set.of(); } + return target.withHandle(h -> new HashSet<>(h.createQuery(""" + SELECT table_name FROM "%1$s".probe_invalid_uuids + UNION + SELECT table_name FROM "%1$s".probe_skipped_users + UNION + SELECT table_name FROM "%1$s".probe_case_collisions + """.formatted(options.stagingSchema)) + .mapTo(String.class) + .list())); } private void reportProbes() { diff --git a/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/VerifyIT.java b/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/VerifyIT.java index ddf39fce81..f1d7a19c81 100644 --- a/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/VerifyIT.java +++ b/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/VerifyIT.java @@ -105,13 +105,49 @@ void verifyReportsCountsAndProbes() throws Exception { // Schema head OK. assertThat(output).contains("OK Flyway head = " + Preflight.EXPECTED_FLYWAY_HEAD); - // Row counts table is present. + // Row counts table is present, including the Note column. assertThat(output).contains("[Row counts]"); + assertThat(output).containsPattern("Table\\s+Source\\s+Staging\\s+v5\\s+Note"); assertThat(output).contains("LICENSE"); assertThat(output).contains("TEAM"); + // The two same-named teams dedup, and that reduction is explained as expected. + assertThat(output).containsPattern("TEAM\\s+.*expected: dedup by NAME"); + // The dropped malformed LICENSE UUID is attributed to the probes section. + assertThat(output).containsPattern("LICENSE\\s+.*see \\[Probes]"); + // Reductions are never rendered as alarmist data-loss warnings. + assertThat(output).doesNotContain("WARN"); // Probe section reports the malformed UUID for LICENSE. assertThat(output).containsPattern("LICENSE\\s+\\d+ malformed UUID\\(s\\) dropped"); // Constraints section emits a non-zero CHECK count. assertThat(output).matches("(?s).*\\[Constraints].*[1-9][0-9]* CHECK constraint.*"); + // Explicit terminator so operators/automation can detect completion. + assertThat(output).contains("== verify complete =="); + } + + /** + * The migration guide recommends running verify directly after bootstrap, when no staging schema + * exists and only the PERMISSION catalog is seeded. That workflow must not surface spurious + * discrepancy warnings. A freshly started target container mirrors that post-bootstrap state. + */ + @Test + void verifyAfterBootstrapEmitsNoDiscrepancyWarnings() { + try (final V5TargetContainer freshTarget = new V5TargetContainer().start()) { + final GlobalOptions global = new GlobalOptions(); + global.targetUrl = freshTarget.jdbcUrl(); + global.targetUser = freshTarget.username(); + global.targetPass = freshTarget.password(); + global.stagingSchema = "dt_v4_migration"; + global.logLevel = "INFO"; + + final ByteArrayOutputStream buf = new ByteArrayOutputStream(); + try (PrintStream ps = new PrintStream(buf)) { + new VerifyPhase(global, freshTarget.jdbi(), ps).run(); + } + final String output = buf.toString(); + + assertThat(output).contains("[Row counts]"); + assertThat(output).doesNotContain("WARN"); + assertThat(output).contains("== verify complete =="); + } } } diff --git a/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/verify/VerifyNoteTest.java b/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/verify/VerifyNoteTest.java new file mode 100644 index 0000000000..9ac456995d --- /dev/null +++ b/support/v4-migrator/src/test/java/org/dependencytrack/v4migrator/verify/VerifyNoteTest.java @@ -0,0 +1,97 @@ +/* + * This file is part of Dependency-Track. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) OWASP Foundation. All Rights Reserved. + */ +package org.dependencytrack.v4migrator.verify; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit coverage for {@link VerifyPhase#note}, the per-table row-count discrepancy classifier. + */ +class VerifyNoteTest { + + private static final Set NO_PROBES = Set.of(); + + @Test + void noReductionYieldsBlankNote() { + assertThat(VerifyPhase.note("LICENSE", 100L, 100L, 100L, NO_PROBES)).isEmpty(); + } + + @Test + void increaseYieldsBlankNote() { + // Fan-out can make v5 exceed source; that is not a reduction. + assertThat(VerifyPhase.note("TEAMS_PERMISSIONS", 5L, 8L, 8L, NO_PROBES)).isEmpty(); + } + + @Test + void singlePopulatedStageNeverWarns() { + // Post-bootstrap shape: only the seeded v5 count is known. + assertThat(VerifyPhase.note("PERMISSION", null, null, 120L, NO_PROBES)).isEmpty(); + } + + @Test + void documentedReductionRendersExpected() { + final String note = VerifyPhase.note("TEAM", 50L, 40L, 40L, NO_PROBES); + assertThat(note).startsWith("expected: dedup by NAME").endsWith("(-10)"); + } + + @Test + void documentedReductionTakesPrecedenceOverProbe() { + // PROJECT is both deduped and probed for invalid UUIDs; the specific note wins. + final String note = VerifyPhase.note("PROJECT", 30L, 25L, 25L, Set.of("PROJECT")); + assertThat(note).startsWith("expected: "); + } + + @Test + void probedReductionPointsToProbesSection() { + final String note = VerifyPhase.note("SOMETABLE", 100L, 90L, 90L, Set.of("SOMETABLE")); + assertThat(note).isEqualTo("see [Probes] (-10)"); + } + + @Test + void undocumentedReductionShowsNeutralPointer() { + // Reductions are intentional transforms, not data loss, so the fallback is a neutral + // pointer rather than an alarm. + final String note = VerifyPhase.note("SOMETABLE", 100L, 100L, 95L, NO_PROBES); + assertThat(note).isEqualTo("reduction (-5), see migration guide"); + } + + @Test + void permissionFanOutTableRendersExpectedOnNetReduction() { + // When dropped v4-only permissions outnumber the fan-out, the documented note applies. + final String note = VerifyPhase.note("TEAMS_PERMISSIONS", 20L, 17L, 17L, NO_PROBES); + assertThat(note).startsWith("expected: "); + } + + @Test + void derivedTableReductionComparesStagingToV5() { + // Derived table: no source. ON CONFLICT can drop rows between staging and v5. + final String note = VerifyPhase.note("PROJECT_ACCESS_USERS", null, 10L, 8L, NO_PROBES); + assertThat(note).startsWith("expected: ").endsWith("(-2)"); + } + + @Test + void outputIsAsciiOnly() { + final String note = VerifyPhase.note("SOMETABLE", 100L, 100L, 95L, NO_PROBES); + assertThat(note.chars().allMatch(c -> c < 128)).isTrue(); + } +}