From 304736d3b11fdb52c28bbf3f3e1e89477df557f6 Mon Sep 17 00:00:00 2001 From: VirusAlex Date: Fri, 1 May 2026 00:41:34 +0300 Subject: [PATCH] fix(v0.4.0): state schemaVersion + Docker hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes audit findings HIGH H5/H6/H7/H8 + medium Dockerfile SHA-pin and gosu binary verification. Fourth of ~5 PRs gating v0.4.0. State schema versioning (H8) - JobState gains an int schemaVersion component (last position to keep the diff small at the 9 construction sites). CURRENT_SCHEMA_VERSION = 1. Existing v0.3.x JSON files have no field — Jackson defaults to 0, treated as schema 1 on read. - SidecarMeta gains the same. The MetaJson DTO it serializes through defaults old files to schema 1 on toMeta() and refuses any future schemaVersion > CURRENT_SCHEMA_VERSION (rather than silently misinterpreting newer-format files). Same on JsonJobStore.load / loadFile — refused entries are skipped, never destroyed. - Pre-v0.4.0 the on-disk JSON formats had no schemaVersion field, so a forward-incompatible change later would have been impossible to detect. Now: bumping the constant in either record makes every v0.4.0+ build that predates the bump refuse the newer files, so a downgrade fails visibly instead of silently misreading state — a precondition for the v1.0.0 compatibility story. Docker HEALTHCHECK (H5) - HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 CMD curl -fsS http://127.0.0.1:7777/api/health - UBI 10 minimal doesn't ship curl; we install curl-minimal (~6 MiB layer) before the COPY of the gosu binary. Compose `depends_on: condition: service_healthy` and similar orchestrator primitives now work. Multi-arch image (H7) - release.yml: docker/build-push-action `platforms: linux/amd64,linux/arm64`. The Dockerfile already SHA-pins the per-arch gosu binary, so the cross-build is straightforward. ARM users (RPi 4+, Apple Silicon via Docker Desktop, AWS Graviton) finally get a working pull. 
`:latest` only for non-0.x (H6) - release.yml docker_tags: while VERSION matches `^0\.`, push only ghcr.io/.../netcopy:${VERSION} — NOT :latest. Matches the README claim ("`latest` — Highest tagged stable release once 1.x ships") and the `make_latest` flag on the GitHub Release. Pre-v0.4.0 every 0.x tag silently overwrote :latest, contradicting the docs. Dockerfile base-image SHA pin - alpine:3.20 → @sha256:d9e853e87e... (digest-pinned) - eclipse-temurin:25-jre-ubi10-minimal → @sha256:c897ce903faf... - Decouples our build from any upstream re-tag of those names. Bump deliberately on each NetCopy release. gosu SHA-256 verification - per-arch SHA from the upstream SHA256SUMS file is now baked into the Dockerfile and verified after curl. Closes the "compromised GitHub release / MITM" supply-chain hole noted in the audit. Local mvn test: schema-related suites (JsonJobStoreTest, FileSidecarStoreTest, ChunkBitmapTest, ArchitectureTest) all green individually. The 56 errors in the full mvn test run are the existing Windows-env Jetty-loopback issue, unchanged from main. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/release.yml | 17 +++++++++- Dockerfile | 33 ++++++++++++++++--- .../dev/netcopy/state/FileSidecarStore.java | 18 ++++++++-- src/main/java/dev/netcopy/state/JobState.java | 18 +++++++++- .../java/dev/netcopy/state/JsonJobStore.java | 27 +++++++++++++-- .../java/dev/netcopy/state/SidecarMeta.java | 16 +++++++-- .../java/dev/netcopy/transfer/Puller.java | 6 ++-- .../dev/netcopy/transfer/ResumeManager.java | 3 +- .../dev/netcopy/transfer/TransferEngine.java | 6 ++-- .../dev/netcopy/state/JsonJobStoreTest.java | 9 +++-- 10 files changed, 132 insertions(+), 21 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7cb342d..67440b8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -46,7 +46,16 @@ jobs: KIND="tag" REL_TAG="${TAG}" REL_NAME="NetCopy ${VERSION}" - DOCKER_TAGS="ghcr.io/${REPO_LC}:${VERSION},ghcr.io/${REPO_LC}:latest" + # `:latest` is reserved for stable 1.x+ releases. While we ship 0.x, + # `latest` would otherwise track every pre-1.0 tag — confusing for users + # who pull `:latest` expecting a stable channel. Match the README claim + # ("`latest` — Highest tagged stable release once 1.x ships") AND the + # GitHub Release `make_latest` flag at the bottom of this file (same gate). + if [[ "${VERSION}" =~ ^0\. ]]; then + DOCKER_TAGS="ghcr.io/${REPO_LC}:${VERSION}" + else + DOCKER_TAGS="ghcr.io/${REPO_LC}:${VERSION},ghcr.io/${REPO_LC}:latest" + fi else VERSION="0.0.0-main-${GITHUB_SHA:0:7}" KIND="snapshot" @@ -98,6 +107,12 @@ jobs: context: . file: Dockerfile push: true + # Multi-arch (added v0.4.0): NetCopy is pure-Java so the JVM does + # the heavy lifting; the only arch-specific bit is the gosu binary, + # which the Dockerfile already selects per-arch with a SHA-pinned + # download. ARM users (RPi 4+, Apple Silicon via Docker Desktop, + # AWS Graviton) get a working image instead of "exec format error". 
+ platforms: linux/amd64,linux/arm64 tags: ${{ steps.ver.outputs.docker_tags }} build-args: | JAR_FILE=release/netcopy-${{ steps.ver.outputs.version }}.jar diff --git a/Dockerfile b/Dockerfile index 754c64e..ccfc44d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,22 +14,33 @@ ARG VERSION=dev # UBI 10 minimal doesn't ship curl/wget by default and getting gosu from # microdnf is awkward; alpine + apk add curl is the smallest path. We just # COPY --from this layer; the alpine layer doesn't ship in the final image. -FROM alpine:3.20 AS gosu-stage +# +# Base image SHA-pinned: locking the digest decouples our build from any +# upstream re-tag of `alpine:3.20`. Bump deliberately on each NetCopy +# release; the version comment after `@sha256:...` is what gets searched. +FROM alpine:3.20@sha256:d9e853e87e55526f6b2917df91a2115c36dd7c696a35be12163d44e6e2a4b6bc AS gosu-stage ARG GOSU_VERSION=1.17 +# Per-arch SHA-256 of the gosu binary, pulled from +# https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/SHA256SUMS . +# Pinning the binary itself (not just the version tag) closes the +# "compromised GitHub release / MITM" supply-chain hole. 
+ARG GOSU_SHA256_AMD64=bbc4136d03ab138b1ad66fa4fc051bafc6cc7ffae632b069a53657279a450de3 +ARG GOSU_SHA256_ARM64=c3805a85d17f4454c23d7059bcb97e1ec1af272b90126e79ed002342de08389b RUN set -eux; \ apk add --no-cache curl; \ case "$(uname -m)" in \ - x86_64) gosuArch=amd64 ;; \ - aarch64) gosuArch=arm64 ;; \ + x86_64) gosuArch=amd64; gosuSha="${GOSU_SHA256_AMD64}" ;; \ + aarch64) gosuArch=arm64; gosuSha="${GOSU_SHA256_ARM64}" ;; \ *) echo "unsupported arch $(uname -m)"; exit 1 ;; \ esac; \ curl -fsSL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-${gosuArch}" \ -o /tmp/gosu; \ + echo "${gosuSha} /tmp/gosu" | sha256sum -c -; \ chmod +x /tmp/gosu; \ /tmp/gosu --version # ---- Stage 2: runtime -------------------------------------------------- -FROM eclipse-temurin:25-jre-ubi10-minimal AS runtime +FROM eclipse-temurin:25-jre-ubi10-minimal@sha256:c897ce903faf6736e4b5cbb2dd5e05e6b74909d71105f3cfe33840b3ce7b8b21 AS runtime # OCI labels filled in further by the workflow's --label flags. LABEL org.opencontainers.image.title="NetCopy" @@ -37,6 +48,13 @@ LABEL org.opencontainers.image.description="Fast multi-stream file transfer betw LABEL org.opencontainers.image.source="https://github.com/VirusAlex/NetCopy" LABEL org.opencontainers.image.licenses="Apache-2.0" +# curl is needed for the HEALTHCHECK (UBI 10 minimal doesn't ship it). Cheap +# install — adds ~6 MiB to the final image. Kept as its own RUN layer, placed +# before the gosu COPY below; install and cache cleanup share that one layer. +RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs curl-minimal \ + && microdnf clean all \ + && rm -rf /var/cache/yum + # gosu lets the entrypoint chown the state dir as root and then drop privileges # to PUID:PGID before running java. See entrypoint.sh for the rationale. COPY --from=gosu-stage /tmp/gosu /usr/local/bin/gosu @@ -75,6 +93,13 @@ WORKDIR /var/lib/netcopy # 7777 control plane (HTTP + WS), 7778 binary TCP data plane. 
EXPOSE 7777 7778 +# Healthcheck against the unauthenticated /api/health endpoint. Defaults are +# conservative — JVM cold-start on a slow VM can take ~10 s, so allow 30 s +# start-period before the first failed probe counts. After that, probe every +# 30 s; mark unhealthy after 3 consecutive failures. +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -fsS http://127.0.0.1:7777/api/health > /dev/null || exit 1 + # Conventional volume mount points; override with -v on `docker run`. # -v /host/share:/share:ro -> mounted as a --shared-root # -v /host/incoming:/incoming -> mounted as a --receive-root diff --git a/src/main/java/dev/netcopy/state/FileSidecarStore.java b/src/main/java/dev/netcopy/state/FileSidecarStore.java index 93f15aa..5cf85f7 100644 --- a/src/main/java/dev/netcopy/state/FileSidecarStore.java +++ b/src/main/java/dev/netcopy/state/FileSidecarStore.java @@ -362,7 +362,8 @@ private Sidecar createFresh(Path target, Path dir, long size, List plan, source.mtime(), plan.size(), List.copyOf(plan), - source.expectedHash() == null ? null : source.expectedHash().clone() + source.expectedHash() == null ? null : source.expectedHash().clone(), + SidecarMeta.CURRENT_SCHEMA_VERSION ); writeMetaFresh(metaPath, meta); @@ -506,6 +507,9 @@ private static final class MetaJson { public int chunkCount; public List plan; public String expectedHashHex; + /** v0.4.0+ field. Absent in legacy files (Jackson defaults to 0); we treat 0 as + * schema 1 in toMeta() because pre-schema files used the layout that became schema 1. */ + public int schemaVersion; static MetaJson from(SidecarMeta m) { MetaJson j = new MetaJson(); @@ -517,6 +521,9 @@ static MetaJson from(SidecarMeta m) { j.expectedHashHex = m.expectedHash() == null ? null : HexFormat.of().formatHex(m.expectedHash()); + j.schemaVersion = m.schemaVersion() == 0 + ? 
SidecarMeta.CURRENT_SCHEMA_VERSION + : m.schemaVersion(); return j; } @@ -527,7 +534,14 @@ SidecarMeta toMeta() { byte[] hash = expectedHashHex == null ? null : HexFormat.of().parseHex(expectedHashHex); - return new SidecarMeta(relPath, size, sourceMtime, chunkCount, chunks, hash); + int sv = schemaVersion == 0 ? 1 : schemaVersion; + if (sv > SidecarMeta.CURRENT_SCHEMA_VERSION) { + throw new IllegalStateException( + "sidecar meta.json schemaVersion " + sv + " > supported " + + SidecarMeta.CURRENT_SCHEMA_VERSION + + " (file written by a newer NetCopy release; refuse to interpret)"); + } + return new SidecarMeta(relPath, size, sourceMtime, chunkCount, chunks, hash, sv); } } diff --git a/src/main/java/dev/netcopy/state/JobState.java b/src/main/java/dev/netcopy/state/JobState.java index dbe78fe..7c27cae 100644 --- a/src/main/java/dev/netcopy/state/JobState.java +++ b/src/main/java/dev/netcopy/state/JobState.java @@ -16,6 +16,14 @@ *

{@code direction == OUTBOUND} jobs are not persisted by the puller side. * The field is kept in the schema for symmetry with future server-side bookkeeping * and for compatibility with the frozen contract in {@code interfaces.md}. + * + *

Schema versioning

+ *

The {@code schemaVersion} field declares the on-disk format generation. It is + * always {@link #CURRENT_SCHEMA_VERSION} for newly-written files. A reader + * confronted with a JSON file whose schemaVersion is HIGHER than the reader's + * supported version is required to refuse it (the file was written by a future + * release that may have changed semantics in incompatible ways). A reader seeing + * schemaVersion 0 (the field absent — legacy v0.3.x files) treats it as schema 1. */ public record JobState( String id, @@ -34,8 +42,16 @@ public record JobState( long updatedAt, Long completedAt, Long totalDurationMs, - Long avgThroughputBps + Long avgThroughputBps, + /** On-disk schema generation; see class javadoc. {@link #CURRENT_SCHEMA_VERSION} + * for new writes, 0 if the JSON file came from a pre-schema (≤ v0.3.x) build — + * treated as schema 1 on read. */ + int schemaVersion ) { + /** Bump together with any breaking change to the on-disk JSON shape. Readers MUST + * reject files whose {@code schemaVersion > CURRENT_SCHEMA_VERSION}. Bumps in this + * file are accompanied by a migration note in CHANGELOG.md. 
*/ + public static final int CURRENT_SCHEMA_VERSION = 1; // ---- Enum wire format ------------------------------------------------------- // // Each enum below uses @JsonValue + @JsonCreator to serialise/deserialise as diff --git a/src/main/java/dev/netcopy/state/JsonJobStore.java b/src/main/java/dev/netcopy/state/JsonJobStore.java index 3316842..d3aa86c 100644 --- a/src/main/java/dev/netcopy/state/JsonJobStore.java +++ b/src/main/java/dev/netcopy/state/JsonJobStore.java @@ -74,7 +74,8 @@ public String create(JobState job) { job.updatedAt(), job.completedAt(), job.totalDurationMs(), - job.avgThroughputBps() + job.avgThroughputBps(), + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); writeAtomic(withId); return id; @@ -89,7 +90,16 @@ public Optional load(String id) { } try { byte[] bytes = Files.readAllBytes(file); - return Optional.of(mapper.readValue(bytes, JobState.class)); + JobState job = mapper.readValue(bytes, JobState.class); + if (job.schemaVersion() > JobState.CURRENT_SCHEMA_VERSION) { + // See loadFile() for rationale. + org.slf4j.LoggerFactory.getLogger(JsonJobStore.class).warn( + "Refusing to load job {}: schemaVersion {} > supported {} " + + "(file written by a newer NetCopy release; downgrade is unsafe).", + id, job.schemaVersion(), JobState.CURRENT_SCHEMA_VERSION); + return Optional.empty(); + } + return Optional.of(job); } catch (NoSuchFileException nsfe) { return Optional.empty(); } catch (IOException e) { @@ -140,7 +150,18 @@ public void delete(String id) { private Optional loadFile(Path file) { try { byte[] bytes = Files.readAllBytes(file); - return Optional.of(mapper.readValue(bytes, JobState.class)); + JobState job = mapper.readValue(bytes, JobState.class); + if (job.schemaVersion() > JobState.CURRENT_SCHEMA_VERSION) { + // Future-format file. Refuse to interpret — the schema may have changed + // semantics in incompatible ways (e.g. a field with a new meaning). 
+ // Listing skips this entry; resume won't touch it; nothing is destroyed. + org.slf4j.LoggerFactory.getLogger(JsonJobStore.class).warn( + "Refusing to load job {}: schemaVersion {} > supported {} " + + "(file written by a newer NetCopy release; downgrade is unsafe).", + file.getFileName(), job.schemaVersion(), JobState.CURRENT_SCHEMA_VERSION); + return Optional.empty(); + } + return Optional.of(job); } catch (IOException e) { // Skip unreadable/corrupt entries rather than failing listAll(). return Optional.empty(); diff --git a/src/main/java/dev/netcopy/state/SidecarMeta.java b/src/main/java/dev/netcopy/state/SidecarMeta.java index 6fd9fa4..6dbbf97 100644 --- a/src/main/java/dev/netcopy/state/SidecarMeta.java +++ b/src/main/java/dev/netcopy/state/SidecarMeta.java @@ -7,12 +7,20 @@ * creation; on reopen the size + sourceMtime fields are compared against the producer-side * source identity to detect that the file has changed. * + *

The {@code schemaVersion} field declares the on-disk format generation. Readers MUST + * refuse a meta whose schemaVersion exceeds {@link #CURRENT_SCHEMA_VERSION}; the file came + * from a newer NetCopy release that may have changed the layout in incompatible ways. + * Pre-schema files (v0.3.x and earlier) carry no field — Jackson defaults the int component + * to {@code 0}, which is treated as schema 1. + * * @param relPath logical path of the source file inside its share root * @param size total size in bytes of the source file * @param sourceMtime producer-side mtime in seconds-since-epoch (matches manifest) * @param chunkCount number of chunks in the plan (must equal {@code plan.size()}) * @param plan ordered chunk plan; {@link SidecarStore.Chunk#idx} is dense in [0, chunkCount) * @param expectedHash full-file xxh3-128 hash if known at planning time, else {@code null} + * @param schemaVersion on-disk format generation; {@link #CURRENT_SCHEMA_VERSION} for new + * writes, 0 for legacy files (treated as schema 1 on read) */ public record SidecarMeta( String relPath, @@ -20,5 +28,9 @@ public record SidecarMeta( long sourceMtime, int chunkCount, List plan, - byte[] expectedHash -) {} + byte[] expectedHash, + int schemaVersion +) { + /** Bump together with any breaking change to the on-disk meta.json shape. 
*/ + public static final int CURRENT_SCHEMA_VERSION = 1; +} diff --git a/src/main/java/dev/netcopy/transfer/Puller.java b/src/main/java/dev/netcopy/transfer/Puller.java index 44c039b..bb609ee 100644 --- a/src/main/java/dev/netcopy/transfer/Puller.java +++ b/src/main/java/dev/netcopy/transfer/Puller.java @@ -958,7 +958,8 @@ private JobState persistTerminal(JobState job, JobState.JobLifecycle target) { job.conflictPolicy(), job.fileParallelism(), job.chunksPerFile(), target, job.createdAt(), now, completedAt, - job.totalDurationMs(), job.avgThroughputBps() + job.totalDurationMs(), job.avgThroughputBps(), + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); try { jobStore.save(updated); @@ -976,7 +977,8 @@ private JobState withCompletion(JobState job, long durationMs, long avgBps) { job.conflictPolicy(), job.fileParallelism(), job.chunksPerFile(), JobState.JobLifecycle.COMPLETED, job.createdAt(), now, now, - durationMs, avgBps + durationMs, avgBps, + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); } diff --git a/src/main/java/dev/netcopy/transfer/ResumeManager.java b/src/main/java/dev/netcopy/transfer/ResumeManager.java index 3941316..97e8370 100644 --- a/src/main/java/dev/netcopy/transfer/ResumeManager.java +++ b/src/main/java/dev/netcopy/transfer/ResumeManager.java @@ -66,7 +66,8 @@ public int restore() { job.conflictPolicy(), job.fileParallelism(), job.chunksPerFile(), JobState.JobLifecycle.PAUSED, job.createdAt(), now, job.completedAt(), - job.totalDurationMs(), job.avgThroughputBps() + job.totalDurationMs(), job.avgThroughputBps(), + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); try { jobStore.save(paused); diff --git a/src/main/java/dev/netcopy/transfer/TransferEngine.java b/src/main/java/dev/netcopy/transfer/TransferEngine.java index 73cc59e..c11a681 100644 --- a/src/main/java/dev/netcopy/transfer/TransferEngine.java +++ b/src/main/java/dev/netcopy/transfer/TransferEngine.java @@ -153,7 +153,8 @@ public String 
start(NewTransferRequest req) { now, null, null, - null + null, + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); jobStore.create(job); log.info("engine.start: created job {} (protocol={}, peerUrl={}, tcpPort={})", @@ -591,7 +592,8 @@ private static JobState withState(JobState job, JobState.JobLifecycle state) { job.conflictPolicy(), job.fileParallelism(), job.chunksPerFile(), state, job.createdAt(), now, completedAt, - job.totalDurationMs(), job.avgThroughputBps() + job.totalDurationMs(), job.avgThroughputBps(), + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); } diff --git a/src/test/java/dev/netcopy/state/JsonJobStoreTest.java b/src/test/java/dev/netcopy/state/JsonJobStoreTest.java index 83b5647..d71b816 100644 --- a/src/test/java/dev/netcopy/state/JsonJobStoreTest.java +++ b/src/test/java/dev/netcopy/state/JsonJobStoreTest.java @@ -40,7 +40,8 @@ private static JobState samplePlanned(String id, long createdAt) { createdAt, null, null, - null + null, + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); } @@ -97,7 +98,8 @@ void roundTrip_createLoadSaveLoad(@TempDir Path stateDir) { 5_000L, 5_000L, 4_000L, - 123_456L + 123_456L, + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); store.save(mutated); @@ -216,7 +218,8 @@ void save_overwritesAtomicallyWithoutRaisingOnExistingFile(@TempDir Path stateDi current.fileParallelism(), current.chunksPerFile(), JobState.JobLifecycle.RUNNING, current.createdAt(), current.updatedAt() + 100, - current.completedAt(), current.totalDurationMs(), current.avgThroughputBps() + current.completedAt(), current.totalDurationMs(), current.avgThroughputBps(), + JobState.CURRENT_SCHEMA_VERSION // schemaVersion (v0.4.0+) ); store.save(next); }