From d41eb85077da7369d1a71a855ed08748862019b7 Mon Sep 17 00:00:00 2001 From: Tiago Correia Date: Mon, 4 May 2026 09:09:00 +0100 Subject: [PATCH 1/2] fix: drop head_object precheck in uploads migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ECS task role is scoped to s3:PutObject only. HeadObject without GetObject returns 403 (S3 hides existence from unauthorized callers), which the existing handler doesn't translate to "doesn't exist", so the script crashed on container start and ECS never reached steady state. Drop the precheck — PutObject is idempotent for the same body, so re-runs just overwrite with the same bytes. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/migrate_uploads_to_s3.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/scripts/migrate_uploads_to_s3.py b/scripts/migrate_uploads_to_s3.py index 5cb2013..f8c0d3b 100644 --- a/scripts/migrate_uploads_to_s3.py +++ b/scripts/migrate_uploads_to_s3.py @@ -1,8 +1,8 @@ """One-shot: copy app/static/uploads/* to S3 and prefix existing DB rows. Runs once per environment as part of the disk -> S3 cutover. Idempotent — -re-running uploads only objects S3 doesn't already have, and updates only -DB rows that don't yet carry the ``books/`` prefix. +PutObject silently overwrites with the same content, and the DB phase +only updates rows that don't yet carry the ``books/`` prefix. Usage:: @@ -22,7 +22,6 @@ from pathlib import Path import boto3 -from botocore.exceptions import ClientError REPO_ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(REPO_ROOT)) @@ -44,16 +43,6 @@ def _s3_client(app): ) -def _exists(s3, bucket: str, key: str) -> bool: - try: - s3.head_object(Bucket=bucket, Key=key) - return True - except ClientError as e: - if e.response["Error"]["Code"] in ("404", "NoSuchKey", "NotFound"): - return False - raise - - def upload_files(app) -> int: if not UPLOADS_DIR.is_dir(): print(f"No uploads directory at {UPLOADS_DIR} — nothing to copy.") @@ -63,13 +52,14 @@ def upload_files(app) -> int: s3 = _s3_client(app) uploaded = 0 + # No head_object precheck: the ECS task role is scoped to s3:PutObject + # only, and HeadObject without GetObject returns 403 (not 404). PUT is + # idempotent for the same body, so re-running just overwrites with the + # same bytes. for path in sorted(UPLOADS_DIR.iterdir()): if not path.is_file(): continue key = f"{UPLOAD_PREFIX}{path.name}" - if _exists(s3, bucket, key): - print(f" skip (exists): {key}") - continue content_type, _ = mimetypes.guess_type(path.name) if DRY_RUN: print(f" would upload: {path} -> s3://{bucket}/{key} ({content_type})") From 8e92ffeac09c9e5088f32ed0fd2bcfa74aef8b93 Mon Sep 17 00:00:00 2001 From: Tiago Correia Date: Mon, 4 May 2026 09:22:06 +0100 Subject: [PATCH 2/2] docs: clarify migration runs on every boot, not once per env The "Runs once per environment" wording was true when the script was intended to run manually, but with entrypoint.sh invoking it on every container boot it just misleads. Note that PutObject re-runs are cheap in-region overwrites until the file is removed in the cleanup PR. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/migrate_uploads_to_s3.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/migrate_uploads_to_s3.py b/scripts/migrate_uploads_to_s3.py index f8c0d3b..5e1a52c 100644 --- a/scripts/migrate_uploads_to_s3.py +++ b/scripts/migrate_uploads_to_s3.py @@ -1,8 +1,10 @@ -"""One-shot: copy app/static/uploads/* to S3 and prefix existing DB rows. +"""Copy app/static/uploads/* to S3 and prefix existing DB rows. -Runs once per environment as part of the disk -> S3 cutover. Idempotent — -PutObject silently overwrites with the same content, and the DB phase -only updates rows that don't yet carry the ``books/`` prefix. +Invoked from entrypoint.sh on every container boot during the disk -> S3 +cutover. Idempotent and safe to re-run: PutObject overwrites with the +same content (so post-cutover boots redo cheap in-region PUTs until +this file is removed), and the DB phase only updates rows that don't +yet carry the ``books/`` prefix. Usage::