Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 10 additions & 18 deletions scripts/migrate_uploads_to_s3.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""One-shot: copy app/static/uploads/* to S3 and prefix existing DB rows.
"""Copy app/static/uploads/* to S3 and prefix existing DB rows.

Runs once per environment as part of the disk -> S3 cutover. Idempotent —
re-running uploads only objects S3 doesn't already have, and updates only
DB rows that don't yet carry the ``books/`` prefix.
Invoked from entrypoint.sh on every container boot during the disk -> S3
cutover. Idempotent and safe to re-run: PutObject overwrites with the
same content (so post-cutover boots redo cheap in-region PUTs until
this file is removed), and the DB phase only updates rows that don't
yet carry the ``books/`` prefix.

Usage::

Expand All @@ -22,7 +24,6 @@
from pathlib import Path

import boto3
from botocore.exceptions import ClientError

REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO_ROOT))
Expand All @@ -44,16 +45,6 @@ def _s3_client(app):
)


def _exists(s3, bucket: str, key: str) -> bool:
    """Report whether ``key`` is present in ``bucket``.

    Issues a ``head_object`` call through ``s3`` (presumably a boto3 S3
    client -- confirm against ``_s3_client``). A normal response means the
    object exists. A ``ClientError`` whose error code is ``"404"``,
    ``"NoSuchKey"`` or ``"NotFound"`` means it does not. Any other
    ``ClientError`` is re-raised unchanged.
    """
    missing_codes = ("404", "NoSuchKey", "NotFound")
    try:
        s3.head_object(Bucket=bucket, Key=key)
    except ClientError as err:
        # Only the "not found" family is an answer; anything else (for
        # example a permissions failure) is a real error for the caller.
        if err.response["Error"]["Code"] not in missing_codes:
            raise
        return False
    else:
        return True


def upload_files(app) -> int:
if not UPLOADS_DIR.is_dir():
print(f"No uploads directory at {UPLOADS_DIR} — nothing to copy.")
Expand All @@ -63,13 +54,14 @@ def upload_files(app) -> int:
s3 = _s3_client(app)
uploaded = 0

# No head_object precheck: the ECS task role is scoped to s3:PutObject
# only, and HeadObject without GetObject returns 403 (not 404). PUT is
# idempotent for the same body, so re-running just overwrites with the
# same bytes.
for path in sorted(UPLOADS_DIR.iterdir()):
if not path.is_file():
continue
key = f"{UPLOAD_PREFIX}{path.name}"
if _exists(s3, bucket, key):
print(f" skip (exists): {key}")
continue
content_type, _ = mimetypes.guess_type(path.name)
if DRY_RUN:
print(f" would upload: {path} -> s3://{bucket}/{key} ({content_type})")
Expand Down