Skip to content

Commit 3d9b45e

Browse files
authored
Merge pull request #9 from arabcoders/dev
feat: implement explicit upload completion
2 parents dc7b1d7 + 048676d commit 3d9b45e

13 files changed

Lines changed: 251 additions & 131 deletions

backend/app/routers/uploads.py

Lines changed: 81 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,66 @@ async def _get_upload_record(db: AsyncSession, upload_id: str) -> models.UploadR
9898
return record
9999

100100

101+
async def _finalize_upload(
102+
db: AsyncSession,
103+
record: models.UploadRecord,
104+
queue: ProcessingQueue | None,
105+
) -> models.UploadRecord:
106+
"""Validate and finalize an uploaded file after all bytes have been received."""
107+
if record.upload_length is None:
108+
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Upload length unknown")
109+
110+
if record.upload_offset < record.upload_length:
111+
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Upload not finished")
112+
113+
if record.status in {"completed", "postprocessing"}:
114+
return record
115+
116+
path = Path(record.storage_path)
117+
if not path.exists():
118+
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Uploaded file not found")
119+
120+
try:
121+
actual_mimetype: str = detect_mimetype(path)
122+
except Exception as e:
123+
raise HTTPException(
124+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
125+
detail=f"Failed to detect file type: {e}",
126+
) from e
127+
128+
stmt: Select[tuple[models.UploadToken]] = select(models.UploadToken).where(models.UploadToken.id == record.token_id)
129+
res: Result[tuple[models.UploadToken]] = await db.execute(stmt)
130+
131+
if not (token := res.scalar_one_or_none()):
132+
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Token not found")
133+
134+
if not mime_allowed(actual_mimetype, token.allowed_mime):
135+
path.unlink(missing_ok=True)
136+
await db.delete(record)
137+
await db.commit()
138+
raise HTTPException(
139+
status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
140+
detail=f"Actual file type '{actual_mimetype}' does not match allowed types",
141+
)
142+
143+
record.mimetype = actual_mimetype
144+
145+
if is_multimedia(actual_mimetype):
146+
record.status = "postprocessing"
147+
record.completed_at = None
148+
await db.commit()
149+
await db.refresh(record)
150+
if queue:
151+
await queue.enqueue(record.public_id)
152+
return record
153+
154+
record.status = "completed"
155+
record.completed_at = datetime.now(UTC)
156+
await db.commit()
157+
await db.refresh(record)
158+
return record
159+
160+
101161
@router.post("/initiate", response_model=schemas.InitiateUploadResponse, status_code=status.HTTP_201_CREATED, name="initiate_upload")
102162
async def initiate_upload(
103163
request: Request,
@@ -207,7 +267,6 @@ async def tus_patch(
207267
upload_id: str,
208268
request: Request,
209269
db: Annotated[AsyncSession, Depends(get_db)],
210-
queue: Annotated[ProcessingQueue | None, Depends(get_processing_queue)],
211270
upload_offset: Annotated[int, Header(convert_underscores=False, alias="Upload-Offset")] = ...,
212271
content_length: Annotated[int | None, Header()] = None,
213272
content_type: Annotated[str, Header(convert_underscores=False, alias="Content-Type")] = ...,
@@ -219,7 +278,6 @@ async def tus_patch(
219278
upload_id (str): The public ID of the upload.
220279
request (Request): The incoming HTTP request.
221280
db (AsyncSession): Database session.
222-
queue (ProcessingQueue | None): The processing queue for post-processing.
223281
upload_offset (int): The current upload offset from the client.
224282
content_length (int | None): The Content-Length header value.
225283
content_type (str): The Content-Type header value.
@@ -268,52 +326,14 @@ async def tus_patch(
268326
if record.upload_offset > record.upload_length:
269327
raise HTTPException(status_code=status.HTTP_413_CONTENT_TOO_LARGE, detail="Upload exceeds declared length")
270328

271-
if record.upload_offset == record.upload_length:
272-
try:
273-
actual_mimetype: str = detect_mimetype(path)
274-
except Exception as e:
275-
raise HTTPException(
276-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
277-
detail=f"Failed to detect file type: {e}",
278-
)
279-
280-
stmt: Select[tuple[models.UploadToken]] = select(models.UploadToken).where(models.UploadToken.id == record.token_id)
281-
res: Result[tuple[models.UploadToken]] = await db.execute(stmt)
282-
283-
if not (token := res.scalar_one_or_none()):
284-
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Token not found")
285-
286-
if not mime_allowed(actual_mimetype, token.allowed_mime):
287-
path.unlink(missing_ok=True)
288-
await db.delete(record)
289-
await db.commit()
290-
raise HTTPException(
291-
status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
292-
detail=f"Actual file type '{actual_mimetype}' does not match allowed types",
293-
)
294-
295-
record.mimetype = actual_mimetype
296-
297-
if is_multimedia(actual_mimetype):
298-
record.status = "postprocessing"
299-
await db.commit()
300-
await db.refresh(record)
301-
if queue:
302-
await queue.enqueue(record.public_id)
303-
else:
304-
record.status = "completed"
305-
record.completed_at = datetime.now(UTC)
306-
await db.commit()
307-
await db.refresh(record)
308-
else:
309-
record.status = "in_progress"
310-
311-
try:
312-
await db.commit()
313-
await db.refresh(record)
314-
except Exception:
315-
await db.rollback()
316-
await db.refresh(record)
329+
record.status = "in_progress"
330+
331+
try:
332+
await db.commit()
333+
await db.refresh(record)
334+
except Exception:
335+
await db.rollback()
336+
await db.refresh(record)
317337

318338
return Response(
319339
status_code=status.HTTP_204_NO_CONTENT,
@@ -366,26 +386,32 @@ async def tus_delete(upload_id: str, db: Annotated[AsyncSession, Depends(get_db)
366386

367387

368388
@router.post("/{upload_id}/complete", response_model=schemas.UploadRecordResponse, name="mark_complete")
369-
async def mark_complete(upload_id: str, db: Annotated[AsyncSession, Depends(get_db)]) -> models.UploadRecord:
389+
async def mark_complete(
390+
upload_id: str,
391+
db: Annotated[AsyncSession, Depends(get_db)],
392+
queue: Annotated[ProcessingQueue | None, Depends(get_processing_queue)],
393+
token: Annotated[str, Query(description="Upload token")] = ...,
394+
) -> models.UploadRecord:
370395
"""
371396
Mark an upload as complete.
372397
373398
Args:
374399
upload_id (str): The public ID of the upload.
375400
db (AsyncSession): Database session.
401+
queue (ProcessingQueue | None): The processing queue for post-processing.
402+
token (str): The upload token string.
376403
377404
Returns:
378405
UploadRecord: The updated upload record.
379406
380407
"""
381408
record: models.UploadRecord = await _get_upload_record(db, upload_id)
382-
await _ensure_token(db, token_id=record.token_id, check_remaining=False)
409+
token_row: models.UploadToken = await _ensure_token(db, token_value=token, check_remaining=False)
383410

384-
record.status = "completed"
385-
record.completed_at = datetime.now(UTC)
386-
await db.commit()
387-
await db.refresh(record)
388-
return record
411+
if record.token_id != token_row.id:
412+
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Upload does not belong to this token")
413+
414+
return await _finalize_upload(db, record, queue)
389415

390416

391417
@router.delete("/{upload_id}/cancel", response_model=dict, name="cancel_upload")

backend/tests/test_download_restrictions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ async def test_download_blocked_for_disabled_token(client):
2020

2121
upload_data = await initiate_upload(client, upload_token, "test.txt", 12)
2222
upload_id = upload_data["upload_id"]
23-
await upload_file_via_tus(client, upload_id, b"test content")
23+
await upload_file_via_tus(client, upload_id, b"test content", upload_token)
2424

2525
await client.patch(
2626
app.url_path_for("update_token", token_value=upload_token),
@@ -45,7 +45,7 @@ async def test_download_blocked_for_expired_token(client):
4545

4646
upload_data = await initiate_upload(client, upload_token, "test.txt", 12)
4747
upload_id = upload_data["upload_id"]
48-
await upload_file_via_tus(client, upload_id, b"test content")
48+
await upload_file_via_tus(client, upload_id, b"test content", upload_token)
4949

5050
expired_time = datetime.now(UTC) - timedelta(hours=1)
5151
await client.patch(
@@ -70,7 +70,7 @@ async def test_download_allowed_for_disabled_token_with_admin_key(client):
7070

7171
upload_data = await initiate_upload(client, upload_token, "test.txt", 12)
7272
upload_id = upload_data["upload_id"]
73-
await upload_file_via_tus(client, upload_id, b"test content")
73+
await upload_file_via_tus(client, upload_id, b"test content", upload_token)
7474

7575
await client.patch(
7676
app.url_path_for("update_token", token_value=upload_token),
@@ -94,7 +94,7 @@ async def test_get_file_info_blocked_for_disabled_token(client):
9494

9595
upload_data = await initiate_upload(client, upload_token, "test.txt", 12)
9696
upload_id = upload_data["upload_id"]
97-
await upload_file_via_tus(client, upload_id, b"test content")
97+
await upload_file_via_tus(client, upload_id, b"test content", upload_token)
9898

9999
await client.patch(
100100
app.url_path_for("update_token", token_value=upload_token),
@@ -118,7 +118,7 @@ async def test_get_file_info_allowed_for_disabled_token_with_admin_key(client):
118118

119119
upload_data = await initiate_upload(client, upload_token, "test.txt", 12)
120120
upload_id = upload_data["upload_id"]
121-
await upload_file_via_tus(client, upload_id, b"test content")
121+
await upload_file_via_tus(client, upload_id, b"test content", upload_token)
122122

123123
await client.patch(
124124
app.url_path_for("update_token", token_value=upload_token),

backend/tests/test_download_url_security.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ async def test_list_token_uploads_does_not_expose_api_key():
2121
upload_data = await initiate_upload(
2222
client, token_data["token"], filename="test.txt", size_bytes=11, filetype="text/plain", meta_data={}
2323
)
24-
await upload_file_via_tus(client, upload_data["upload_id"], b"hello world")
24+
await upload_file_via_tus(client, upload_data["upload_id"], b"hello world", token_data["token"])
2525

2626
# Get uploads list as admin
2727
response = await client.get(
@@ -48,7 +48,7 @@ async def test_get_file_info_does_not_expose_api_key():
4848
upload_data = await initiate_upload(
4949
client, token_data["token"], filename="test.txt", size_bytes=11, filetype="text/plain", meta_data={}
5050
)
51-
await upload_file_via_tus(client, upload_data["upload_id"], b"hello world")
51+
await upload_file_via_tus(client, upload_data["upload_id"], b"hello world", token_data["token"])
5252

5353
response = await client.get(
5454
app.url_path_for(

backend/tests/test_mimetype_validation.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from backend.app.config import settings
1212
from backend.app.db import SessionLocal
1313
from backend.app.main import app
14+
from backend.tests.utils import complete_upload
1415

1516

1617
@pytest.mark.asyncio
@@ -56,8 +57,11 @@ async def test_mimetype_spoofing_rejected(client):
5657
},
5758
)
5859

59-
assert patch_resp.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, "Fake video file should be rejected with 415"
60-
assert "does not match allowed types" in patch_resp.json()["detail"], "Error should indicate type mismatch"
60+
assert patch_resp.status_code == status.HTTP_204_NO_CONTENT, "TUS PATCH should accept bytes before explicit completion"
61+
62+
complete_status, complete_data = await complete_upload(client, upload_id, token_value)
63+
assert complete_status == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, "Fake video file should be rejected during explicit completion"
64+
assert "does not match allowed types" in complete_data["detail"], "Error should indicate type mismatch"
6165

6266
head_resp = await client.head(app.url_path_for("tus_head", upload_id=upload_id))
6367
assert head_resp.status_code == status.HTTP_404_NOT_FOUND, "Rejected upload should be removed"
@@ -111,6 +115,10 @@ async def test_valid_mimetype_accepted(client):
111115

112116
assert patch_resp.status_code == status.HTTP_204_NO_CONTENT, "Valid text file should be accepted"
113117

118+
complete_status, complete_data = await complete_upload(client, upload_id, token_value)
119+
assert complete_status == status.HTTP_200_OK, "Completion endpoint should finalize valid text uploads"
120+
assert complete_data["status"] == "completed", "Text upload should be marked completed after explicit completion"
121+
114122
head_resp = await client.head(app.url_path_for("tus_head", upload_id=upload_id))
115123
assert head_resp.status_code == status.HTTP_200_OK, "Upload should still exist after completion"
116124

@@ -158,6 +166,9 @@ async def test_mimetype_updated_on_completion(client):
158166
)
159167
assert patch_resp.status_code == status.HTTP_204_NO_CONTENT, "Upload completion should return 204"
160168

169+
complete_status, _ = await complete_upload(client, upload_id, token_value)
170+
assert complete_status == status.HTTP_200_OK, "Completion endpoint should succeed for uploaded text files"
171+
161172
async with SessionLocal() as session:
162173
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id == upload_id)
163174
res = await session.execute(stmt)
@@ -211,6 +222,10 @@ async def test_ffprobe_extracts_metadata_for_video(client):
211222
)
212223
assert patch_resp.status_code == status.HTTP_204_NO_CONTENT, "Video upload should complete successfully"
213224

225+
complete_status, complete_data = await complete_upload(client, upload_id, token_value)
226+
assert complete_status == status.HTTP_200_OK, "Completion endpoint should accept uploaded video files"
227+
assert complete_data["status"] == "postprocessing", "Video upload should enter postprocessing after explicit completion"
228+
214229
from backend.tests.test_postprocessing import wait_for_processing
215230

216231
await wait_for_processing([upload_id], timeout=10.0)
@@ -272,6 +287,10 @@ async def test_ffprobe_not_run_for_non_multimedia(client):
272287
)
273288
assert patch_resp.status_code == status.HTTP_204_NO_CONTENT, "Text upload should complete successfully"
274289

290+
complete_status, complete_data = await complete_upload(client, upload_id, token_value)
291+
assert complete_status == status.HTTP_200_OK, "Completion endpoint should succeed for text uploads"
292+
assert complete_data["status"] == "completed", "Text upload should complete immediately after explicit completion"
293+
275294
async with SessionLocal() as session:
276295
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id == upload_id)
277296
res = await session.execute(stmt)

backend/tests/test_postprocessing.py

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ async def test_multimedia_upload_enters_postprocessing(client):
5656
)
5757
upload_id = upload_data["upload_id"]
5858

59-
await upload_file_via_tus(client, upload_id, video_content)
59+
await upload_file_via_tus(client, upload_id, video_content, token_value)
6060

6161
async with SessionLocal() as session:
6262
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id == upload_id)
@@ -79,7 +79,7 @@ async def test_non_multimedia_upload_completes_immediately(client):
7979
)
8080
upload_id = upload_data["upload_id"]
8181

82-
await upload_file_via_tus(client, upload_id, pdf_content)
82+
await upload_file_via_tus(client, upload_id, pdf_content, token_value)
8383

8484
async with SessionLocal() as session:
8585
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id == upload_id)
@@ -90,49 +90,6 @@ async def test_non_multimedia_upload_completes_immediately(client):
9090
assert record.completed_at is not None, "Upload should be marked complete"
9191

9292

93-
@pytest.mark.asyncio
94-
async def test_postprocessing_worker_processes_queue(client):
95-
"""Test that the post-processing worker processes pending uploads."""
96-
token_data = await create_token(client, max_uploads=2)
97-
token_value = token_data["token"]
98-
99-
video_file = Path(__file__).parent / "fixtures" / "sample.mp4"
100-
video_content = video_file.read_bytes()
101-
102-
upload1_data = await initiate_upload(
103-
client, token_value, filename="video1.mp4", size_bytes=len(video_content), filetype="video/mp4", meta_data={"title": "Video 1"}
104-
)
105-
upload1_id = upload1_data["upload_id"]
106-
107-
upload2_data = await initiate_upload(
108-
client, token_value, filename="video2.mp4", size_bytes=len(video_content), filetype="video/mp4", meta_data={"title": "Video 2"}
109-
)
110-
upload2_id = upload2_data["upload_id"]
111-
112-
await upload_file_via_tus(client, upload1_id, video_content)
113-
await upload_file_via_tus(client, upload2_id, video_content)
114-
115-
async with SessionLocal() as session:
116-
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id.in_([upload1_id, upload2_id]))
117-
result = await session.execute(stmt)
118-
records = result.scalars().all()
119-
120-
for record in records:
121-
assert record.status in ("postprocessing", "completed"), "Upload should be in postprocessing or already completed"
122-
123-
completed = await wait_for_processing([upload1_id, upload2_id])
124-
assert completed, "Processing should complete within timeout"
125-
126-
async with SessionLocal() as session:
127-
stmt = select(models.UploadRecord).where(models.UploadRecord.public_id.in_([upload1_id, upload2_id]))
128-
result = await session.execute(stmt)
129-
records = result.scalars().all()
130-
131-
for record in records:
132-
assert record.status == "completed", "Both uploads should be completed after processing"
133-
assert record.completed_at is not None, "Both uploads should have completion time"
134-
135-
13693
@pytest.mark.asyncio
13794
async def test_postprocessing_handles_missing_file():
13895
"""Test that post-processing handles missing files gracefully."""

0 commit comments

Comments
 (0)