diff --git a/backend/src/cms_backend/db/book.py b/backend/src/cms_backend/db/book.py
index 7e4a72e..d78a51c 100644
--- a/backend/src/cms_backend/db/book.py
+++ b/backend/src/cms_backend/db/book.py
@@ -102,8 +102,11 @@ def get_next_book_to_move_files_or_none(
 ) -> Book | None:
     return session.scalars(
         select(Book)
-        .where(Book.needs_file_operation.is_(True))
-        .where(Book.has_error.is_(False))
+        .where(
+            Book.needs_file_operation.is_(True),
+            Book.has_error.is_(False),
+            Book.location_kind.not_in(["to_delete", "deleted"]),
+        )
         .order_by(Book.created_at)
         .limit(1)
     ).one_or_none()
diff --git a/backend/src/cms_backend/db/models.py b/backend/src/cms_backend/db/models.py
index 42cf2b6..4f80fa8 100644
--- a/backend/src/cms_backend/db/models.py
+++ b/backend/src/cms_backend/db/models.py
@@ -134,6 +134,7 @@ class Book(Base):
     location_kind: Mapped[str] = mapped_column(
         init=False, default="quarantine", server_default="quarantine"
     )
+    deletion_date: Mapped[datetime | None] = mapped_column(default=None, init=False)
     events: Mapped[list[str]] = mapped_column(init=False, default_factory=list)
 
     title_id: Mapped[UUID | None] = mapped_column(
diff --git a/backend/src/cms_backend/migrations/versions/a5f67b148119_add_deletion_date_to_book.py b/backend/src/cms_backend/migrations/versions/a5f67b148119_add_deletion_date_to_book.py
new file mode 100644
index 0000000..fa16a6a
--- /dev/null
+++ b/backend/src/cms_backend/migrations/versions/a5f67b148119_add_deletion_date_to_book.py
@@ -0,0 +1,28 @@
+"""add deletion_date to book
+
+Revision ID: a5f67b148119
+Revises: df6a64dec5a0
+Create Date: 2026-02-09 13:41:08.318866
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "a5f67b148119"
+down_revision = "df6a64dec5a0"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("book", sa.Column("deletion_date", sa.DateTime(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("book", "deletion_date")
+    # ### end Alembic commands ###
diff --git a/backend/src/cms_backend/mill/context.py b/backend/src/cms_backend/mill/context.py
index 7538ebc..ecd7ad0 100644
--- a/backend/src/cms_backend/mill/context.py
+++ b/backend/src/cms_backend/mill/context.py
@@ -41,3 +41,7 @@ class Context:
         default=UUID(get_mandatory_env("STAGING_WAREHOUSE_ID"))
     )
     staging_base_path: Path = field(default=Path(os.getenv("STAGING_BASE_PATH", "")))
+
+    old_book_deletion_delay: timedelta = timedelta(
+        seconds=parse_timespan(os.getenv("OLD_BOOK_DELETION_DELAY", default="1d"))
+    )
diff --git a/backend/src/cms_backend/mill/processors/title.py b/backend/src/cms_backend/mill/processors/title.py
index 2404253..c806d9b 100644
--- a/backend/src/cms_backend/mill/processors/title.py
+++ b/backend/src/cms_backend/mill/processors/title.py
@@ -1,3 +1,5 @@
+import datetime
+from collections import defaultdict
 from dataclasses import dataclass
 from pathlib import Path
 from uuid import UUID
@@ -9,7 +11,11 @@
 from cms_backend.db.models import Book, Title
 from cms_backend.mill.context import Context as MillContext
 from cms_backend.utils.datetime import getnow
-from cms_backend.utils.filename import compute_target_filename
+from cms_backend.utils.filename import (
+    PERIOD_LENGTH,
+    compute_target_filename,
+    get_period_and_suffix_from_filename,
+)
 
 
 @dataclass(eq=True, frozen=True)
@@ -19,6 +25,71 @@ class FileLocation:
     filename: str
 
 
+def apply_retention_rules(session: OrmSession, title: Title):
+    """Apply retention rules to `prod` books of the same title and flavour group.
+
+    The retention rules are described in https://wiki.openzim.org/wiki/ZIM_Updates
+    - Keep the last version of the ZIM files from the two most recent distinct
+      months (e.g. if we have `2024-04`, `2024-04a`, `2024-06`, `2024-06a`,
+      `2024-06b`, then we keep `2024-04a` and `2024-06b`)
+    - AND keep every version which is 30 days old or less.
+    """
+
+    now = getnow()
+
+    books_by_flavour: dict[str, list[Book]] = defaultdict(list)
+    for book in title.books:
+        if (
+            book.location_kind == "prod"
+            and not book.has_error
+            and book.created_at <= (now - datetime.timedelta(days=30))
+            and book.needs_file_operation is False
+        ):
+            books_by_flavour[book.flavour or ""].append(book)
+
+    books_to_delete: list[Book] = []
+
+    for _, books in books_by_flavour.items():
+        # Group books by period (without the suffix)
+        books_by_period: dict[str, list[Book]] = defaultdict(list)
+        for book in books:
+            if not book.date:
+                continue
+            books_by_period[book.date[:PERIOD_LENGTH]].append(book)
+
+        # Keep the last version from each of the 2 most recent periods
+        sorted_periods = sorted(books_by_period.keys(), reverse=True)
+        for period in sorted_periods[:2]:
+            sorted_books_by_period = sort_books_by_filename_period(
+                books_by_period[period]
+            )
+            # Mark all but the most recent one for deletion
+            books_to_delete.extend(sorted_books_by_period[1:])
+
+        # Mark the remainder of the books to be deleted.
+        for period in sorted_periods[2:]:
+            books_to_delete.extend(books_by_period[period])
+
+    deletion_date = now + MillContext.old_book_deletion_delay
+
+    for book in books_to_delete:
+        logger.info(
+            f"Marking book {book.id} for deletion, deletion_date={deletion_date}"
+        )
+        book.location_kind = "to_delete"
+        book.deletion_date = deletion_date
+        book.needs_file_operation = True
+        book.events.append(
+            f"{now}: marked for deletion due to retention policy, "
+            f"will be deleted after {deletion_date}"
+        )
+        title.events.append(f"{now}: book {book.id} marked for deletion.")
+        session.add(book)
+        session.add(title)
+
+    session.flush()
+
+
 def add_book_to_title(session: OrmSession, book: Book, title: Title):
     try:
         # Retrieve name from book.name directly
@@ -76,6 +147,9 @@ def add_book_to_title(session: OrmSession, book: Book, title: Title):
         )
         book.location_kind = "staging" if goes_to_staging else "prod"
 
+        if not goes_to_staging:
+            apply_retention_rules(session, title)
+
     except Exception as exc:
         book.events.append(
             f"{getnow()}: error encountered while adding to title {title.id}\n{exc}"
@@ -167,3 +241,21 @@ def create_book_target_locations(
     )
 
     book.needs_file_operation = True
+
+
+def sort_books_by_filename_period(books: list[Book]) -> list[Book]:
+    """Sort a list of books by period.
+
+    Assumes:
+    - the book's location exists since it contains the filename of the book
+    """
+
+    def sort_fn(book: Book) -> tuple[str, int, str]:
+        period, suffix = get_period_and_suffix_from_filename(book.locations[0].filename)
+        return (period, len(suffix), suffix)
+
+    return sorted(
+        books,
+        key=sort_fn,
+        reverse=True,
+    )
diff --git a/backend/src/cms_backend/shuttle/context.py b/backend/src/cms_backend/shuttle/context.py
index 77ce30c..29da065 100644
--- a/backend/src/cms_backend/shuttle/context.py
+++ b/backend/src/cms_backend/shuttle/context.py
@@ -39,4 +39,8 @@ class Context:
         seconds=parse_timespan(os.getenv("MOVE_FILES_INTERVAL", default="1m"))
     )
 
+    delete_files_interval: timedelta = timedelta(
+        seconds=parse_timespan(os.getenv("DELETE_FILES_INTERVAL", default="1h"))
+    )
+
     local_warehouse_paths: ClassVar[dict[UUID, Path]] = _parse_local_warehouse_paths()
diff --git a/backend/src/cms_backend/shuttle/delete_files.py b/backend/src/cms_backend/shuttle/delete_files.py
new file mode 100644
index 0000000..6eafaea
--- /dev/null
+++ b/backend/src/cms_backend/shuttle/delete_files.py
@@ -0,0 +1,96 @@
+import datetime
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session as OrmSession
+
+from cms_backend import logger
+from cms_backend.db.models import Book
+from cms_backend.shuttle.context import Context as ShuttleContext
+from cms_backend.utils.datetime import getnow
+
+
+def delete_files(session: OrmSession):
+    """Delete books from filesystem that have passed their deletion_date.
+
+    Finds books with location_kind='to_delete' and deletion_date <= now,
+    deletes their files from filesystem, and marks them as 'deleted'.
+ """ + now = getnow() + nb_zim_files_deleted = 0 + + while True: + with session.begin_nested(): + book = get_next_book_to_delete(session, now) + if not book: + break + + try: + logger.debug(f"Deleting files for book {book.id}") + delete_book_files(session, book) + nb_zim_files_deleted += 1 + except Exception as exc: + book.events.append( + f"{getnow()}: error encountered while deleting files\n{exc}" + ) + logger.exception(f"Failed to delete files for book {book.id}") + book.has_error = True + + logger.info(f"Done deleting {nb_zim_files_deleted} ZIM files") + + +def get_next_book_to_delete(session: OrmSession, now: datetime.datetime) -> Book | None: + """Get the next book that needs deletion.""" + return session.scalars( + select(Book) + .where( + Book.location_kind == "to_delete", + Book.deletion_date <= now, + Book.has_error.is_(False), + Book.needs_file_operation.is_(True), + ) + .order_by(Book.deletion_date) + .limit(1) + ).one_or_none() + + +def delete_book_files(session: OrmSession, book: Book): + """Delete all files for a book from filesystem.""" + inaccessible_warehouse_names = { + loc.warehouse.name + for loc in book.locations + if loc.warehouse_id not in ShuttleContext.local_warehouse_paths.keys() + } + + # If any warehouse is not accessible, we cannot proceed + if len(inaccessible_warehouse_names) > 0: + logger.debug( + f"Cannot delete book {book.id}, no access to " + f"{','.join(inaccessible_warehouse_names)} warehouses" + ) + return + + # Delete all current location files + for location in book.locations: + if location.status == "current": + try: + file_path = location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + file_path.unlink(missing_ok=True) + logger.info(f"Deleted file for book {book.id} at {file_path}") + book.events.append(f"{getnow()}: deleted file at {location.full_str}") + session.delete(location) + except Exception: + logger.exception( + f"Failed to delete file at {location.full_str} for book {book.id}" + ) + raise + + # Mark book as deleted + book.location_kind = "deleted" + book.needs_file_operation = False + book.events.append(f"{getnow()}: all files deleted, book marked as deleted") + session.add(book) + + session.flush() + logger.info(f"Book {book.id} files have been deleted") diff --git a/backend/src/cms_backend/shuttle/main.py b/backend/src/cms_backend/shuttle/main.py index b890114..aa3a989 100644 --- a/backend/src/cms_backend/shuttle/main.py +++ b/backend/src/cms_backend/shuttle/main.py @@ -9,6 +9,7 @@ from cms_backend.context import Context from cms_backend.db import Session from cms_backend.shuttle.context import Context as ShuttleContext +from cms_backend.shuttle.delete_files import delete_files from cms_backend.shuttle.move_files import move_files from cms_backend.utils.database import upgrade_db_schema from cms_backend.utils.datetime import getnow @@ -20,6 +21,10 @@ func=move_files, interval=ShuttleContext.move_files_interval, ), + TaskConfig( + func=delete_files, + interval=ShuttleContext.delete_files_interval, + ), ] diff --git a/backend/src/cms_backend/utils/filename.py b/backend/src/cms_backend/utils/filename.py index 21b558b..4a5f6f8 100644 --- a/backend/src/cms_backend/utils/filename.py +++ b/backend/src/cms_backend/utils/filename.py @@ -1,5 +1,6 @@ """Utilities for computing and managing book target filenames.""" +import re from uuid import UUID from sqlalchemy import select @@ -8,6 +9,9 @@ from cms_backend.db.models import BookLocation PERIOD_LENGTH = 7 +FILENAME_PERIOD_SUFFIX_PATTERN = re.compile( + 
r".*_(?P\d{4}-\d{2})(?P[a-z]*)\.zim" +) def get_next_suffix(current_suffix: str) -> str: @@ -162,3 +166,12 @@ def compute_target_filename( next_suffix = get_next_suffix(last_suffix) return f"{base_pattern}{next_suffix}.zim" + + +def get_period_and_suffix_from_filename(filename: str) -> tuple[str, str]: + """Get the (period, suffix) tuple from filename.""" + match = FILENAME_PERIOD_SUFFIX_PATTERN.match(filename) + if match is None: + raise ValueError("Unable to retrieve period from filename") + groupdict = match.groupdict() + return (groupdict["period"], groupdict["suffix"]) diff --git a/backend/tests/mill/processors/test_title.py b/backend/tests/mill/processors/test_title.py new file mode 100644 index 0000000..76f04d6 --- /dev/null +++ b/backend/tests/mill/processors/test_title.py @@ -0,0 +1,275 @@ +"""Tests for title processor functions.""" + +from collections.abc import Callable +from datetime import timedelta +from unittest.mock import patch + +import pytest +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.models import Book, BookLocation, Title +from cms_backend.mill.processors.title import ( + apply_retention_rules, + sort_books_by_filename_period, +) +from cms_backend.utils.datetime import getnow + + +def test_sort_books_by_filename_period( + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], +): + """Books with mixed periods and suffixes should be sorted correctly.""" + book1 = create_book() + create_book_location(book=book1, filename="wiki_2024-01.zim") + + book2 = create_book() + create_book_location(book=book2, filename="wiki_2024-01a.zim") + + book3 = create_book() + create_book_location(book=book3, filename="wiki_2024-02.zim") + + book4 = create_book() + create_book_location(book=book4, filename="wiki_2023-12z.zim") + + book5 = create_book() + create_book_location(book=book5, filename="wiki_2023-12aa.zim") + + book6 = create_book() + create_book_location(book=book6, filename="wiki_2024-01aa.zim") + + book7 = create_book() + create_book_location(book=book7, filename="wiki_2024-01b.zim") + + book8 = create_book() + create_book_location(book=book8, filename="wiki_2024-01ba.zim") + + book9 = create_book() + create_book_location(book=book9, filename="wiki_2024-02a.zim") + + result = sort_books_by_filename_period( + [book1, book2, book3, book4, book5, book6, book7, book8, book9] + ) + + assert len(result) == 9 + sorted_filenames = [book.locations[0].filename for book in result] + + assert sorted_filenames == [ + "wiki_2024-02a.zim", + "wiki_2024-02.zim", + "wiki_2024-01ba.zim", + "wiki_2024-01aa.zim", + "wiki_2024-01b.zim", + "wiki_2024-01a.zim", + "wiki_2024-01.zim", + "wiki_2023-12aa.zim", + "wiki_2023-12z.zim", + ] + + +def test_invalid_filename_raises_error( + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], +): + """Book with invalid filename format should raise ValueError.""" + book = create_book() + create_book_location(book=book, filename="invalid_filename.zim") + + with pytest.raises(ValueError, match="Unable to retrieve period from filename"): + sort_books_by_filename_period([book]) + + +def test_apply_retention_rules_keeps_last_version_of_two_most_recent_months( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], +): + """Retention rules should keep the last version of two most recent months.""" + title = create_title(name="test_wiki_en_all") + now = getnow() + + # Create books 
+    # Create books from 4 different months, with multiple versions per month
+    # Month 1 (oldest): 2024-01
+    book1 = create_book(
+        name="test_wiki",
+        date="2024-01",
+        flavour="nopic",
+        created_at=now - timedelta(days=25),
+    )
+    book1.location_kind = "prod"
+    book1.title = title
+    create_book_location(book=book1, filename="test_wiki_2024-01.zim")
+
+    # Month 2: 2024-02 with two versions
+    book2a = create_book(
+        name="test_wiki",
+        date="2024-02",
+        flavour="nopic",
+        created_at=now - timedelta(days=40),
+    )
+    book2a.location_kind = "prod"
+    book2a.title = title
+    create_book_location(book=book2a, filename="test_wiki_2024-02.zim")
+
+    book2b = create_book(
+        name="test_wiki",
+        date="2024-02",
+        flavour="nopic",
+        created_at=now - timedelta(days=39),
+    )
+    book2b.location_kind = "prod"
+    book2b.title = title
+    create_book_location(book=book2b, filename="test_wiki_2024-02a.zim")
+
+    # Month 3: 2024-03 with three versions
+    book3a = create_book(
+        name="test_wiki",
+        date="2024-03",
+        flavour="nopic",
+        created_at=now - timedelta(days=35),
+    )
+    book3a.location_kind = "prod"
+    book3a.title = title
+    create_book_location(book=book3a, filename="test_wiki_2024-03.zim")
+
+    book3b = create_book(
+        name="test_wiki",
+        date="2024-03",
+        flavour="nopic",
+        created_at=now - timedelta(days=34),
+    )
+    book3b.location_kind = "prod"
+    book3b.title = title
+    create_book_location(book=book3b, filename="test_wiki_2024-03a.zim")
+
+    book3c = create_book(
+        name="test_wiki",
+        date="2024-03",
+        flavour="nopic",
+        created_at=now - timedelta(days=33),
+    )
+    book3c.location_kind = "prod"
+    book3c.title = title
+    create_book_location(book=book3c, filename="test_wiki_2024-03b.zim")
+
+    # Month 4 (newest): 2024-04 with two versions
+    book4a = create_book(
+        name="test_wiki",
+        date="2024-04",
+        flavour="nopic",
+        created_at=now - timedelta(days=35),
+    )
+    book4a.location_kind = "prod"
+    book4a.title = title
+    create_book_location(book=book4a, filename="test_wiki_2024-04.zim")
+
+    book4b = create_book(
+        name="test_wiki",
+        date="2024-04",
+        flavour="nopic",
+        created_at=now - timedelta(days=34),
+    )
+    book4b.location_kind = "prod"
+    book4b.title = title
+    create_book_location(book=book4b, filename="test_wiki_2024-04a.zim")
+
+    dbsession.flush()
+
+    with patch("cms_backend.mill.processors.title.getnow", return_value=now):
+        apply_retention_rules(dbsession, title)
+
+    dbsession.flush()
+
+    # Keep book1 since its created_at is less than 30 days ago
+    assert book1.location_kind == "prod"
+    # Should keep only the latest from the two most recent months:
+    # - 2024-04a
+    # - 2024-03b
+    assert book4b.location_kind == "prod"
+    assert book3c.location_kind == "prod"
+
+    # All others should be marked for deletion
+    assert book2a.location_kind == "to_delete"
+    assert book2b.location_kind == "to_delete"
+    assert book3a.location_kind == "to_delete"
+    assert book3b.location_kind == "to_delete"
+    assert book4a.location_kind == "to_delete"
+
+
+def test_apply_retention_rules_handles_different_flavours_separately(
+    dbsession: OrmSession,
+    create_title: Callable[..., Title],
+    create_book: Callable[..., Book],
+    create_book_location: Callable[..., BookLocation],
+):
+    """Retention rules should treat different flavours independently."""
+    title = create_title(name="test_wiki_en_all")
+    now = getnow()
+
+    # Create books with "nopic" flavour: two in January and one in February
+    book_nopic_jan = create_book(
+        name="test_wiki",
+        date="2024-01",
+        flavour="nopic",
+        created_at=now - timedelta(days=40),
+    )
+    book_nopic_jan.location_kind = "prod"
+    book_nopic_jan.title = title
+    create_book_location(book=book_nopic_jan, filename="test_wiki_nopic_2024-01.zim")
+
+    book_nopic_jan_b = create_book(
+        name="test_wiki",
+        date="2024-01",
+        flavour="nopic",
+        created_at=now - timedelta(days=35),
+    )
+    book_nopic_jan_b.location_kind = "prod"
+    book_nopic_jan_b.title = title
+    create_book_location(book=book_nopic_jan_b, filename="test_wiki_nopic_2024-01a.zim")
+
+    book_nopic_feb = create_book(
+        name="test_wiki",
+        date="2024-02",
+        flavour="nopic",
+        created_at=now - timedelta(days=40),
+    )
+    book_nopic_feb.location_kind = "prod"
+    book_nopic_feb.title = title
+    create_book_location(book=book_nopic_feb, filename="test_wiki_nopic_2024-02.zim")
+
+    # Create books with "maxi" flavour
+    book_maxi_jan = create_book(
+        name="test_wiki",
+        date="2024-01",
+        flavour="maxi",
+        created_at=now - timedelta(days=38),
+    )
+    book_maxi_jan.location_kind = "prod"
+    book_maxi_jan.title = title
+    create_book_location(book=book_maxi_jan, filename="test_wiki_maxi_2024-01.zim")
+
+    book_maxi_feb = create_book(
+        name="test_wiki",
+        date="2024-02",
+        flavour="maxi",
+        created_at=now - timedelta(days=38),
+    )
+    book_maxi_feb.location_kind = "prod"
+    book_maxi_feb.title = title
+    create_book_location(book=book_maxi_feb, filename="test_wiki_maxi_2024-02.zim")
+
+    dbsession.flush()
+
+    with patch("cms_backend.mill.processors.title.getnow", return_value=now):
+        apply_retention_rules(dbsession, title)
+
+    dbsession.flush()
+
+    # Only the first nopic_jan book should be deleted because, within its flavour,
+    # two more recent books are kept ahead of it.
+    assert book_nopic_jan.location_kind == "to_delete"
+    assert book_nopic_jan_b.location_kind == "prod"
+    assert book_nopic_feb.location_kind == "prod"
+    assert book_maxi_feb.location_kind == "prod"
+    assert book_maxi_jan.location_kind == "prod"
diff --git a/backend/tests/shuttle/test_delete_files.py b/backend/tests/shuttle/test_delete_files.py
new file mode 100644
index 0000000..c49208a
--- /dev/null
+++ b/backend/tests/shuttle/test_delete_files.py
@@ -0,0 +1,168 @@
+from collections.abc import Callable
+from contextlib import ExitStack
+from datetime import timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+from sqlalchemy.orm import Session as OrmSession
+
+from cms_backend.db.models import Book, BookLocation, Warehouse
+from cms_backend.shuttle.delete_files import delete_book_files, delete_files
+from cms_backend.utils.datetime import getnow
+
+
+def test_delete_files_processes_eligible_book(
+    dbsession: OrmSession,
+    create_book: Callable[..., Book],
+    create_book_location: Callable[..., BookLocation],
+    create_warehouse: Callable[..., Warehouse],
+):
+    """Test that delete_files processes books ready for deletion."""
+    warehouse = create_warehouse()
+    now = getnow()
+
+    book = create_book()
+    book.location_kind = "to_delete"
+    book.deletion_date = now - timedelta(days=1)
+    book.has_error = False
+    book.needs_file_operation = True
+    dbsession.flush()
+
+    create_book_location(book=book, warehouse_id=warehouse.id, status="current")
+    dbsession.flush()
+
+    with ExitStack() as stack:
+        mock_context = stack.enter_context(
+            patch("cms_backend.shuttle.delete_files.ShuttleContext")
+        )
+        stack.enter_context(patch("pathlib.Path.unlink"))
+
+        mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")}
+
+        delete_files(dbsession)
+
+    assert book.location_kind == "deleted"
+    assert book.needs_file_operation is False
+
+
+def test_delete_files_inaccessible_warehouse(
+    dbsession: OrmSession,
+    create_book: Callable[..., Book],
+    create_book_location: Callable[..., BookLocation],
+    create_warehouse: Callable[..., Warehouse],
+):
+    """Test that delete_files skips books if the warehouse is not accessible."""
+    warehouse = create_warehouse(name="inaccessible_warehouse")
+    now = getnow()
+
+    book = create_book()
+    book.location_kind = "to_delete"
+    book.deletion_date = now - timedelta(days=1)
+    book.has_error = False
+    book.needs_file_operation = True
+    dbsession.flush()
+
+    create_book_location(book=book, warehouse_id=warehouse.id, status="current")
+    dbsession.flush()
+
+    with patch("cms_backend.shuttle.delete_files.ShuttleContext") as mock_context:
+        mock_context.local_warehouse_paths = {}
+        delete_book_files(dbsession, book)
+
+    assert book.location_kind == "to_delete"
+    assert book.needs_file_operation is True
+    assert len(book.events) == 0
+
+
+def test_delete_files_handles_file_deletion_error(
+    dbsession: OrmSession,
+    create_book: Callable[..., Book],
+    create_book_location: Callable[..., BookLocation],
+    create_warehouse: Callable[..., Warehouse],
+):
+    """Test that delete_files handles file deletion errors gracefully."""
+    warehouse = create_warehouse()
+    now = getnow()
+
+    book = create_book()
+    book.location_kind = "to_delete"
+    book.deletion_date = now - timedelta(days=1)
+    book.has_error = False
+    book.needs_file_operation = True
+    dbsession.flush()
+
+    create_book_location(
+        book=book, warehouse_id=warehouse.id, path="path", status="current"
+    )
+    dbsession.flush()
+
+    with ExitStack() as stack:
+        mock_context = stack.enter_context(
+            patch("cms_backend.shuttle.delete_files.ShuttleContext")
+        )
+        mock_unlink = stack.enter_context(patch("pathlib.Path.unlink"))
+
+        mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")}
+        mock_unlink.side_effect = OSError("Permission denied")
+
+        delete_files(dbsession)
+
+    # Book should be marked with error
+    assert book.has_error is True
+    assert book.location_kind == "to_delete"
+    assert any(
+        "error encountered while deleting files" in event for event in book.events
+    )
+
+
+def test_delete_files_handles_errors_and_continues_processing(
+    dbsession: OrmSession,
+    create_book: Callable[..., Book],
+    create_book_location: Callable[..., BookLocation],
+    create_warehouse: Callable[..., Warehouse],
+):
+    """Test that delete_files marks books with errors and continues."""
+    warehouse = create_warehouse()
+    now = getnow()
+
+    # Create two eligible books
+    book1 = create_book()
+    book1.location_kind = "to_delete"
+    book1.deletion_date = now - timedelta(days=1)
+    book1.has_error = False
+    book1.needs_file_operation = True
+
+    book2 = create_book()
+    book2.location_kind = "to_delete"
+    book2.deletion_date = now - timedelta(days=2)
+    book2.has_error = False
+    book2.needs_file_operation = True
+
+    dbsession.flush()
+
+    create_book_location(book=book1, warehouse_id=warehouse.id, status="current")
+    create_book_location(book=book2, warehouse_id=warehouse.id, status="current")
+    dbsession.flush()
+
+    with ExitStack() as stack:
+        mock_context = stack.enter_context(
+            patch("cms_backend.shuttle.delete_files.ShuttleContext")
+        )
+        mock_unlink = stack.enter_context(patch("pathlib.Path.unlink"))
+
+        mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")}
+
+        # Make unlink fail for the first call only
+        mock_unlink.side_effect = [OSError("Permission denied"), None]
+
+        delete_files(dbsession)
+
+    # book2 is processed first (older deletion_date) and should have an error
+    assert book2.has_error is True
+    assert any(
+        "error encountered while deleting files" in event for event in book2.events
+    )
+
+    # book1 is processed second and should succeed
+    assert book1.location_kind == "deleted"
+    assert book1.has_error is False