diff --git a/.gitignore b/.gitignore index 5e30185..38088a1 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,4 @@ test.db # dump of scripts scripts/*csv +*.zim diff --git a/backend/src/cms_backend/db/book.py b/backend/src/cms_backend/db/book.py index a1ab21f..8fceaca 100644 --- a/backend/src/cms_backend/db/book.py +++ b/backend/src/cms_backend/db/book.py @@ -1,6 +1,7 @@ from typing import Any from uuid import UUID +from sqlalchemy import select from sqlalchemy.orm import Session as OrmSession from cms_backend.db.models import Book, BookLocation, WarehousePath, ZimfarmNotification @@ -97,3 +98,14 @@ def create_book_location( ) return location + + +def get_next_book_to_move_files_or_none( + session: OrmSession, +) -> Book | None: + return session.scalars( + select(Book) + .where(Book.status == "pending_move") + .order_by(Book.created_at) + .limit(1) + ).one_or_none() diff --git a/backend/src/cms_backend/db/models.py b/backend/src/cms_backend/db/models.py index 2566db9..d6396d0 100644 --- a/backend/src/cms_backend/db/models.py +++ b/backend/src/cms_backend/db/models.py @@ -1,5 +1,6 @@ from datetime import datetime from ipaddress import IPv4Address +from pathlib import Path from typing import Any, Optional from uuid import UUID @@ -143,6 +144,12 @@ class Book(Base): postgresql_where=text("status = 'errored'"), ) +Index( + "idx_book_status_pending_move", + Book.status, + postgresql_where=text("status = 'pending_move'"), +) + class Title(Base): __tablename__ = "title" @@ -225,3 +232,17 @@ class BookLocation(Base): book: Mapped["Book"] = relationship(back_populates="locations", init=False) warehouse_path: Mapped["WarehousePath"] = relationship(init=False) + + def full_local_path(self, warehouse_local_folders_map: dict[UUID, str]) -> Path: + folder_in_warehouse = Path(self.warehouse_path.folder_name) / self.filename + warehouse_folder = Path( + warehouse_local_folders_map[self.warehouse_path.warehouse.id] + ) + return warehouse_folder / folder_in_warehouse + + @property + def full_str(self) -> str: + return ( + f"{self.warehouse_path.warehouse.name}:" + f"{self.warehouse_path.folder_name}/{self.filename}" + ) diff --git a/backend/src/cms_backend/migrations/versions/add_pending_move_index.py b/backend/src/cms_backend/migrations/versions/add_pending_move_index.py new file mode 100644 index 0000000..e9c2907 --- /dev/null +++ b/backend/src/cms_backend/migrations/versions/add_pending_move_index.py @@ -0,0 +1,30 @@ +"""Add partial index for pending_move book status + +Revision ID: add_pending_move_index +Revises: add_book_location_table +Create Date: 2025-11-13 00:00:00.000000 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "add_pending_move_index" +down_revision = "add_book_location_table" +branch_labels = None +depends_on = None + + +def upgrade(): + # Create partial index for pending_move status + op.create_index( + "idx_book_status_pending_move", + "book", + ["status"], + postgresql_where="status = 'pending_move'", + ) + + +def downgrade(): + # Drop the partial index + op.drop_index("idx_book_status_pending_move", table_name="book") diff --git a/backend/src/cms_backend/mill/process_zimfarm_notifications.py b/backend/src/cms_backend/mill/process_zimfarm_notifications.py index 552b1ce..5c16404 100644 --- a/backend/src/cms_backend/mill/process_zimfarm_notifications.py +++ b/backend/src/cms_backend/mill/process_zimfarm_notifications.py @@ -8,7 +8,6 @@ def process_zimfarm_notifications(session: OrmSession): logger.info("Processing Zimfarm notifications") nb_notifications_processed = 0 - raise Exception("foo") while True: with session.begin_nested(): notification = get_next_notification_to_process_or_none(session) diff --git a/backend/src/cms_backend/processors/book.py b/backend/src/cms_backend/processors/book.py index a509474..baf93d6 100644 --- a/backend/src/cms_backend/processors/book.py +++ b/backend/src/cms_backend/processors/book.py @@ -157,6 +157,7 @@ def create_book_target_locations( f"{getnow()}: book already at all target locations, skipping target " "creation" ) + book.status = "published" return # Create target locations for each applicable warehouse path @@ -168,3 +169,5 @@ def create_book_target_locations( filename=target_filename, status="target", ) + + book.status = "pending_move" diff --git a/backend/src/cms_backend/processors/title.py b/backend/src/cms_backend/processors/title.py index 66c1728..87536d1 100644 --- a/backend/src/cms_backend/processors/title.py +++ b/backend/src/cms_backend/processors/title.py @@ -20,7 +20,6 @@ def add_book_to_title(session: OrmSession, book: Book, title: Title): title.books.append(book) book.events.append(f"{getnow()}: book added to title {title.id}") title.events.append(f"{getnow()}: book {book.id} added to title") - book.status = "processed" if title.name != book.name: title.events.append(f"{getnow()}: updating title name to {book.name}") diff --git a/backend/src/cms_backend/shuttle/context.py b/backend/src/cms_backend/shuttle/context.py index 683b4ca..40d42a4 100644 --- a/backend/src/cms_backend/shuttle/context.py +++ b/backend/src/cms_backend/shuttle/context.py @@ -1,14 +1,28 @@ -import dataclasses import os +from dataclasses import dataclass from datetime import timedelta -from typing import TypeVar +from typing import ClassVar +from uuid import UUID from humanfriendly import parse_timespan -T = TypeVar("T") +WarehouseId = str +LocalWarehousePath = str -@dataclasses.dataclass(kw_only=True) +def _parse_local_warehouse_paths() -> dict[UUID, str]: + env_value = os.getenv("LOCAL_WAREHOUSE_PATHS", default="") + if not env_value: + return {} + return { + UUID(warehouse_id): local_path + for item in env_value.split(",") + if item + for (warehouse_id, local_path) in [item.split(":", 1)] + } + + +@dataclass(kw_only=True) class Context: """Class holding every contextual / configuration bits which can be moved @@ -20,8 +34,8 @@ class Context: os.getenv("PAUSE_IN_THE_LOOP", default="10s") ) - process_zimfarm_notifications_interval: timedelta = timedelta( - seconds=parse_timespan( - os.getenv("PROCESS_ZIMFARM_NOTIFICATIONS_INTERVAL", default="1m") - ) + move_files_interval: timedelta = timedelta( + seconds=parse_timespan(os.getenv("MOVE_FILES_INTERVAL", 
default="1m")) ) + + local_warehouse_paths: ClassVar[dict[UUID, str]] = _parse_local_warehouse_paths() diff --git a/backend/src/cms_backend/shuttle/main.py b/backend/src/cms_backend/shuttle/main.py index 5ef4791..b890114 100644 --- a/backend/src/cms_backend/shuttle/main.py +++ b/backend/src/cms_backend/shuttle/main.py @@ -9,12 +9,18 @@ from cms_backend.context import Context from cms_backend.db import Session from cms_backend.shuttle.context import Context as ShuttleContext +from cms_backend.shuttle.move_files import move_files from cms_backend.utils.database import upgrade_db_schema from cms_backend.utils.datetime import getnow from cms_backend.utils.task_config import TaskConfig # Configure background tasks with their execution intervals -tasks: list[TaskConfig] = [] +tasks: list[TaskConfig] = [ + TaskConfig( + func=move_files, + interval=ShuttleContext.move_files_interval, + ), +] def main(): diff --git a/backend/src/cms_backend/shuttle/move_files.py b/backend/src/cms_backend/shuttle/move_files.py new file mode 100644 index 0000000..7a28d5a --- /dev/null +++ b/backend/src/cms_backend/shuttle/move_files.py @@ -0,0 +1,132 @@ +import shutil + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend import logger +from cms_backend.db.book import get_next_book_to_move_files_or_none +from cms_backend.db.models import Book, BookLocation +from cms_backend.shuttle.context import Context as ShuttleContext +from cms_backend.utils.datetime import getnow + + +def move_files(session: OrmSession): + logger.info("Moving ZIM files") + nb_zim_files_moved = 0 + while True: + with session.begin_nested(): + book = get_next_book_to_move_files_or_none(session) + if not book: + break + logger.debug(f"Processing ZIM file of book {book.id}") + move_book_files(session, book) + nb_zim_files_moved += 1 + + logger.info(f"Done moving {nb_zim_files_moved} ZIM files") + + +def move_book_files(session: OrmSession, book: Book): + inaccessible_warehouse_names = { + loc.warehouse_path.warehouse.name + for loc in book.locations + if loc.warehouse_path.warehouse_id + not in ShuttleContext.local_warehouse_paths.keys() + } + + # if any warehouse is not accessible, we do not proceed (complex scenarii not yet + # implemented) + if len(inaccessible_warehouse_names) > 0: + logger.debug( + f"Ignoring book {book.id}, no access to " + f"{','.join(inaccessible_warehouse_names)} warehouses" + ) + return + + current_locations: list[BookLocation] = [ + loc for loc in book.locations if loc.status == "current" + ] + + target_locations: list[BookLocation] = [ + loc for loc in book.locations if loc.status == "target" + ] + + if len(current_locations) == 0: + book.events.append( + f"{getnow()}: error encountered while moving files, no current location" + ) + book.status = "errored" + return + + if len(target_locations) == 0: + book.events.append( + f"{getnow()}: ignoring move files operation, no target location set" + ) + book.status = "published" + return + + # start with copies + while len(target_locations) > len(current_locations): + current_location = current_locations[0] + target_location = target_locations[0] + + current_path = current_location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + target_path = target_location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + + target_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(current_path, target_path) + logger.debug(f"Copied book {book.id} from {current_path} to {target_path}") + book.events.append( + f"{getnow()}: copied book from 
{current_location.full_str} to " + f"{target_location.full_str}" + ) + target_locations.remove(target_location) + target_location.status = "current" + + # continue with moves + while len(current_locations) > 0 and len(target_locations) > 0: + current_location = current_locations[0] + target_location = target_locations[0] + + current_path = current_location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + target_path = target_location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + + target_path.parent.mkdir(parents=True, exist_ok=True) + shutil.move(current_path, target_path) + logger.debug(f"Moved book {book.id} from {current_path} to {target_path}") + book.events.append( + f"{getnow()}: moved book from {current_location.full_str} to " + f"{target_location.full_str}" + ) + current_locations.remove(current_location) + target_locations.remove(target_location) + book.locations.remove(current_location) + session.delete(current_location) + session.flush() + target_location.status = "current" + + # cleanup phase: delete extra current locations + while len(current_locations) > 0: + current_location = current_locations[0] + current_path = current_location.full_local_path( + ShuttleContext.local_warehouse_paths + ) + + current_path.unlink(missing_ok=True) + logger.debug( + f"Deleted extra current location for book {book.id} at {current_path}" + ) + book.events.append( + f"{getnow()}: deleted old location {current_location.full_str}" + ) + current_locations.remove(current_location) + book.locations.remove(current_location) + session.delete(current_location) + + book.status = "published" diff --git a/backend/tests/processors/test_book_location_integration.py b/backend/tests/processors/test_book_location_integration.py index ca694df..9f3bf92 100644 --- a/backend/tests/processors/test_book_location_integration.py +++ b/backend/tests/processors/test_book_location_integration.py @@ -452,6 +452,9 @@ def test_no_target_when_current_matches_single_path( "book already at all target locations" in event for event in book.events ) + # Book should be marked as published (no move needed) + assert book.status == "published" + def test_no_target_when_current_matches_multiple_paths( self, dbsession: OrmSession, @@ -522,6 +525,9 @@ def test_no_target_when_current_matches_multiple_paths( "book already at all target locations" in event for event in book.events ) + # Book should be marked as published (no move needed) + assert book.status == "published" + def test_target_created_when_partial_match( self, dbsession: OrmSession, @@ -574,6 +580,9 @@ def test_target_created_when_partial_match( "book already at all target locations" in event for event in book.events ) + # Book should be marked as pending_move (needs file movement) + assert book.status == "pending_move" + def test_target_created_when_filename_differs( self, dbsession: OrmSession, @@ -619,3 +628,6 @@ def test_target_created_when_filename_differs( assert not any( "book already at all target locations" in event for event in book.events ) + + # Book should be marked as pending_move (needs file movement) + assert book.status == "pending_move" diff --git a/backend/tests/processors/test_zimfarm_notification.py b/backend/tests/processors/test_zimfarm_notification.py index 95bc919..fb67460 100644 --- a/backend/tests/processors/test_zimfarm_notification.py +++ b/backend/tests/processors/test_zimfarm_notification.py @@ -58,7 +58,7 @@ def test_process_notification_success( assert notification.book is not None assert notification.book.title == 
title assert notification.book.title_id == title.id - assert notification.book.status == "processed" + assert notification.book.status == "pending_move" assert any( event for event in notification.events @@ -421,7 +421,7 @@ def test_process_notification_with_existing_books( assert notification.book is not None assert notification.book.title == title - assert notification.book.status == "processed" + assert notification.book.status == "pending_move" assert len(title.books) == 2 assert existing_book in title.books assert notification.book in title.books diff --git a/backend/tests/shuttle/__init__.py b/backend/tests/shuttle/__init__.py new file mode 100644 index 0000000..12c62ce --- /dev/null +++ b/backend/tests/shuttle/__init__.py @@ -0,0 +1 @@ +"""Tests for the shuttle module.""" diff --git a/backend/tests/shuttle/conftest.py b/backend/tests/shuttle/conftest.py new file mode 100644 index 0000000..2fefc7f --- /dev/null +++ b/backend/tests/shuttle/conftest.py @@ -0,0 +1,48 @@ +"""Fixtures for shuttle tests.""" + +from collections.abc import Callable +from pathlib import Path + +import pytest +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.models import Warehouse, WarehousePath + + +@pytest.fixture +def temp_warehouse_dirs(tmp_path: Path) -> dict[str, Path]: + """Create temporary warehouse directories for testing.""" + warehouse_1 = tmp_path / "warehouse_1" + warehouse_2 = tmp_path / "warehouse_2" + + warehouse_1.mkdir() + warehouse_2.mkdir() + + return { + "warehouse_1": warehouse_1, + "warehouse_2": warehouse_2, + } + + +@pytest.fixture +def create_warehouse_with_path( + dbsession: OrmSession, +) -> Callable[..., tuple[Warehouse, WarehousePath]]: + """Factory to create a warehouse and its path for testing.""" + + def _create( + name: str, + folder_name: str = "zims", + ) -> tuple[Warehouse, WarehousePath]: + warehouse = Warehouse(name=name, configuration={}) + dbsession.add(warehouse) + dbsession.flush() + + warehouse_path = WarehousePath(folder_name=folder_name) + warehouse_path.warehouse = warehouse + dbsession.add(warehouse_path) + dbsession.flush() + + return warehouse, warehouse_path + + return _create diff --git a/backend/tests/shuttle/test_move_files.py b/backend/tests/shuttle/test_move_files.py new file mode 100644 index 0000000..ecba1f2 --- /dev/null +++ b/backend/tests/shuttle/test_move_files.py @@ -0,0 +1,717 @@ +"""Tests for the move_files module.""" + +from pathlib import Path +from typing import Any + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.shuttle.context import Context +from cms_backend.shuttle.move_files import move_book_files, move_files + + +class TestMoveFilesEndToEnd: + """End-to-end tests for the move_files function.""" + + def test_move_files_processes_multiple_books_in_order( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that move_files processes multiple books in created_at order.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", + folder_name="zims", + ) + warehouse_2, warehouse_path_2 = create_warehouse_with_path( + name="warehouse_2", + folder_name="zims", + ) + dbsession.flush() + + # Create test files + source_file_1 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book1.zim" + source_file_2 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book2.zim" + source_file_3 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book3.zim" + for src_file in 
[source_file_1, source_file_2, source_file_3]: + src_file.parent.mkdir(parents=True, exist_ok=True) + source_file_1.write_text("book1 content") + source_file_2.write_text("book2 content") + source_file_3.write_text("book3 content") + + # Create books with locations + book_1 = create_book() + create_book_location( + book=book_1, + warehouse_path=warehouse_path_1, + filename="book1.zim", + status="current", + ) + create_book_location( + book=book_1, + warehouse_path=warehouse_path_2, + filename="book1.zim", + status="target", + ) + book_1.status = "pending_move" + + book_2 = create_book() + create_book_location( + book=book_2, + warehouse_path=warehouse_path_1, + filename="book2.zim", + status="current", + ) + create_book_location( + book=book_2, + warehouse_path=warehouse_path_2, + filename="book2.zim", + status="target", + ) + book_2.status = "pending_move" + + book_3 = create_book() + create_book_location( + book=book_3, + warehouse_path=warehouse_path_1, + filename="book3.zim", + status="current", + ) + create_book_location( + book=book_3, + warehouse_path=warehouse_path_2, + filename="book3.zim", + status="target", + ) + book_3.status = "pending_move" + + dbsession.flush() + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), + } + + Context.local_warehouse_paths = warehouse_paths + move_files(dbsession) + dbsession.flush() + + # Verify all books processed + assert book_1.status == "published" + assert book_2.status == "published" + assert book_3.status == "published" + + # Verify files moved + target_file_1 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book1.zim" + target_file_2 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book2.zim" + target_file_3 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book3.zim" + + assert target_file_1.read_text() == "book1 content" + assert target_file_2.read_text() == "book2 content" + assert target_file_3.read_text() == "book3 content" + + assert not source_file_1.exists() + assert not source_file_2.exists() + assert not source_file_3.exists() + + +class TestMoveBookFilesSuccess: + """Tests for successful move_book_files scenarios.""" + + def test_move_book_files_simple_move( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test simple move with 1 current and 1 target location.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", + folder_name="zims", + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "zims" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "old_book.zim" + source_file.write_text("test content") + + book = create_book() + current_loc = create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="old_book.zim", + status="current", + ) + target_loc = create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="new_book.zim", + status="target", + ) + dbsession.flush() + + warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + target_file = source_dir / "new_book.zim" + assert target_file.exists() + assert target_file.read_text() == "test content" + assert not source_file.exists() + assert book.status == "published" + assert target_loc.status == "current" + assert current_loc 
not in book.locations + + def test_move_book_files_copy_then_move( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test copying to first target, then moving to second (1→2).""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="source" + ) + warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( + name="warehouse_2", folder_name="target_a" + ) + warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( + name="warehouse_2", folder_name="target_b" + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "source" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "source.zim" + source_file.write_text("book content") + + book = create_book() + # current_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="source.zim", + status="current", + ) + # target_loc_1 + create_book_location( + book=book, + warehouse_path=warehouse_path_2_a, + filename="target1.zim", + status="target", + ) + # target_loc_2 + create_book_location( + book=book, + warehouse_path=warehouse_path_2_b, + filename="target2.zim", + status="target", + ) + dbsession.flush() + + for subfolder in ["target_a", "target_b"]: + (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( + parents=True, exist_ok=True + ) + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), + warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), + } + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" / "target1.zim" + target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim" + assert target_file_1.exists() + assert target_file_1.read_text() == "book content" + assert target_file_2.exists() + assert target_file_2.read_text() == "book content" + assert not source_file.exists() + assert book.status == "published" + + def test_move_book_files_multiple_copies_one_move( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test 1→3: 2 copies then 1 move.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="source" + ) + warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( + name="warehouse_2", folder_name="target_a" + ) + warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( + name="warehouse_2", folder_name="target_b" + ) + warehouse_2_c, warehouse_path_2_c = create_warehouse_with_path( + name="warehouse_2", folder_name="target_c" + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "source" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "source.zim" + source_file.write_text("book content") + + book = create_book() + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="source.zim", + status="current", + ) + create_book_location( + book=book, + warehouse_path=warehouse_path_2_a, + filename="target1.zim", + status="target", + ) + create_book_location( + book=book, + warehouse_path=warehouse_path_2_b, + filename="target2.zim", + status="target", + ) + create_book_location( + book=book, + 
warehouse_path=warehouse_path_2_c, + filename="target3.zim", + status="target", + ) + dbsession.flush() + + for subfolder in ["target_a", "target_b", "target_c"]: + (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( + parents=True, exist_ok=True + ) + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), + warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), + warehouse_2_c.id: str(temp_warehouse_dirs["warehouse_2"]), + } + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + for i, subfolder in enumerate(["target_a", "target_b", "target_c"], 1): + target_file = ( + temp_warehouse_dirs["warehouse_2"] / subfolder / f"target{i}.zim" + ) + assert target_file.exists() + assert target_file.read_text() == "book content" + + assert not source_file.exists() + current_locs = [loc for loc in book.locations if loc.status == "current"] + assert len(current_locs) == 3 + + +class TestMoveBookFilesErrors: + """Tests for error handling in move_book_files.""" + + def test_move_book_files_no_current_locations( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that book with no current locations is marked as errored.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="zims" + ) + dbsession.flush() + + book = create_book() + target_loc = create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="target.zim", + status="target", + ) + dbsession.flush() + + warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + assert book.status == "errored" + assert any("no current location" in event for event in book.events) + assert target_loc.status == "target" + + def test_move_book_files_no_target_locations( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that book with no target locations is marked as published.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="zims" + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "zims" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "book.zim" + source_file.write_text("content") + + book = create_book() + # current_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="book.zim", + status="current", + ) + dbsession.flush() + + warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + assert book.status == "published" + assert any("no target location" in event for event in book.events) + assert source_file.exists() + + def test_move_book_files_inaccessible_warehouse( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that book with inaccessible warehouse is skipped.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="zims" + ) + _, warehouse_path_2 = 
create_warehouse_with_path( + name="warehouse_2", folder_name="zims" + ) + dbsession.flush() + + book = create_book() + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="book.zim", + status="current", + ) + create_book_location( + book=book, + warehouse_path=warehouse_path_2, + filename="book_target.zim", + status="target", + ) + dbsession.flush() + + # Only warehouse_1 accessible (warehouse_2 is inaccessible) + warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + # Book should not be processed (status unchanged) + assert book.status == "pending_processing" + + +class TestMoveBookFilesEdgeCases: + """Tests for edge cases in move_book_files.""" + + def test_move_book_files_same_location( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test when current location matches target.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="zims" + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "zims" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "book.zim" + source_file.write_text("content") + + book = create_book() + # current_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="book.zim", + status="current", + ) + # target_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="book.zim", + status="target", + ) + dbsession.flush() + + warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + assert book.status == "published" + assert source_file.exists() + assert source_file.read_text() == "content" + + def test_move_book_files_events_contain_warehouse_info( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that events contain warehouse and folder info.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="primary_warehouse", folder_name="main_zims" + ) + warehouse_2, warehouse_path_2 = create_warehouse_with_path( + name="backup_warehouse", folder_name="backup_zims" + ) + dbsession.flush() + + source_dir = temp_warehouse_dirs["warehouse_1"] / "main_zims" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "test_book.zim" + source_file.write_text("content") + + book = create_book() + # current_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="test_book.zim", + status="current", + ) + # target_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_2, + filename="test_book_v2.zim", + status="target", + ) + dbsession.flush() + + (temp_warehouse_dirs["warehouse_2"] / "backup_zims").mkdir( + parents=True, exist_ok=True + ) + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), + } + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + move_events = [e for e in book.events if "moved book from" in e] + assert len(move_events) == 1 + event = move_events[0] + assert 
"primary_warehouse" in event + assert "main_zims" in event + assert "backup_warehouse" in event + assert "backup_zims" in event + + def test_move_book_files_file_content_preserved( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that file content is preserved during copy and move.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="source" + ) + warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( + name="warehouse_2", folder_name="target_a" + ) + warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( + name="warehouse_2", folder_name="target_b" + ) + dbsession.flush() + + test_content = "This is a test ZIM file content with special chars: éàù\n" * 100 + source_dir = temp_warehouse_dirs["warehouse_1"] / "source" + source_dir.mkdir(parents=True, exist_ok=True) + source_file = source_dir / "source.zim" + source_file.write_text(test_content) + + book = create_book() + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="source.zim", + status="current", + ) + create_book_location( + book=book, + warehouse_path=warehouse_path_2_a, + filename="target1.zim", + status="target", + ) + create_book_location( + book=book, + warehouse_path=warehouse_path_2_b, + filename="target2.zim", + status="target", + ) + dbsession.flush() + + for subfolder in ["target_a", "target_b"]: + (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( + parents=True, exist_ok=True + ) + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), + warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), + } + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" / "target1.zim" + target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim" + + assert target_file_1.read_text() == test_content + assert target_file_2.read_text() == test_content + + def test_move_book_files_three_current_one_target( + self, + dbsession: OrmSession, + create_book: Any, + create_book_location: Any, + create_warehouse_with_path: Any, + temp_warehouse_dirs: dict[str, Path], + ): + """Test that when there are 3 current and 1 target, one moves and two get + deleted.""" + warehouse_1, warehouse_path_1 = create_warehouse_with_path( + name="warehouse_1", folder_name="zims" + ) + warehouse_2, warehouse_path_2 = create_warehouse_with_path( + name="warehouse_2", folder_name="zims" + ) + warehouse_3, warehouse_path_3 = create_warehouse_with_path( + name="warehouse_3", folder_name="zims" + ) + warehouse_4, warehouse_path_4 = create_warehouse_with_path( + name="warehouse_4", folder_name="zims" + ) + dbsession.flush() + + # Create three current locations with actual files + source_dir_1 = temp_warehouse_dirs["warehouse_1"] / "zims" + source_dir_1.mkdir(parents=True, exist_ok=True) + source_file_1 = source_dir_1 / "book1.zim" + source_file_1.write_text("content 1") + + source_dir_2 = temp_warehouse_dirs["warehouse_2"] / "zims" + source_dir_2.mkdir(parents=True, exist_ok=True) + source_file_2 = source_dir_2 / "book2.zim" + source_file_2.write_text("content 2") + + source_dir_3 = temp_warehouse_dirs["warehouse_1"] / "zims" + source_file_3 = source_dir_3 / "book3.zim" + source_file_3.write_text("content 3") + + book = create_book() 
+ # current_loc_1 + create_book_location( + book=book, + warehouse_path=warehouse_path_1, + filename="book1.zim", + status="current", + ) + # current_loc_2 + create_book_location( + book=book, + warehouse_path=warehouse_path_2, + filename="book2.zim", + status="current", + ) + # current_loc_3 + create_book_location( + book=book, + warehouse_path=warehouse_path_3, + filename="book3.zim", + status="current", + ) + # target_loc + create_book_location( + book=book, + warehouse_path=warehouse_path_4, + filename="book_final.zim", + status="target", + ) + dbsession.flush() + + (temp_warehouse_dirs["warehouse_2"] / "zims").mkdir(parents=True, exist_ok=True) + + warehouse_paths = { + warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), + warehouse_3.id: str(temp_warehouse_dirs["warehouse_1"]), + warehouse_4.id: str(temp_warehouse_dirs["warehouse_2"]), + } + Context.local_warehouse_paths = warehouse_paths + move_book_files(dbsession, book) + dbsession.flush() + + # Verify one file moved to target + target_file = temp_warehouse_dirs["warehouse_2"] / "zims" / "book_final.zim" + assert target_file.exists() + assert target_file.read_text() == "content 1" # First current moved + + # Verify other files deleted + assert not source_file_1.exists() # Moved + assert not source_file_2.exists() # Deleted + assert not source_file_3.exists() # Deleted + + # Verify events + move_events = [e for e in book.events if "moved book from" in e] + delete_events = [e for e in book.events if "deleted old location" in e] + assert len(move_events) == 1 + assert len(delete_events) == 2 + + # Verify only one current location remains + current_locations = [loc for loc in book.locations if loc.status == "current"] + assert len(current_locations) == 1 + assert current_locations[0].warehouse_path_id == warehouse_path_4.id + + assert book.status == "published" diff --git a/dev/README.md b/dev/README.md index a626e73..f8a7cb3 100644 --- a/dev/README.md +++ b/dev/README.md @@ -50,6 +50,64 @@ This sets up the containers, runs the migrations. Note that to run tests, we use a separate DB with the backend-tests container +### Setup warehouse paths + +Before using the shuttle service for file operations, you need to initialize the warehouse paths in the database: + +```sh +docker exec cms_shuttle python /scripts/setup_warehouses.py +``` + +This script will: +- Create warehouse directories in `dev/warehouses/` +- Create corresponding database records (Warehouse and WarehousePath) +- Print the LOCAL_WAREHOUSE_PATHS configuration (already configured in docker-compose.yml; format example below) + +Current warehouse configuration: +- **hidden**: 2 paths (`jail`, `dev`) +- **prod**: 1 path (`other`) +- **client1**: 1 path (`all`) + +To modify warehouse configuration, edit the `WAREHOUSES_CONFIG` dict in [scripts/setup_warehouses.py](scripts/setup_warehouses.py) and re-run the script. + +### Setup titles + +After setting up warehouse paths, you can create sample titles with their warehouse path associations: + +```sh +docker exec cms_mill python /scripts/setup_titles.py +``` + +This script will: +- Create Title records in the database +- Associate titles with dev and prod warehouse paths via TitleWarehousePath + +To modify title configuration, edit the `TITLES_CONFIG` list in [scripts/setup_titles.py](scripts/setup_titles.py) and re-run the script. 
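+ +For reference, the shuttle reads warehouse locations from the `LOCAL_WAREHOUSE_PATHS` environment variable (parsed by `_parse_local_warehouse_paths()` in `shuttle/context.py`): a comma-separated list of `<warehouse-uuid>:<local-path>` pairs. The dev value set in docker-compose.yml is: + +```sh +LOCAL_WAREHOUSE_PATHS="11111111-1111-1111-1111-111111111111:/warehouses/hidden,22222222-2222-2222-2222-222222222222:/warehouses/prod,33333333-3333-3333-3333-333333333333:/warehouses/client1" +``` 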
+ +### Setup notifications + +After setting up titles, you can create sample zimfarm notifications for testing the mill processor: + +```sh +docker exec cms_shuttle python /scripts/setup_notifications.py +``` + +This script will: +- Create ZimfarmNotification records with status "pending" +- Create "fake" ZIMs in warehouse folders +- Each notification references a warehouse path and matches a title's producer_unique_id + +After creating notifications, the mill will process them into books. To modify notification configuration, edit the `NOTIFICATIONS_CONFIG` list in [scripts/setup_notifications.py](scripts/setup_notifications.py) and re-run the script. + +### Wipe database and files + +To delete all data from the database and all ZIM files from warehouses: + +```sh +docker exec cms_shuttle python /scripts/wipe.py +``` + +This is useful when you need to reset everything to a clean state before re-running setup scripts. + ### Restart the backend The backend might typically fail if the DB schema is not up-to-date, or if you create some nasty bug while modifying the code. diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml index 3edb36a..714e941 100644 --- a/dev/docker-compose.yml +++ b/dev/docker-compose.yml @@ -59,6 +59,7 @@ services: container_name: cms_mill volumes: - ../backend/src/cms_backend:/usr/local/lib/python3.13/site-packages/cms_backend + - ./scripts:/scripts environment: DEBUG: 1 DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cms @@ -72,9 +73,12 @@ services: container_name: cms_shuttle volumes: - ../backend/src/cms_backend:/usr/local/lib/python3.13/site-packages/cms_backend + - ./scripts:/scripts + - ./warehouses:/warehouses environment: DEBUG: 1 DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cms + LOCAL_WAREHOUSE_PATHS: "11111111-1111-1111-1111-111111111111:/warehouses/hidden,22222222-2222-2222-2222-222222222222:/warehouses/prod,33333333-3333-3333-3333-333333333333:/warehouses/client1" depends_on: postgresdb: condition: service_healthy @@ -89,6 +93,7 @@ services: - ../backend/tests:/app/tests environment: DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cmstest + ALEMBIC_UPGRADE_HEAD_ON_START: false depends_on: - postgresdb frontend: diff --git a/dev/scripts/setup_notifications.py b/dev/scripts/setup_notifications.py new file mode 100644 index 0000000..31271da --- /dev/null +++ b/dev/scripts/setup_notifications.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +Development zimfarm notifications setup script. + +Creates ZimfarmNotification records for testing the mill processor. 
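+Run inside the shuttle container: + docker exec cms_shuttle python /scripts/setup_notifications.py 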
+""" + +from datetime import datetime +from pathlib import Path +from uuid import uuid4 + +from cms_backend.db import Session +from cms_backend.db.models import ZimfarmNotification + + +# Base directory where warehouse folders are located (inside container) +WAREHOUSE_BASE_PATH = Path("/warehouses") + +# Configuration: Define sample notifications +# Each notification should match a title's producer_unique_id and warehouse path +NOTIFICATIONS_CONFIG = [ + { + "article_count": 10000, + "media_count": 5000, + "size": 1024000000, + "metadata": { + "Name": "wikipedia_en_all", + "Title": "Wikipedia English All Maxi", + "Creator": "openZIM", + "Publisher": "Kiwix", + "Date": "2025-01-15", + "Description": "Wikipedia English offline", + "Language": "eng", + "Flavour": "maxi", + }, + "zimcheck": {"status": "pass"}, + "warehouse_name": "hidden", + "folder_name": "jail", + "filename": "wikipedia_en_all_maxi_2025-01.zim", + "producer": { + "displayName": "farm.openzim.org: wikipedia_en_all_maxi", + "displayUrl": "https://farm.openzim.org/recipes/wikipedia_en_all_maxi", + "uniqueId": "farm.openzim.org:wikipedia_en_all_maxi", + }, + }, + { + "article_count": 500, + "media_count": 200, + "size": 50000000, + "metadata": { + "Name": "wiktionary_fr_all", + "Title": "Wiktionnaire Francais", + "Creator": "openZIM", + "Publisher": "Kiwix", + "Date": "2025-01-10", + "Description": "Wiktionnaire hors-ligne", + "Language": "fra", + "Flavour": "maxi", + }, + "zimcheck": {"status": "pass"}, + "warehouse_name": "hidden", + "folder_name": "jail", + "filename": "wiktionary_fr_all_maxi_2025-01.zim", + "producer": { + "displayName": "wiktionary_fr", + "displayUrl": "https://farm.openzim.org/recipes/wiktionary_fr", + "uniqueId": "farm.openzim.org:wiktionary_fr", + }, + }, + { + "article_count": 1500, + "media_count": 2020, + "size": 40000, + "metadata": { + "Name": "wiktionary_en_all", + "Title": "English Wiktionary", + "Creator": "openZIM", + "Publisher": "Kiwix", + "Date": "2025-01-10", + "Description": "Offline wiktionary", + "Language": "eng", + "Flavour": "maxi", + }, + "zimcheck": {"status": "pass"}, + "warehouse_name": "hidden", + "folder_name": "jail", + "filename": "wiktionary_en_all_maxi_2025-01.zim", + "producer": { + "displayName": "wiktionary_en", + "displayUrl": "https://farm.openzim.org/recipes/wiktionary_en", + "uniqueId": "farm.openzim.org:wiktionary_en", + }, + }, +] + + +def create_notifications(): + """Create zimfarm notification records and placeholder files.""" + session = Session() + + created_notifications = [] + + try: + for content in NOTIFICATIONS_CONFIG: + filename = content.get("filename", "unknown") + warehouse_name = content["warehouse_name"] + folder_name = content["folder_name"] + + print(f"\nProcessing notification: {filename}") + + # Check if file already exists in warehouse + file_path = WAREHOUSE_BASE_PATH / warehouse_name / folder_name / filename + if file_path.exists(): + print(f" - File already exists at {file_path} (skipping)") + continue + + # Generate random notification ID + notification_id = uuid4() + + # Create notification record + notification = ZimfarmNotification( + id=notification_id, + received_at=datetime.now(), + content=content, + ) + session.add(notification) + session.flush() + print(f" + Created notification '{filename}' with ID {notification.id}") + print(f" Status: {notification.status}") + + # Create placeholder file with notification ID + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(str(notification_id)) + print(f" + Created file: 
{file_path}") + + created_notifications.append( + (filename, warehouse_name, folder_name, notification_id) + ) + + # Commit all changes + session.commit() + print("\n+ All database changes committed") + + # Print summary + if created_notifications: + print("\n" + "=" * 70) + print("Notifications created (status: pending):") + print("=" * 70) + for ( + filename, + warehouse_name, + folder_name, + notification_id, + ) in created_notifications: + print(f" {filename}") + print(f" warehouse: {warehouse_name}/{folder_name}") + print(f" id: {notification_id}") + print("=" * 70) + print("\nThe mill will now process these notifications into books.") + else: + print("\nNo new notifications created (all files already exist).") + + except Exception as e: + session.rollback() + print(f"\n- Error: {e}") + raise + finally: + session.close() + + +if __name__ == "__main__": + create_notifications() diff --git a/dev/scripts/setup_titles.py b/dev/scripts/setup_titles.py new file mode 100644 index 0000000..4b61619 --- /dev/null +++ b/dev/scripts/setup_titles.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Development titles setup script. + +Creates Title records and associates them with warehouse paths. +""" + +from cms_backend.db import Session +from cms_backend.db.models import Title, TitleWarehousePath, Warehouse, WarehousePath + +# Configuration: Define titles and their warehouse path associations +# Format for paths: (warehouse_name, folder_name) +TITLES_CONFIG = [ + { + "name": "wikipedia_en_all", + "producer_unique_id": "farm.openzim.org:wikipedia_en_all_maxi", + "dev_paths": [("hidden", "dev")], + "prod_paths": [("prod", "other")], + }, + { + "name": "wiktionary_fr_all", + "producer_unique_id": "farm.openzim.org:wiktionary_fr", + "producer_display_name": "wiktionary_fr", + "producer_display_url": "https://farm.openzim.org/recipes/wiktionary_fr", + "in_prod": True, + "dev_paths": [("hidden", "dev")], + "prod_paths": [("prod", "other"), ("client1", "all")], + }, +] + + +def get_warehouse_path(session, warehouse_name: str, folder_name: str) -> WarehousePath: + """Look up a WarehousePath by warehouse name and folder name.""" + result = ( + session.query(WarehousePath) + .join(Warehouse) + .filter( + Warehouse.name == warehouse_name, + WarehousePath.folder_name == folder_name, + ) + .first() + ) + if not result: + raise ValueError( + f"WarehousePath not found: {warehouse_name}/{folder_name}. " + "Run setup_warehouses.py first." 
+ ) + return result + + +def create_titles(): + """Create title records and associate them with warehouse paths.""" + session = Session() + + try: + for title_config in TITLES_CONFIG: + title_name = title_config["name"] + print(f"\nProcessing title: {title_name}") + + # Check if title already exists + existing = session.query(Title).filter(Title.name == title_name).first() + + if existing: + print(f" - Title '{title_name}' already exists (skipping)") + continue + + # Create title record + title = Title( + name=title_name, + producer_unique_id=title_config["producer_unique_id"], + ) + # Set optional fields + if "in_prod" in title_config: + title.in_prod = title_config["in_prod"] + if "producer_display_name" in title_config: + title.producer_display_name = title_config["producer_display_name"] + if "producer_display_url" in title_config: + title.producer_display_url = title_config["producer_display_url"] + + session.add(title) + session.flush() # Get the generated UUID + print(f" + Created title '{title_name}' with ID {title.id}") + + # Associate dev warehouse paths + for warehouse_name, folder_name in title_config.get("dev_paths", []): + warehouse_path = get_warehouse_path( + session, warehouse_name, folder_name + ) + twp = TitleWarehousePath(path_type="dev") + twp.title = title + twp.warehouse_path = warehouse_path + session.add(twp) + print(f" + Added dev path: {warehouse_name}/{folder_name}") + + # Associate prod warehouse paths + for warehouse_name, folder_name in title_config.get("prod_paths", []): + warehouse_path = get_warehouse_path( + session, warehouse_name, folder_name + ) + twp = TitleWarehousePath(path_type="prod") + twp.title = title + twp.warehouse_path = warehouse_path + session.add(twp) + print(f" + Added prod path: {warehouse_name}/{folder_name}") + + # Commit all changes + session.commit() + print("\n+ All database changes committed") + + # Print summary + print("\n" + "=" * 70) + print("Titles configured:") + print("=" * 70) + for title_config in TITLES_CONFIG: + dev_paths = ", ".join( + f"{w}/{f}" for w, f in title_config.get("dev_paths", []) + ) + prod_paths = ", ".join( + f"{w}/{f}" for w, f in title_config.get("prod_paths", []) + ) + print(f" {title_config['name']}") + if (in_prod := title_config.get("in_prod")) is not None: + print(f" in_prod: {in_prod}") + print(f" dev: {dev_paths or '(none)'}") + print(f" prod: {prod_paths or '(none)'}") + print("=" * 70) + + except Exception as e: + session.rollback() + print(f"\n- Error: {e}") + raise + finally: + session.close() + + +if __name__ == "__main__": + create_titles() diff --git a/dev/scripts/setup_warehouses.py b/dev/scripts/setup_warehouses.py new file mode 100644 index 0000000..6a47c1a --- /dev/null +++ b/dev/scripts/setup_warehouses.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +""" +Development warehouse setup script. + +Creates warehouse directories and database records. 
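+Run inside the shuttle container: + docker exec cms_shuttle python /scripts/setup_warehouses.py 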
+""" + +import sys +from pathlib import Path +from uuid import UUID + +# Add backend source to path for imports +sys.path.insert(0, "/usr/local/lib/python3.13/site-packages") + +from cms_backend.db import Session +from cms_backend.db.models import Warehouse, WarehousePath + + +# Configuration: Define warehouses and their paths +# UUIDs must match those in docker-compose.yml LOCAL_WAREHOUSE_PATHS +WAREHOUSES_CONFIG = { + "hidden": { + "id": UUID("11111111-1111-1111-1111-111111111111"), + "paths": ["jail", "dev"], + "configuration": {}, + }, + "prod": { + "id": UUID("22222222-2222-2222-2222-222222222222"), + "paths": ["other"], + "configuration": {}, + }, + "client1": { + "id": UUID("33333333-3333-3333-3333-333333333333"), + "paths": ["all"], + "configuration": {}, + }, +} + +# Base directory where warehouse folders will be created (inside container) +WAREHOUSE_BASE_PATH = Path("/warehouses") + + +def create_warehouse_structure(): + """Create warehouse directories and database records.""" + session = Session() + + try: + for warehouse_name, config in WAREHOUSES_CONFIG.items(): + print(f"\nProcessing warehouse: {warehouse_name}") + warehouse_id = config["id"] + + # Check if warehouse already exists + existing = ( + session.query(Warehouse).filter(Warehouse.id == warehouse_id).first() + ) + + if existing: + print(f" ⊘ Warehouse '{warehouse_name}' already exists (skipping)") + warehouse = existing + else: + # Create warehouse DB record with predefined ID + warehouse = Warehouse( + name=warehouse_name, + configuration=config.get("configuration", {}), + ) + warehouse.id = warehouse_id + session.add(warehouse) + session.flush() + print( + f" ✓ Created warehouse '{warehouse_name}' with ID {warehouse.id}" + ) + + # Create paths for this warehouse + for path_name in config["paths"]: + # Check if path already exists + existing_path = ( + session.query(WarehousePath) + .filter( + WarehousePath.warehouse_id == warehouse.id, + WarehousePath.folder_name == path_name, + ) + .first() + ) + + if existing_path: + print(f" ⊘ Path '{path_name}' already exists (skipping)") + warehouse_path = existing_path + else: + # Create warehouse path DB record + warehouse_path = WarehousePath( + folder_name=path_name, + ) + warehouse_path.warehouse = warehouse + session.add(warehouse_path) + session.flush() + print( + f" ✓ Created path '{path_name}' with ID {warehouse_path.id}" + ) + + # Create physical directory + physical_path = WAREHOUSE_BASE_PATH / warehouse_name / path_name + if physical_path.exists(): + print(f" ⊘ Directory already exists: {physical_path}") + else: + physical_path.mkdir(parents=True, exist_ok=True) + print(f" ✓ Created directory: {physical_path}") + + # (no action needed - IDs are predefined in config) + + # Commit all changes + session.commit() + print("\n✓ All database changes committed") + + # Print configuration summary + print("\n" + "=" * 70) + print("LOCAL_WAREHOUSE_PATHS configuration (already in docker-compose.yml):") + print("=" * 70) + env_parts = [] + for warehouse_name, config in WAREHOUSES_CONFIG.items(): + warehouse_base = WAREHOUSE_BASE_PATH / warehouse_name + env_parts.append(f"{config['id']}:{warehouse_base}") + env_value = ",".join(env_parts) + print(f" {env_value}") + print("\n" + "=" * 70) + + except Exception as e: + session.rollback() + print(f"\n✗ Error: {e}", file=sys.stderr) + raise + finally: + session.close() + + +if __name__ == "__main__": + create_warehouse_structure() diff --git a/dev/scripts/wipe.py b/dev/scripts/wipe.py new file mode 100644 index 0000000..e84917e --- 
/dev/null +++ b/dev/scripts/wipe.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +""" +Development wipe script. + +Deletes all data from the database and all ZIM files from warehouses. +Run inside the shuttle container: + docker exec cms_shuttle python /scripts/wipe.py +""" + +from pathlib import Path + +from cms_backend.db import Session +from cms_backend.db.models import ( + Book, + BookLocation, + Title, + TitleWarehousePath, + Warehouse, + WarehousePath, + ZimfarmNotification, +) + + +# Base directory where warehouse folders are located (inside container) +WAREHOUSE_BASE_PATH = Path("/warehouses") + + +def wipe_database(session): + """Delete all data from the database in the correct order.""" + print("Wiping database...") + + # Delete in order respecting foreign key constraints + # (children before parents) + + # 1. BookLocation (depends on Book and WarehousePath) + count = session.query(BookLocation).delete() + print(f" - Deleted {count} BookLocation records") + + # 2. ZimfarmNotification (depends on Book) + count = session.query(ZimfarmNotification).delete() + print(f" - Deleted {count} ZimfarmNotification records") + + # 3. Book (depends on Title) + count = session.query(Book).delete() + print(f" - Deleted {count} Book records") + + # 4. TitleWarehousePath (depends on Title and WarehousePath) + count = session.query(TitleWarehousePath).delete() + print(f" - Deleted {count} TitleWarehousePath records") + + # 5. Title + count = session.query(Title).delete() + print(f" - Deleted {count} Title records") + + # 6. WarehousePath (depends on Warehouse) + count = session.query(WarehousePath).delete() + print(f" - Deleted {count} WarehousePath records") + + # 7. Warehouse + count = session.query(Warehouse).delete() + print(f" - Deleted {count} Warehouse records") + + +def wipe_warehouse_files(): + """Delete all ZIM files in warehouse directories.""" + print("\nWiping warehouse files...") + + if not WAREHOUSE_BASE_PATH.exists(): + print(f" - Warehouse path {WAREHOUSE_BASE_PATH} does not exist") + return + + zim_files = list(WAREHOUSE_BASE_PATH.rglob("*.zim")) + + if not zim_files: + print(" - No ZIM files to delete") + return + + for file_path in zim_files: + file_path.unlink() + print(f" - Deleted {file_path}") + + print(f" - Total files deleted: {len(zim_files)}") + + +def wipe(): + """Wipe database and warehouse files.""" + session = Session() + + try: + wipe_database(session) + session.commit() + print("\n+ Database wiped successfully") + + wipe_warehouse_files() + print("\n+ Warehouse files wiped successfully") + + except Exception as e: + session.rollback() + print(f"\n- Error: {e}") + raise + finally: + session.close() + + +if __name__ == "__main__": + wipe() diff --git a/dev/warehouses/.gitkeep b/dev/warehouses/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/frontend/src/components/BookStatus.vue b/frontend/src/components/BookStatus.vue index 7aa3678..f0d038c 100644 --- a/frontend/src/components/BookStatus.vue +++ b/frontend/src/components/BookStatus.vue @@ -11,9 +11,13 @@ Pending Title - - - Processed + + + Pending Move + + + + Published