diff --git a/.gitignore b/.gitignore
index 5e30185..38088a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,3 +135,4 @@ test.db
# dump of scripts
scripts/*csv
+*.zim
diff --git a/backend/src/cms_backend/db/book.py b/backend/src/cms_backend/db/book.py
index a1ab21f..8fceaca 100644
--- a/backend/src/cms_backend/db/book.py
+++ b/backend/src/cms_backend/db/book.py
@@ -1,6 +1,7 @@
from typing import Any
from uuid import UUID
+from sqlalchemy import select
from sqlalchemy.orm import Session as OrmSession
from cms_backend.db.models import Book, BookLocation, WarehousePath, ZimfarmNotification
@@ -97,3 +98,14 @@ def create_book_location(
)
return location
+
+
+def get_next_book_to_move_files_or_none(
+ session: OrmSession,
+) -> Book | None:
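+    """Return the oldest book with status "pending_move", or None if there is none."""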
+ return session.scalars(
+ select(Book)
+ .where(Book.status == "pending_move")
+ .order_by(Book.created_at)
+ .limit(1)
+ ).one_or_none()
diff --git a/backend/src/cms_backend/db/models.py b/backend/src/cms_backend/db/models.py
index 2566db9..d6396d0 100644
--- a/backend/src/cms_backend/db/models.py
+++ b/backend/src/cms_backend/db/models.py
@@ -1,5 +1,6 @@
from datetime import datetime
from ipaddress import IPv4Address
+from pathlib import Path
from typing import Any, Optional
from uuid import UUID
@@ -143,6 +144,12 @@ class Book(Base):
postgresql_where=text("status = 'errored'"),
)
+Index(
+ "idx_book_status_pending_move",
+ Book.status,
+ postgresql_where=text("status = 'pending_move'"),
+)
+
class Title(Base):
__tablename__ = "title"
@@ -225,3 +232,17 @@ class BookLocation(Base):
book: Mapped["Book"] = relationship(back_populates="locations", init=False)
warehouse_path: Mapped["WarehousePath"] = relationship(init=False)
+
+ def full_local_path(self, warehouse_local_folders_map: dict[UUID, str]) -> Path:
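+        """Return this location's absolute local path.
+
+        warehouse_local_folders_map maps warehouse IDs to local base folders.
+        """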
+ folder_in_warehouse = Path(self.warehouse_path.folder_name) / self.filename
+ warehouse_folder = Path(
+ warehouse_local_folders_map[self.warehouse_path.warehouse.id]
+ )
+ return warehouse_folder / folder_in_warehouse
+
+ @property
+ def full_str(self) -> str:
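+        """Human-readable "warehouse:folder/filename" location string."""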
+ return (
+ f"{self.warehouse_path.warehouse.name}:"
+ f"{self.warehouse_path.folder_name}/{self.filename}"
+ )
diff --git a/backend/src/cms_backend/migrations/versions/add_pending_move_index.py b/backend/src/cms_backend/migrations/versions/add_pending_move_index.py
new file mode 100644
index 0000000..e9c2907
--- /dev/null
+++ b/backend/src/cms_backend/migrations/versions/add_pending_move_index.py
@@ -0,0 +1,30 @@
+"""Add partial index for pending_move book status
+
+Revision ID: add_pending_move_index
+Revises: add_book_location_table
+Create Date: 2025-11-13 00:00:00.000000
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "add_pending_move_index"
+down_revision = "add_book_location_table"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # Create partial index for pending_move status
+ op.create_index(
+ "idx_book_status_pending_move",
+ "book",
+ ["status"],
+ postgresql_where="status = 'pending_move'",
+ )
+
+
+def downgrade():
+ # Drop the partial index
+ op.drop_index("idx_book_status_pending_move", table_name="book")
diff --git a/backend/src/cms_backend/mill/process_zimfarm_notifications.py b/backend/src/cms_backend/mill/process_zimfarm_notifications.py
index 552b1ce..5c16404 100644
--- a/backend/src/cms_backend/mill/process_zimfarm_notifications.py
+++ b/backend/src/cms_backend/mill/process_zimfarm_notifications.py
@@ -8,7 +8,6 @@
def process_zimfarm_notifications(session: OrmSession):
logger.info("Processing Zimfarm notifications")
nb_notifications_processed = 0
- raise Exception("foo")
while True:
with session.begin_nested():
notification = get_next_notification_to_process_or_none(session)
diff --git a/backend/src/cms_backend/processors/book.py b/backend/src/cms_backend/processors/book.py
index a509474..baf93d6 100644
--- a/backend/src/cms_backend/processors/book.py
+++ b/backend/src/cms_backend/processors/book.py
@@ -157,6 +157,7 @@ def create_book_target_locations(
f"{getnow()}: book already at all target locations, skipping target "
"creation"
)
+ book.status = "published"
return
# Create target locations for each applicable warehouse path
@@ -168,3 +169,5 @@ def create_book_target_locations(
filename=target_filename,
status="target",
)
+
+ book.status = "pending_move"
diff --git a/backend/src/cms_backend/processors/title.py b/backend/src/cms_backend/processors/title.py
index 66c1728..87536d1 100644
--- a/backend/src/cms_backend/processors/title.py
+++ b/backend/src/cms_backend/processors/title.py
@@ -20,7 +20,6 @@ def add_book_to_title(session: OrmSession, book: Book, title: Title):
title.books.append(book)
book.events.append(f"{getnow()}: book added to title {title.id}")
title.events.append(f"{getnow()}: book {book.id} added to title")
- book.status = "processed"
if title.name != book.name:
title.events.append(f"{getnow()}: updating title name to {book.name}")
diff --git a/backend/src/cms_backend/shuttle/context.py b/backend/src/cms_backend/shuttle/context.py
index 683b4ca..40d42a4 100644
--- a/backend/src/cms_backend/shuttle/context.py
+++ b/backend/src/cms_backend/shuttle/context.py
@@ -1,14 +1,28 @@
-import dataclasses
import os
+from dataclasses import dataclass
from datetime import timedelta
-from typing import TypeVar
+from typing import ClassVar
+from uuid import UUID
from humanfriendly import parse_timespan
-T = TypeVar("T")
+WarehouseId = UUID
+LocalWarehousePath = str
-@dataclasses.dataclass(kw_only=True)
+def _parse_local_warehouse_paths() -> dict[WarehouseId, LocalWarehousePath]:
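+    """Parse LOCAL_WAREHOUSE_PATHS ("<uuid>:<path>,<uuid>:<path>,…") into a dict.
+
+    Returns an empty dict when the variable is unset or empty.
+    """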
+ env_value = os.getenv("LOCAL_WAREHOUSE_PATHS", default="")
+ if not env_value:
+ return {}
+ return {
+ UUID(warehouse_id): local_path
+ for item in env_value.split(",")
+ if item
+ for (warehouse_id, local_path) in [item.split(":", 1)]
+ }
+
+
+@dataclass(kw_only=True)
class Context:
"""Class holding every contextual / configuration bits which can be moved
@@ -20,8 +34,8 @@ class Context:
os.getenv("PAUSE_IN_THE_LOOP", default="10s")
)
- process_zimfarm_notifications_interval: timedelta = timedelta(
- seconds=parse_timespan(
- os.getenv("PROCESS_ZIMFARM_NOTIFICATIONS_INTERVAL", default="1m")
- )
+ move_files_interval: timedelta = timedelta(
+ seconds=parse_timespan(os.getenv("MOVE_FILES_INTERVAL", default="1m"))
)
+
+ local_warehouse_paths: ClassVar[dict[UUID, str]] = _parse_local_warehouse_paths()
diff --git a/backend/src/cms_backend/shuttle/main.py b/backend/src/cms_backend/shuttle/main.py
index 5ef4791..b890114 100644
--- a/backend/src/cms_backend/shuttle/main.py
+++ b/backend/src/cms_backend/shuttle/main.py
@@ -9,12 +9,18 @@
from cms_backend.context import Context
from cms_backend.db import Session
from cms_backend.shuttle.context import Context as ShuttleContext
+from cms_backend.shuttle.move_files import move_files
from cms_backend.utils.database import upgrade_db_schema
from cms_backend.utils.datetime import getnow
from cms_backend.utils.task_config import TaskConfig
# Configure background tasks with their execution intervals
-tasks: list[TaskConfig] = []
+tasks: list[TaskConfig] = [
+ TaskConfig(
+ func=move_files,
+ interval=ShuttleContext.move_files_interval,
+ ),
+]
def main():
diff --git a/backend/src/cms_backend/shuttle/move_files.py b/backend/src/cms_backend/shuttle/move_files.py
new file mode 100644
index 0000000..7a28d5a
--- /dev/null
+++ b/backend/src/cms_backend/shuttle/move_files.py
@@ -0,0 +1,132 @@
+import shutil
+
+from sqlalchemy.orm import Session as OrmSession
+
+from cms_backend import logger
+from cms_backend.db.book import get_next_book_to_move_files_or_none
+from cms_backend.db.models import Book, BookLocation
+from cms_backend.shuttle.context import Context as ShuttleContext
+from cms_backend.utils.datetime import getnow
+
+
+def move_files(session: OrmSession):
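+    """Move files for every book pending a move, one book per nested transaction."""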
+ logger.info("Moving ZIM files")
+ nb_zim_files_moved = 0
+ while True:
+ with session.begin_nested():
+ book = get_next_book_to_move_files_or_none(session)
+ if not book:
+ break
+ logger.debug(f"Processing ZIM file of book {book.id}")
+ move_book_files(session, book)
+ nb_zim_files_moved += 1
+
+ logger.info(f"Done moving {nb_zim_files_moved} ZIM files")
+
+
+def move_book_files(session: OrmSession, book: Book):
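+    """Move a book's files from its "current" locations to its "target" locations.
+
+    Runs in three phases: copies while targets outnumber currents, then pairwise
+    moves, then deletion of any leftover current locations.
+    """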
+ inaccessible_warehouse_names = {
+ loc.warehouse_path.warehouse.name
+ for loc in book.locations
+        if loc.warehouse_path.warehouse_id
+        not in ShuttleContext.local_warehouse_paths
+ }
+
+    # if any warehouse is not accessible, we do not proceed (complex scenarios are
+    # not yet implemented)
+ if len(inaccessible_warehouse_names) > 0:
+ logger.debug(
+ f"Ignoring book {book.id}, no access to "
+ f"{','.join(inaccessible_warehouse_names)} warehouses"
+ )
+ return
+
+ current_locations: list[BookLocation] = [
+ loc for loc in book.locations if loc.status == "current"
+ ]
+
+ target_locations: list[BookLocation] = [
+ loc for loc in book.locations if loc.status == "target"
+ ]
+
+ if len(current_locations) == 0:
+ book.events.append(
+ f"{getnow()}: error encountered while moving files, no current location"
+ )
+ book.status = "errored"
+ return
+
+ if len(target_locations) == 0:
+ book.events.append(
+ f"{getnow()}: ignoring move files operation, no target location set"
+ )
+ book.status = "published"
+ return
+
+ # start with copies
+ while len(target_locations) > len(current_locations):
+ current_location = current_locations[0]
+ target_location = target_locations[0]
+
+ current_path = current_location.full_local_path(
+ ShuttleContext.local_warehouse_paths
+ )
+ target_path = target_location.full_local_path(
+ ShuttleContext.local_warehouse_paths
+ )
+
+ target_path.parent.mkdir(parents=True, exist_ok=True)
+ shutil.copy(current_path, target_path)
+ logger.debug(f"Copied book {book.id} from {current_path} to {target_path}")
+ book.events.append(
+ f"{getnow()}: copied book from {current_location.full_str} to "
+ f"{target_location.full_str}"
+ )
+ target_locations.remove(target_location)
+ target_location.status = "current"
+
+ # continue with moves
+ while len(current_locations) > 0 and len(target_locations) > 0:
+ current_location = current_locations[0]
+ target_location = target_locations[0]
+
+ current_path = current_location.full_local_path(
+ ShuttleContext.local_warehouse_paths
+ )
+ target_path = target_location.full_local_path(
+ ShuttleContext.local_warehouse_paths
+ )
+
+ target_path.parent.mkdir(parents=True, exist_ok=True)
+ shutil.move(current_path, target_path)
+ logger.debug(f"Moved book {book.id} from {current_path} to {target_path}")
+ book.events.append(
+ f"{getnow()}: moved book from {current_location.full_str} to "
+ f"{target_location.full_str}"
+ )
+ current_locations.remove(current_location)
+ target_locations.remove(target_location)
+ book.locations.remove(current_location)
+ session.delete(current_location)
+ session.flush()
+ target_location.status = "current"
+
+ # cleanup phase: delete extra current locations
+ while len(current_locations) > 0:
+ current_location = current_locations[0]
+ current_path = current_location.full_local_path(
+ ShuttleContext.local_warehouse_paths
+ )
+
+ current_path.unlink(missing_ok=True)
+ logger.debug(
+ f"Deleted extra current location for book {book.id} at {current_path}"
+ )
+ book.events.append(
+ f"{getnow()}: deleted old location {current_location.full_str}"
+ )
+ current_locations.remove(current_location)
+ book.locations.remove(current_location)
+ session.delete(current_location)
+
+ book.status = "published"
diff --git a/backend/tests/processors/test_book_location_integration.py b/backend/tests/processors/test_book_location_integration.py
index ca694df..9f3bf92 100644
--- a/backend/tests/processors/test_book_location_integration.py
+++ b/backend/tests/processors/test_book_location_integration.py
@@ -452,6 +452,9 @@ def test_no_target_when_current_matches_single_path(
"book already at all target locations" in event for event in book.events
)
+ # Book should be marked as published (no move needed)
+ assert book.status == "published"
+
def test_no_target_when_current_matches_multiple_paths(
self,
dbsession: OrmSession,
@@ -522,6 +525,9 @@ def test_no_target_when_current_matches_multiple_paths(
"book already at all target locations" in event for event in book.events
)
+ # Book should be marked as published (no move needed)
+ assert book.status == "published"
+
def test_target_created_when_partial_match(
self,
dbsession: OrmSession,
@@ -574,6 +580,9 @@ def test_target_created_when_partial_match(
"book already at all target locations" in event for event in book.events
)
+ # Book should be marked as pending_move (needs file movement)
+ assert book.status == "pending_move"
+
def test_target_created_when_filename_differs(
self,
dbsession: OrmSession,
@@ -619,3 +628,6 @@ def test_target_created_when_filename_differs(
assert not any(
"book already at all target locations" in event for event in book.events
)
+
+ # Book should be marked as pending_move (needs file movement)
+ assert book.status == "pending_move"
diff --git a/backend/tests/processors/test_zimfarm_notification.py b/backend/tests/processors/test_zimfarm_notification.py
index 95bc919..fb67460 100644
--- a/backend/tests/processors/test_zimfarm_notification.py
+++ b/backend/tests/processors/test_zimfarm_notification.py
@@ -58,7 +58,7 @@ def test_process_notification_success(
assert notification.book is not None
assert notification.book.title == title
assert notification.book.title_id == title.id
- assert notification.book.status == "processed"
+ assert notification.book.status == "pending_move"
assert any(
event
for event in notification.events
@@ -421,7 +421,7 @@ def test_process_notification_with_existing_books(
assert notification.book is not None
assert notification.book.title == title
- assert notification.book.status == "processed"
+ assert notification.book.status == "pending_move"
assert len(title.books) == 2
assert existing_book in title.books
assert notification.book in title.books
diff --git a/backend/tests/shuttle/__init__.py b/backend/tests/shuttle/__init__.py
new file mode 100644
index 0000000..12c62ce
--- /dev/null
+++ b/backend/tests/shuttle/__init__.py
@@ -0,0 +1 @@
+"""Tests for the shuttle module."""
diff --git a/backend/tests/shuttle/conftest.py b/backend/tests/shuttle/conftest.py
new file mode 100644
index 0000000..2fefc7f
--- /dev/null
+++ b/backend/tests/shuttle/conftest.py
@@ -0,0 +1,48 @@
+"""Fixtures for shuttle tests."""
+
+from collections.abc import Callable
+from pathlib import Path
+
+import pytest
+from sqlalchemy.orm import Session as OrmSession
+
+from cms_backend.db.models import Warehouse, WarehousePath
+
+
+@pytest.fixture
+def temp_warehouse_dirs(tmp_path: Path) -> dict[str, Path]:
+ """Create temporary warehouse directories for testing."""
+ warehouse_1 = tmp_path / "warehouse_1"
+ warehouse_2 = tmp_path / "warehouse_2"
+
+ warehouse_1.mkdir()
+ warehouse_2.mkdir()
+
+ return {
+ "warehouse_1": warehouse_1,
+ "warehouse_2": warehouse_2,
+ }
+
+
+@pytest.fixture
+def create_warehouse_with_path(
+ dbsession: OrmSession,
+) -> Callable[..., tuple[Warehouse, WarehousePath]]:
+ """Factory to create a warehouse and its path for testing."""
+
+ def _create(
+ name: str,
+ folder_name: str = "zims",
+ ) -> tuple[Warehouse, WarehousePath]:
+ warehouse = Warehouse(name=name, configuration={})
+ dbsession.add(warehouse)
+ dbsession.flush()
+
+ warehouse_path = WarehousePath(folder_name=folder_name)
+ warehouse_path.warehouse = warehouse
+ dbsession.add(warehouse_path)
+ dbsession.flush()
+
+ return warehouse, warehouse_path
+
+ return _create
diff --git a/backend/tests/shuttle/test_move_files.py b/backend/tests/shuttle/test_move_files.py
new file mode 100644
index 0000000..ecba1f2
--- /dev/null
+++ b/backend/tests/shuttle/test_move_files.py
@@ -0,0 +1,717 @@
+"""Tests for the move_files module."""
+
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy.orm import Session as OrmSession
+
+from cms_backend.shuttle.context import Context
+from cms_backend.shuttle.move_files import move_book_files, move_files
+
+
+class TestMoveFilesEndToEnd:
+ """End-to-end tests for the move_files function."""
+
+ def test_move_files_processes_multiple_books_in_order(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that move_files processes multiple books in created_at order."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1",
+ folder_name="zims",
+ )
+ warehouse_2, warehouse_path_2 = create_warehouse_with_path(
+ name="warehouse_2",
+ folder_name="zims",
+ )
+ dbsession.flush()
+
+ # Create test files
+ source_file_1 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book1.zim"
+ source_file_2 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book2.zim"
+ source_file_3 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book3.zim"
+ for src_file in [source_file_1, source_file_2, source_file_3]:
+ src_file.parent.mkdir(parents=True, exist_ok=True)
+ source_file_1.write_text("book1 content")
+ source_file_2.write_text("book2 content")
+ source_file_3.write_text("book3 content")
+
+ # Create books with locations
+ book_1 = create_book()
+ create_book_location(
+ book=book_1,
+ warehouse_path=warehouse_path_1,
+ filename="book1.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book_1,
+ warehouse_path=warehouse_path_2,
+ filename="book1.zim",
+ status="target",
+ )
+ book_1.status = "pending_move"
+
+ book_2 = create_book()
+ create_book_location(
+ book=book_2,
+ warehouse_path=warehouse_path_1,
+ filename="book2.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book_2,
+ warehouse_path=warehouse_path_2,
+ filename="book2.zim",
+ status="target",
+ )
+ book_2.status = "pending_move"
+
+ book_3 = create_book()
+ create_book_location(
+ book=book_3,
+ warehouse_path=warehouse_path_1,
+ filename="book3.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book_3,
+ warehouse_path=warehouse_path_2,
+ filename="book3.zim",
+ status="target",
+ )
+ book_3.status = "pending_move"
+
+ dbsession.flush()
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+
+ Context.local_warehouse_paths = warehouse_paths
+ move_files(dbsession)
+ dbsession.flush()
+
+ # Verify all books processed
+ assert book_1.status == "published"
+ assert book_2.status == "published"
+ assert book_3.status == "published"
+
+ # Verify files moved
+ target_file_1 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book1.zim"
+ target_file_2 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book2.zim"
+ target_file_3 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book3.zim"
+
+ assert target_file_1.read_text() == "book1 content"
+ assert target_file_2.read_text() == "book2 content"
+ assert target_file_3.read_text() == "book3 content"
+
+ assert not source_file_1.exists()
+ assert not source_file_2.exists()
+ assert not source_file_3.exists()
+
+
+class TestMoveBookFilesSuccess:
+ """Tests for successful move_book_files scenarios."""
+
+ def test_move_book_files_simple_move(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test simple move with 1 current and 1 target location."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1",
+ folder_name="zims",
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "zims"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "old_book.zim"
+ source_file.write_text("test content")
+
+ book = create_book()
+ current_loc = create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="old_book.zim",
+ status="current",
+ )
+ target_loc = create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="new_book.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])}
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ target_file = source_dir / "new_book.zim"
+ assert target_file.exists()
+ assert target_file.read_text() == "test content"
+ assert not source_file.exists()
+ assert book.status == "published"
+ assert target_loc.status == "current"
+ assert current_loc not in book.locations
+
+ def test_move_book_files_copy_then_move(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test copying to first target, then moving to second (1→2)."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="source"
+ )
+ warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_a"
+ )
+ warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_b"
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "source"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "source.zim"
+ source_file.write_text("book content")
+
+ book = create_book()
+ # current_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="source.zim",
+ status="current",
+ )
+ # target_loc_1
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_a,
+ filename="target1.zim",
+ status="target",
+ )
+ # target_loc_2
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_b,
+ filename="target2.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ for subfolder in ["target_a", "target_b"]:
+ (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir(
+ parents=True, exist_ok=True
+ )
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]),
+ warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" / "target1.zim"
+ target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim"
+ assert target_file_1.exists()
+ assert target_file_1.read_text() == "book content"
+ assert target_file_2.exists()
+ assert target_file_2.read_text() == "book content"
+ assert not source_file.exists()
+ assert book.status == "published"
+
+ def test_move_book_files_multiple_copies_one_move(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test 1→3: 2 copies then 1 move."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="source"
+ )
+ warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_a"
+ )
+ warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_b"
+ )
+ warehouse_2_c, warehouse_path_2_c = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_c"
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "source"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "source.zim"
+ source_file.write_text("book content")
+
+ book = create_book()
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="source.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_a,
+ filename="target1.zim",
+ status="target",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_b,
+ filename="target2.zim",
+ status="target",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_c,
+ filename="target3.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ for subfolder in ["target_a", "target_b", "target_c"]:
+ (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir(
+ parents=True, exist_ok=True
+ )
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]),
+ warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]),
+ warehouse_2_c.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ for i, subfolder in enumerate(["target_a", "target_b", "target_c"], 1):
+ target_file = (
+ temp_warehouse_dirs["warehouse_2"] / subfolder / f"target{i}.zim"
+ )
+ assert target_file.exists()
+ assert target_file.read_text() == "book content"
+
+ assert not source_file.exists()
+ current_locs = [loc for loc in book.locations if loc.status == "current"]
+ assert len(current_locs) == 3
+
+
+class TestMoveBookFilesErrors:
+ """Tests for error handling in move_book_files."""
+
+ def test_move_book_files_no_current_locations(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that book with no current locations is marked as errored."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="zims"
+ )
+ dbsession.flush()
+
+ book = create_book()
+ target_loc = create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="target.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])}
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ assert book.status == "errored"
+ assert any("no current location" in event for event in book.events)
+ assert target_loc.status == "target"
+
+ def test_move_book_files_no_target_locations(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that book with no target locations is marked as published."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="zims"
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "zims"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "book.zim"
+ source_file.write_text("content")
+
+ book = create_book()
+ # current_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="book.zim",
+ status="current",
+ )
+ dbsession.flush()
+
+ warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])}
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ assert book.status == "published"
+ assert any("no target location" in event for event in book.events)
+ assert source_file.exists()
+
+ def test_move_book_files_inaccessible_warehouse(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that book with inaccessible warehouse is skipped."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="zims"
+ )
+ _, warehouse_path_2 = create_warehouse_with_path(
+ name="warehouse_2", folder_name="zims"
+ )
+ dbsession.flush()
+
+ book = create_book()
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="book.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2,
+ filename="book_target.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ # Only warehouse_1 accessible (warehouse_2 is inaccessible)
+ warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])}
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ # Book should not be processed (status unchanged)
+ assert book.status == "pending_processing"
+
+
+class TestMoveBookFilesEdgeCases:
+ """Tests for edge cases in move_book_files."""
+
+ def test_move_book_files_same_location(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test when current location matches target."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="zims"
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "zims"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "book.zim"
+ source_file.write_text("content")
+
+ book = create_book()
+ # current_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="book.zim",
+ status="current",
+ )
+ # target_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="book.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])}
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ assert book.status == "published"
+ assert source_file.exists()
+ assert source_file.read_text() == "content"
+
+ def test_move_book_files_events_contain_warehouse_info(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that events contain warehouse and folder info."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="primary_warehouse", folder_name="main_zims"
+ )
+ warehouse_2, warehouse_path_2 = create_warehouse_with_path(
+ name="backup_warehouse", folder_name="backup_zims"
+ )
+ dbsession.flush()
+
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "main_zims"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "test_book.zim"
+ source_file.write_text("content")
+
+ book = create_book()
+ # current_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="test_book.zim",
+ status="current",
+ )
+ # target_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2,
+ filename="test_book_v2.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ (temp_warehouse_dirs["warehouse_2"] / "backup_zims").mkdir(
+ parents=True, exist_ok=True
+ )
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ move_events = [e for e in book.events if "moved book from" in e]
+ assert len(move_events) == 1
+ event = move_events[0]
+ assert "primary_warehouse" in event
+ assert "main_zims" in event
+ assert "backup_warehouse" in event
+ assert "backup_zims" in event
+
+ def test_move_book_files_file_content_preserved(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that file content is preserved during copy and move."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="source"
+ )
+ warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_a"
+ )
+ warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path(
+ name="warehouse_2", folder_name="target_b"
+ )
+ dbsession.flush()
+
+ test_content = "This is a test ZIM file content with special chars: éàù\n" * 100
+ source_dir = temp_warehouse_dirs["warehouse_1"] / "source"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ source_file = source_dir / "source.zim"
+ source_file.write_text(test_content)
+
+ book = create_book()
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="source.zim",
+ status="current",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_a,
+ filename="target1.zim",
+ status="target",
+ )
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2_b,
+ filename="target2.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ for subfolder in ["target_a", "target_b"]:
+ (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir(
+ parents=True, exist_ok=True
+ )
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]),
+ warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" / "target1.zim"
+ target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim"
+
+ assert target_file_1.read_text() == test_content
+ assert target_file_2.read_text() == test_content
+
+ def test_move_book_files_three_current_one_target(
+ self,
+ dbsession: OrmSession,
+ create_book: Any,
+ create_book_location: Any,
+ create_warehouse_with_path: Any,
+ temp_warehouse_dirs: dict[str, Path],
+ ):
+ """Test that when there are 3 current and 1 target, one moves and two get
+ deleted."""
+ warehouse_1, warehouse_path_1 = create_warehouse_with_path(
+ name="warehouse_1", folder_name="zims"
+ )
+ warehouse_2, warehouse_path_2 = create_warehouse_with_path(
+ name="warehouse_2", folder_name="zims"
+ )
+ warehouse_3, warehouse_path_3 = create_warehouse_with_path(
+ name="warehouse_3", folder_name="zims"
+ )
+ warehouse_4, warehouse_path_4 = create_warehouse_with_path(
+ name="warehouse_4", folder_name="zims"
+ )
+ dbsession.flush()
+
+ # Create three current locations with actual files
+ source_dir_1 = temp_warehouse_dirs["warehouse_1"] / "zims"
+ source_dir_1.mkdir(parents=True, exist_ok=True)
+ source_file_1 = source_dir_1 / "book1.zim"
+ source_file_1.write_text("content 1")
+
+ source_dir_2 = temp_warehouse_dirs["warehouse_2"] / "zims"
+ source_dir_2.mkdir(parents=True, exist_ok=True)
+ source_file_2 = source_dir_2 / "book2.zim"
+ source_file_2.write_text("content 2")
+
+ source_dir_3 = temp_warehouse_dirs["warehouse_1"] / "zims"
+ source_file_3 = source_dir_3 / "book3.zim"
+ source_file_3.write_text("content 3")
+
+ book = create_book()
+ # current_loc_1
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_1,
+ filename="book1.zim",
+ status="current",
+ )
+ # current_loc_2
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_2,
+ filename="book2.zim",
+ status="current",
+ )
+ # current_loc_3
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_3,
+ filename="book3.zim",
+ status="current",
+ )
+ # target_loc
+ create_book_location(
+ book=book,
+ warehouse_path=warehouse_path_4,
+ filename="book_final.zim",
+ status="target",
+ )
+ dbsession.flush()
+
+ (temp_warehouse_dirs["warehouse_2"] / "zims").mkdir(parents=True, exist_ok=True)
+
+ warehouse_paths = {
+ warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]),
+ warehouse_3.id: str(temp_warehouse_dirs["warehouse_1"]),
+ warehouse_4.id: str(temp_warehouse_dirs["warehouse_2"]),
+ }
+ Context.local_warehouse_paths = warehouse_paths
+ move_book_files(dbsession, book)
+ dbsession.flush()
+
+ # Verify one file moved to target
+ target_file = temp_warehouse_dirs["warehouse_2"] / "zims" / "book_final.zim"
+ assert target_file.exists()
+ assert target_file.read_text() == "content 1" # First current moved
+
+ # Verify other files deleted
+ assert not source_file_1.exists() # Moved
+ assert not source_file_2.exists() # Deleted
+ assert not source_file_3.exists() # Deleted
+
+ # Verify events
+ move_events = [e for e in book.events if "moved book from" in e]
+ delete_events = [e for e in book.events if "deleted old location" in e]
+ assert len(move_events) == 1
+ assert len(delete_events) == 2
+
+ # Verify only one current location remains
+ current_locations = [loc for loc in book.locations if loc.status == "current"]
+ assert len(current_locations) == 1
+ assert current_locations[0].warehouse_path_id == warehouse_path_4.id
+
+ assert book.status == "published"
diff --git a/dev/README.md b/dev/README.md
index a626e73..f8a7cb3 100644
--- a/dev/README.md
+++ b/dev/README.md
@@ -50,6 +50,64 @@ This sets up the containers, runs the migrations.
Note that to run tests, we use a separate DB with the backend-tests container
+### Setup warehouse paths
+
+Before using the shuttle service for file operations, you need to initialize the warehouse paths in the database:
+
+```sh
+docker exec cms_shuttle python /scripts/setup_warehouses.py
+```
+
+This script will:
+- Create warehouse directories in `dev/warehouses/`
+- Create corresponding database records (Warehouse and WarehousePath)
+- Print the LOCAL_WAREHOUSE_PATHS configuration (already configured in docker-compose.yml)
+
+Current warehouse configuration:
+- **hidden**: 2 paths (`jail`, `dev`)
+- **prod**: 1 path (`other`)
+
+To modify warehouse configuration, edit the `WAREHOUSES_CONFIG` dict in [scripts/setup_warehouses.py](scripts/setup_warehouses.py) and re-run the script.
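+
+The `LOCAL_WAREHOUSE_PATHS` environment variable read by the shuttle maps each
+warehouse UUID to its local mount point, as comma-separated `<uuid>:<path>` pairs.
+For illustration, the value for the `hidden` and `prod` warehouses above looks like:
+
+```sh
+LOCAL_WAREHOUSE_PATHS="11111111-1111-1111-1111-111111111111:/warehouses/hidden,22222222-2222-2222-2222-222222222222:/warehouses/prod"
+```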
+
+### Setup titles
+
+After setting up warehouse paths, you can create sample titles with their warehouse path associations:
+
+```sh
+docker exec cms_mill python /scripts/setup_titles.py
+```
+
+This script will:
+- Create Title records in the database
+- Associate titles with dev and prod warehouse paths via TitleWarehousePath
+
+To modify title configuration, edit the `TITLES_CONFIG` list in [scripts/setup_titles.py](scripts/setup_titles.py) and re-run the script.
+
+### Setup notifications
+
+After setting up titles, you can create sample zimfarm notifications for testing the mill processor:
+
+```sh
+docker exec cms_shuttle python /scripts/setup_notifications.py
+```
+
+This script will:
+- Create ZimfarmNotification records with status "pending"
+- Create "fake" ZIMs in warehouse folders
+- Each notification references a warehouse path and matches a title's producer_unique_id
+
+After creating notifications, the mill will process them into books. To modify notification configuration, edit the `NOTIFICATIONS_CONFIG` list in [scripts/setup_notifications.py](scripts/setup_notifications.py) and re-run the script.
+
+### Wipe database and files
+
+To delete all data from the database and all ZIM files from warehouses:
+
+```sh
+docker exec cms_shuttle python /scripts/wipe.py
+```
+
+This is useful when you need to reset everything to a clean state before re-running setup scripts.
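+
+A typical reset-and-reseed cycle therefore chains the scripts above:
+
+```sh
+docker exec cms_shuttle python /scripts/wipe.py
+docker exec cms_shuttle python /scripts/setup_warehouses.py
+docker exec cms_mill python /scripts/setup_titles.py
+docker exec cms_shuttle python /scripts/setup_notifications.py
+```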
+
### Restart the backend
The backend might typically fail if the DB schema is not up-to-date, or if you create some nasty bug while modifying the code.
diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml
index 3edb36a..714e941 100644
--- a/dev/docker-compose.yml
+++ b/dev/docker-compose.yml
@@ -59,6 +59,7 @@ services:
container_name: cms_mill
volumes:
- ../backend/src/cms_backend:/usr/local/lib/python3.13/site-packages/cms_backend
+ - ./scripts:/scripts
environment:
DEBUG: 1
DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cms
@@ -72,9 +73,12 @@ services:
container_name: cms_shuttle
volumes:
- ../backend/src/cms_backend:/usr/local/lib/python3.13/site-packages/cms_backend
+ - ./scripts:/scripts
+ - ./warehouses:/warehouses
environment:
DEBUG: 1
DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cms
+ LOCAL_WAREHOUSE_PATHS: "11111111-1111-1111-1111-111111111111:/warehouses/hidden,22222222-2222-2222-2222-222222222222:/warehouses/prod,33333333-3333-3333-3333-333333333333:/warehouses/client1"
depends_on:
postgresdb:
condition: service_healthy
@@ -89,6 +93,7 @@ services:
- ../backend/tests:/app/tests
environment:
DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cmstest
+      ALEMBIC_UPGRADE_HEAD_ON_START: "false"
depends_on:
- postgresdb
frontend:
diff --git a/dev/scripts/setup_notifications.py b/dev/scripts/setup_notifications.py
new file mode 100644
index 0000000..31271da
--- /dev/null
+++ b/dev/scripts/setup_notifications.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""
+Development zimfarm notifications setup script.
+
+Creates ZimfarmNotification records for testing the mill processor.
+"""
+
+from datetime import datetime
+from pathlib import Path
+from uuid import uuid4
+
+from cms_backend.db import Session
+from cms_backend.db.models import ZimfarmNotification
+
+
+# Base directory where warehouse folders are located (inside container)
+WAREHOUSE_BASE_PATH = Path("/warehouses")
+
+# Configuration: Define sample notifications
+# Each notification should match a title's producer_unique_id and warehouse path
+NOTIFICATIONS_CONFIG = [
+ {
+ "article_count": 10000,
+ "media_count": 5000,
+ "size": 1024000000,
+ "metadata": {
+ "Name": "wikipedia_en_all",
+ "Title": "Wikipedia English All Maxi",
+ "Creator": "openZIM",
+ "Publisher": "Kiwix",
+ "Date": "2025-01-15",
+ "Description": "Wikipedia English offline",
+ "Language": "eng",
+ "Flavour": "maxi",
+ },
+ "zimcheck": {"status": "pass"},
+ "warehouse_name": "hidden",
+ "folder_name": "jail",
+ "filename": "wikipedia_en_all_maxi_2025-01.zim",
+ "producer": {
+ "displayName": "farm.openzim.org: wikipedia_en_all_maxi",
+ "displayUrl": "https://farm.openzim.org/recipes/wikipedia_en_all_maxi",
+ "uniqueId": "farm.openzim.org:wikipedia_en_all_maxi",
+ },
+ },
+ {
+ "article_count": 500,
+ "media_count": 200,
+ "size": 50000000,
+ "metadata": {
+ "Name": "wiktionary_fr_all",
+ "Title": "Wiktionnaire Francais",
+ "Creator": "openZIM",
+ "Publisher": "Kiwix",
+ "Date": "2025-01-10",
+ "Description": "Wiktionnaire hors-ligne",
+ "Language": "fra",
+ "Flavour": "maxi",
+ },
+ "zimcheck": {"status": "pass"},
+ "warehouse_name": "hidden",
+ "folder_name": "jail",
+ "filename": "wiktionary_fr_all_maxi_2025-01.zim",
+ "producer": {
+ "displayName": "wiktionary_fr",
+ "displayUrl": "https://farm.openzim.org/recipes/wiktionary_fr",
+ "uniqueId": "farm.openzim.org:wiktionary_fr",
+ },
+ },
+ {
+ "article_count": 1500,
+ "media_count": 2020,
+ "size": 40000,
+ "metadata": {
+ "Name": "wiktionary_en_all",
+ "Title": "English Wiktionary",
+ "Creator": "openZIM",
+ "Publisher": "Kiwix",
+ "Date": "2025-01-10",
+ "Description": "Offline wiktionary",
+ "Language": "eng",
+ "Flavour": "maxi",
+ },
+ "zimcheck": {"status": "pass"},
+ "warehouse_name": "hidden",
+ "folder_name": "jail",
+ "filename": "wiktionary_en_all_maxi_2025-01.zim",
+ "producer": {
+ "displayName": "wiktionary_en",
+ "displayUrl": "https://farm.openzim.org/recipes/wiktionary_en",
+ "uniqueId": "farm.openzim.org:wiktionary_en",
+ },
+ },
+]
+
+
+def create_notifications():
+ """Create zimfarm notification records and placeholder files."""
+ session = Session()
+
+ created_notifications = []
+
+ try:
+ for content in NOTIFICATIONS_CONFIG:
+ filename = content.get("filename", "unknown")
+ warehouse_name = content["warehouse_name"]
+ folder_name = content["folder_name"]
+
+ print(f"\nProcessing notification: {filename}")
+
+ # Check if file already exists in warehouse
+ file_path = WAREHOUSE_BASE_PATH / warehouse_name / folder_name / filename
+ if file_path.exists():
+ print(f" - File already exists at {file_path} (skipping)")
+ continue
+
+ # Generate random notification ID
+ notification_id = uuid4()
+
+ # Create notification record
+ notification = ZimfarmNotification(
+ id=notification_id,
+ received_at=datetime.now(),
+ content=content,
+ )
+ session.add(notification)
+ session.flush()
+ print(f" + Created notification '{filename}' with ID {notification.id}")
+ print(f" Status: {notification.status}")
+
+ # Create placeholder file with notification ID
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+ file_path.write_text(str(notification_id))
+ print(f" + Created file: {file_path}")
+
+ created_notifications.append(
+ (filename, warehouse_name, folder_name, notification_id)
+ )
+
+ # Commit all changes
+ session.commit()
+ print("\n+ All database changes committed")
+
+ # Print summary
+ if created_notifications:
+ print("\n" + "=" * 70)
+ print("Notifications created (status: pending):")
+ print("=" * 70)
+ for (
+ filename,
+ warehouse_name,
+ folder_name,
+ notification_id,
+ ) in created_notifications:
+ print(f" {filename}")
+ print(f" warehouse: {warehouse_name}/{folder_name}")
+ print(f" id: {notification_id}")
+ print("=" * 70)
+ print("\nThe mill will now process these notifications into books.")
+ else:
+ print("\nNo new notifications created (all files already exist).")
+
+ except Exception as e:
+ session.rollback()
+ print(f"\n- Error: {e}")
+ raise
+ finally:
+ session.close()
+
+
+if __name__ == "__main__":
+ create_notifications()
diff --git a/dev/scripts/setup_titles.py b/dev/scripts/setup_titles.py
new file mode 100644
index 0000000..4b61619
--- /dev/null
+++ b/dev/scripts/setup_titles.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+Development titles setup script.
+
+Creates Title records and associates them with warehouse paths.
+"""
+
+from cms_backend.db import Session
+from cms_backend.db.models import Title, TitleWarehousePath, Warehouse, WarehousePath
+
+# Configuration: Define titles and their warehouse path associations
+# Format for paths: (warehouse_name, folder_name)
+TITLES_CONFIG = [
+ {
+ "name": "wikipedia_en_all",
+ "producer_unique_id": "farm.openzim.org:wikipedia_en_all_maxi",
+ "dev_paths": [("hidden", "dev")],
+ "prod_paths": [("prod", "other")],
+ },
+ {
+ "name": "wiktionary_fr_all",
+ "producer_unique_id": "farm.openzim.org:wiktionary_fr",
+ "producer_display_name": "wiktionary_fr",
+ "producer_display_url": "https://farm.openzim.org/recipes/wiktionary_fr",
+ "in_prod": True,
+ "dev_paths": [("hidden", "dev")],
+ "prod_paths": [("prod", "other"), ("client1", "all")],
+ },
+]
+
+
+def get_warehouse_path(session, warehouse_name: str, folder_name: str) -> WarehousePath:
+ """Look up a WarehousePath by warehouse name and folder name."""
+ result = (
+ session.query(WarehousePath)
+ .join(Warehouse)
+ .filter(
+ Warehouse.name == warehouse_name,
+ WarehousePath.folder_name == folder_name,
+ )
+ .first()
+ )
+ if not result:
+ raise ValueError(
+ f"WarehousePath not found: {warehouse_name}/{folder_name}. "
+ "Run setup_warehouses.py first."
+ )
+ return result
+
+
+def create_titles():
+ """Create title records and associate them with warehouse paths."""
+ session = Session()
+
+ try:
+ for title_config in TITLES_CONFIG:
+ title_name = title_config["name"]
+ print(f"\nProcessing title: {title_name}")
+
+ # Check if title already exists
+ existing = session.query(Title).filter(Title.name == title_name).first()
+
+ if existing:
+ print(f" - Title '{title_name}' already exists (skipping)")
+ continue
+
+ # Create title record
+ title = Title(
+ name=title_name,
+ producer_unique_id=title_config["producer_unique_id"],
+ )
+ # Set optional fields
+ if "in_prod" in title_config:
+ title.in_prod = title_config["in_prod"]
+ if "producer_display_name" in title_config:
+ title.producer_display_name = title_config["producer_display_name"]
+ if "producer_display_url" in title_config:
+ title.producer_display_url = title_config["producer_display_url"]
+
+ session.add(title)
+ session.flush() # Get the generated UUID
+ print(f" + Created title '{title_name}' with ID {title.id}")
+
+ # Associate dev warehouse paths
+ for warehouse_name, folder_name in title_config.get("dev_paths", []):
+ warehouse_path = get_warehouse_path(
+ session, warehouse_name, folder_name
+ )
+ twp = TitleWarehousePath(path_type="dev")
+ twp.title = title
+ twp.warehouse_path = warehouse_path
+ session.add(twp)
+ print(f" + Added dev path: {warehouse_name}/{folder_name}")
+
+ # Associate prod warehouse paths
+ for warehouse_name, folder_name in title_config.get("prod_paths", []):
+ warehouse_path = get_warehouse_path(
+ session, warehouse_name, folder_name
+ )
+ twp = TitleWarehousePath(path_type="prod")
+ twp.title = title
+ twp.warehouse_path = warehouse_path
+ session.add(twp)
+ print(f" + Added prod path: {warehouse_name}/{folder_name}")
+
+ # Commit all changes
+ session.commit()
+ print("\n+ All database changes committed")
+
+ # Print summary
+ print("\n" + "=" * 70)
+ print("Titles configured:")
+ print("=" * 70)
+ for title_config in TITLES_CONFIG:
+ dev_paths = ", ".join(
+ f"{w}/{f}" for w, f in title_config.get("dev_paths", [])
+ )
+ prod_paths = ", ".join(
+ f"{w}/{f}" for w, f in title_config.get("prod_paths", [])
+ )
+ print(f" {title_config['name']}")
+ if (in_prod := title_config.get("in_prod")) is not None:
+ print(f" in_prod: {in_prod}")
+ print(f" dev: {dev_paths or '(none)'}")
+ print(f" prod: {prod_paths or '(none)'}")
+ print("=" * 70)
+
+ except Exception as e:
+ session.rollback()
+ print(f"\n- Error: {e}")
+ raise
+ finally:
+ session.close()
+
+
+if __name__ == "__main__":
+ create_titles()
diff --git a/dev/scripts/setup_warehouses.py b/dev/scripts/setup_warehouses.py
new file mode 100644
index 0000000..6a47c1a
--- /dev/null
+++ b/dev/scripts/setup_warehouses.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""
+Development warehouse setup script.
+
+Creates warehouse directories and database records.
+"""
+
+import sys
+from pathlib import Path
+from uuid import UUID
+
+# Ensure the site-packages dir (where cms_backend is mounted) is on sys.path
+sys.path.insert(0, "/usr/local/lib/python3.13/site-packages")
+
+from cms_backend.db import Session
+from cms_backend.db.models import Warehouse, WarehousePath
+
+
+# Configuration: Define warehouses and their paths
+# UUIDs must match those in docker-compose.yml LOCAL_WAREHOUSE_PATHS
+WAREHOUSES_CONFIG = {
+ "hidden": {
+ "id": UUID("11111111-1111-1111-1111-111111111111"),
+ "paths": ["jail", "dev"],
+ "configuration": {},
+ },
+ "prod": {
+ "id": UUID("22222222-2222-2222-2222-222222222222"),
+ "paths": ["other"],
+ "configuration": {},
+ },
+ "client1": {
+ "id": UUID("33333333-3333-3333-3333-333333333333"),
+ "paths": ["all"],
+ "configuration": {},
+ },
+}
+
+# Base directory where warehouse folders will be created (inside container)
+WAREHOUSE_BASE_PATH = Path("/warehouses")
+
+
+def create_warehouse_structure():
+ """Create warehouse directories and database records."""
+ session = Session()
+
+ try:
+ for warehouse_name, config in WAREHOUSES_CONFIG.items():
+ print(f"\nProcessing warehouse: {warehouse_name}")
+ warehouse_id = config["id"]
+
+ # Check if warehouse already exists
+ existing = (
+ session.query(Warehouse).filter(Warehouse.id == warehouse_id).first()
+ )
+
+ if existing:
+ print(f" ⊘ Warehouse '{warehouse_name}' already exists (skipping)")
+ warehouse = existing
+ else:
+ # Create warehouse DB record with predefined ID
+ warehouse = Warehouse(
+ name=warehouse_name,
+ configuration=config.get("configuration", {}),
+ )
+ warehouse.id = warehouse_id
+ session.add(warehouse)
+ session.flush()
+ print(
+ f" ✓ Created warehouse '{warehouse_name}' with ID {warehouse.id}"
+ )
+
+ # Create paths for this warehouse
+ for path_name in config["paths"]:
+ # Check if path already exists
+ existing_path = (
+ session.query(WarehousePath)
+ .filter(
+ WarehousePath.warehouse_id == warehouse.id,
+ WarehousePath.folder_name == path_name,
+ )
+ .first()
+ )
+
+ if existing_path:
+ print(f" ⊘ Path '{path_name}' already exists (skipping)")
+ warehouse_path = existing_path
+ else:
+ # Create warehouse path DB record
+ warehouse_path = WarehousePath(
+ folder_name=path_name,
+ )
+ warehouse_path.warehouse = warehouse
+ session.add(warehouse_path)
+ session.flush()
+ print(
+ f" ✓ Created path '{path_name}' with ID {warehouse_path.id}"
+ )
+
+ # Create physical directory
+ physical_path = WAREHOUSE_BASE_PATH / warehouse_name / path_name
+ if physical_path.exists():
+ print(f" ⊘ Directory already exists: {physical_path}")
+ else:
+ physical_path.mkdir(parents=True, exist_ok=True)
+ print(f" ✓ Created directory: {physical_path}")
+
+ # Commit all changes
+ session.commit()
+ print("\n✓ All database changes committed")
+
+ # Print configuration summary
+ print("\n" + "=" * 70)
+ print("LOCAL_WAREHOUSE_PATHS configuration (already in docker-compose.yml):")
+ print("=" * 70)
+ env_parts = []
+ for warehouse_name, config in WAREHOUSES_CONFIG.items():
+ warehouse_base = WAREHOUSE_BASE_PATH / warehouse_name
+ env_parts.append(f"{config['id']}:{warehouse_base}")
+ env_value = ",".join(env_parts)
+ print(f" {env_value}")
+ print("\n" + "=" * 70)
+
+ except Exception as e:
+ session.rollback()
+ print(f"\n✗ Error: {e}", file=sys.stderr)
+ raise
+ finally:
+ session.close()
+
+
+if __name__ == "__main__":
+ create_warehouse_structure()
diff --git a/dev/scripts/wipe.py b/dev/scripts/wipe.py
new file mode 100644
index 0000000..e84917e
--- /dev/null
+++ b/dev/scripts/wipe.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Development wipe script.
+
+Deletes all data from the database and all ZIM files from warehouses.
+Run inside the shuttle container:
+ docker exec cms_shuttle python /scripts/wipe.py
+"""
+
+from pathlib import Path
+
+from cms_backend.db import Session
+from cms_backend.db.models import (
+ Book,
+ BookLocation,
+ Title,
+ TitleWarehousePath,
+ Warehouse,
+ WarehousePath,
+ ZimfarmNotification,
+)
+
+
+# Base directory where warehouse folders are located (inside container)
+WAREHOUSE_BASE_PATH = Path("/warehouses")
+
+
+def wipe_database(session):
+ """Delete all data from the database in the correct order."""
+ print("Wiping database...")
+
+ # Delete in order respecting foreign key constraints
+ # (children before parents)
+
+ # 1. BookLocation (depends on Book and WarehousePath)
+ count = session.query(BookLocation).delete()
+ print(f" - Deleted {count} BookLocation records")
+
+ # 2. ZimfarmNotification (depends on Book)
+ count = session.query(ZimfarmNotification).delete()
+ print(f" - Deleted {count} ZimfarmNotification records")
+
+ # 3. Book (depends on Title)
+ count = session.query(Book).delete()
+ print(f" - Deleted {count} Book records")
+
+ # 4. TitleWarehousePath (depends on Title and WarehousePath)
+ count = session.query(TitleWarehousePath).delete()
+ print(f" - Deleted {count} TitleWarehousePath records")
+
+ # 5. Title
+ count = session.query(Title).delete()
+ print(f" - Deleted {count} Title records")
+
+ # 6. WarehousePath (depends on Warehouse)
+ count = session.query(WarehousePath).delete()
+ print(f" - Deleted {count} WarehousePath records")
+
+ # 7. Warehouse
+ count = session.query(Warehouse).delete()
+ print(f" - Deleted {count} Warehouse records")
+
+
+def wipe_warehouse_files():
+ """Delete all ZIM files in warehouse directories."""
+ print("\nWiping warehouse files...")
+
+ if not WAREHOUSE_BASE_PATH.exists():
+ print(f" - Warehouse path {WAREHOUSE_BASE_PATH} does not exist")
+ return
+
+ zim_files = list(WAREHOUSE_BASE_PATH.rglob("*.zim"))
+
+ if not zim_files:
+ print(" - No ZIM files to delete")
+ return
+
+ for file_path in zim_files:
+ file_path.unlink()
+ print(f" - Deleted {file_path}")
+
+ print(f" - Total files deleted: {len(zim_files)}")
+
+
+def wipe():
+ """Wipe database and warehouse files."""
+ session = Session()
+
+ try:
+ wipe_database(session)
+ session.commit()
+ print("\n+ Database wiped successfully")
+
+ wipe_warehouse_files()
+ print("\n+ Warehouse files wiped successfully")
+
+ except Exception as e:
+ session.rollback()
+ print(f"\n- Error: {e}")
+ raise
+ finally:
+ session.close()
+
+
+if __name__ == "__main__":
+ wipe()
diff --git a/dev/warehouses/.gitkeep b/dev/warehouses/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/frontend/src/components/BookStatus.vue b/frontend/src/components/BookStatus.vue
index 7aa3678..f0d038c 100644
--- a/frontend/src/components/BookStatus.vue
+++ b/frontend/src/components/BookStatus.vue
@@ -11,9 +11,13 @@
Pending Title
-
-
- Processed
+
+
+ Pending Move
+
+
+
+ Published