diff --git a/backend/src/cms_backend/api/main.py b/backend/src/cms_backend/api/main.py index 1438803..a344593 100644 --- a/backend/src/cms_backend/api/main.py +++ b/backend/src/cms_backend/api/main.py @@ -10,14 +10,14 @@ from pydantic import ValidationError from cms_backend.api.routes.books import router as books_router +from cms_backend.api.routes.collection import router as collection_router from cms_backend.api.routes.healthcheck import router as healthcheck_router from cms_backend.api.routes.http_errors import BadRequestError -from cms_backend.api.routes.library import router as library_router from cms_backend.api.routes.titles import router as titles_router -from cms_backend.api.routes.warehouse_paths import router as warehouse_paths_router from cms_backend.api.routes.zimfarm_notifications import ( router as zimfarm_notification_router, ) +from cms_backend.context import Context from cms_backend.db.exceptions import ( RecordAlreadyExistsError, RecordDisabledError, @@ -30,7 +30,8 @@ @asynccontextmanager async def lifespan(_: FastAPI): - upgrade_db_schema() + if Context.alembic_upgrade_head_on_start: + upgrade_db_schema() yield @@ -58,8 +59,7 @@ def create_app(*, debug: bool = True): main_router.include_router(router=healthcheck_router) main_router.include_router(router=titles_router) main_router.include_router(router=books_router) - main_router.include_router(router=warehouse_paths_router) - main_router.include_router(router=library_router) + main_router.include_router(router=collection_router) app.include_router(router=main_router) diff --git a/backend/src/cms_backend/api/routes/books.py b/backend/src/cms_backend/api/routes/books.py index a739ca3..7994d31 100644 --- a/backend/src/cms_backend/api/routes/books.py +++ b/backend/src/cms_backend/api/routes/books.py @@ -14,7 +14,6 @@ BookFullSchema, BookLightSchema, BookLocationSchema, - ProducerSchema, ) router = APIRouter(prefix="/books", tags=["books"]) @@ -67,9 +66,8 @@ async def get_book( # Separate current and 
target locations current_locations = [ BookLocationSchema( - warehouse_path_id=location.warehouse_path_id, - warehouse_name=location.warehouse_path.warehouse.name, - folder_name=location.warehouse_path.folder_name, + warehouse_name=location.warehouse.name, + path=str(location.path), filename=location.filename, status=location.status, ) @@ -79,9 +77,8 @@ async def get_book( target_locations = [ BookLocationSchema( - warehouse_path_id=location.warehouse_path_id, - warehouse_name=location.warehouse_path.warehouse.name, - folder_name=location.warehouse_path.folder_name, + warehouse_name=location.warehouse.name, + path=str(location.path), filename=location.filename, status=location.status, ) @@ -103,11 +100,6 @@ async def get_book( zimcheck_result=db_book.zimcheck_result, zim_metadata=db_book.zim_metadata, events=db_book.events, - producer=ProducerSchema( - display_name=db_book.producer_display_name, - display_url=db_book.producer_display_url, - unique_id=db_book.producer_unique_id, - ), current_locations=current_locations, target_locations=target_locations, ) diff --git a/backend/src/cms_backend/api/routes/library.py b/backend/src/cms_backend/api/routes/collection.py similarity index 81% rename from backend/src/cms_backend/api/routes/library.py rename to backend/src/cms_backend/api/routes/collection.py index 69a356a..9f6060f 100644 --- a/backend/src/cms_backend/api/routes/library.py +++ b/backend/src/cms_backend/api/routes/collection.py @@ -8,15 +8,15 @@ from sqlalchemy.orm import Session as OrmSession from cms_backend.db import gen_dbsession -from cms_backend.db.exceptions import RecordDoesNotExistError -from cms_backend.db.library import ( - get_latest_books_for_library, - get_library, - get_library_by_name_or_none, +from cms_backend.db.collection import ( + get_collection, + get_collection_by_name_or_none, + get_latest_books_for_collection, ) +from cms_backend.db.exceptions import RecordDoesNotExistError from cms_backend.db.models import Book -router = 
APIRouter(prefix="/libraries", tags=["libraries"]) +router = APIRouter(prefix="/collections", tags=["collections"]) def _build_library_xml(books: list[Book]) -> str: @@ -67,25 +67,25 @@ def _build_library_xml(books: list[Book]) -> str: return ET.tostring(library_elem, encoding="unicode") -@router.get("/{library_id_or_name}/catalog.xml") +@router.get("/{collection_id_or_name}/catalog.xml") async def get_library_catalog_xml( - library_id_or_name: Annotated[str, Path()], + collection_id_or_name: Annotated[str, Path()], session: Annotated[OrmSession, Depends(gen_dbsession)], ): - """Get library catalog as XML. Library can be specified by ID (UUID) or name.""" + """Get collection catalog as XML library by collection ID (UUID) or name.""" # Try to parse as UUID first, otherwise treat as name - library = None + collection = None try: - library_id = UUID(library_id_or_name) + collection_id = UUID(collection_id_or_name) try: - library = get_library(session, library_id) + collection = get_collection(session, collection_id) except RecordDoesNotExistError: pass except ValueError: # Not a valid UUID, try as name - library = get_library_by_name_or_none(session, library_id_or_name) + collection = get_collection_by_name_or_none(session, collection_id_or_name) - if library is None: + if collection is None: return Response( content='' '', @@ -93,7 +93,7 @@ async def get_library_catalog_xml( media_type="application/xml", ) - books = get_latest_books_for_library(session, library.id) + books = get_latest_books_for_collection(session, collection.id) xml_content = _build_library_xml(books) return Response( diff --git a/backend/src/cms_backend/api/routes/titles.py b/backend/src/cms_backend/api/routes/titles.py index fbf1dfa..032f0cf 100644 --- a/backend/src/cms_backend/api/routes/titles.py +++ b/backend/src/cms_backend/api/routes/titles.py @@ -13,9 +13,9 @@ from cms_backend.schemas import BaseModel from cms_backend.schemas.orms import ( BookLightSchema, + TitleCollectionSchema, 
TitleFullSchema, TitleLightSchema, - WarehousePathInfoSchema, ) router = APIRouter(prefix="/titles", tags=["titles"]) @@ -29,12 +29,7 @@ class TitlesGetSchema(BaseModel): class TitleCreateSchema(BaseModel): name: str - producer_unique_id: str - producer_display_name: str | None = None - producer_display_url: str | None = None - dev_warehouse_path_ids: list[UUID] - prod_warehouse_path_ids: list[UUID] - in_prod: bool = False + maturity: str | None = None @router.get("") @@ -67,29 +62,10 @@ def get_title( """Get a title by ID with full details including books""" title = db_get_title_by_id(session, title_id=title_id) - # Build sorted warehouse path lists by path_type - def build_warehouse_paths(path_type: str) -> list[WarehousePathInfoSchema]: - paths = [ - WarehousePathInfoSchema( - path_id=twp.warehouse_path.id, - folder_name=twp.warehouse_path.folder_name, - warehouse_name=twp.warehouse_path.warehouse.name, - ) - for twp in title.warehouse_paths - if twp.path_type == path_type - ] - # Sort alphabetically by warehouse_name then folder_name - return sorted(paths, key=lambda p: (p.warehouse_name, p.folder_name)) - return TitleFullSchema( id=title.id, name=title.name, - producer_unique_id=title.producer_unique_id, - producer_display_name=title.producer_display_name, - producer_display_url=title.producer_display_url, - dev_warehouse_paths=build_warehouse_paths("dev"), - prod_warehouse_paths=build_warehouse_paths("prod"), - in_prod=title.in_prod, + maturity=title.maturity, events=title.events, books=[ BookLightSchema( @@ -103,6 +79,14 @@ def build_warehouse_paths(path_type: str) -> list[WarehousePathInfoSchema]: ) for book in title.books ], + collections=[ + TitleCollectionSchema( + collection_id=tc.collection_id, + collection_name=tc.collection.name, + path=str(tc.path), + ) + for tc in title.collections + ], ) @@ -115,17 +99,10 @@ def create_title( title = db_create_title( session, name=title_data.name, - producer_unique_id=title_data.producer_unique_id, - 
producer_display_name=title_data.producer_display_name, - producer_display_url=title_data.producer_display_url, - dev_warehouse_path_ids=title_data.dev_warehouse_path_ids, - prod_warehouse_path_ids=title_data.prod_warehouse_path_ids, - in_prod=title_data.in_prod, + maturity=title_data.maturity, ) return TitleLightSchema( id=title.id, name=title.name, - producer_unique_id=title.producer_unique_id, - producer_display_name=title.producer_display_name, - producer_display_url=title.producer_display_url, + maturity=title.maturity, ) diff --git a/backend/src/cms_backend/api/routes/warehouse_paths.py b/backend/src/cms_backend/api/routes/warehouse_paths.py deleted file mode 100644 index a39c45d..0000000 --- a/backend/src/cms_backend/api/routes/warehouse_paths.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Annotated - -from fastapi import APIRouter, Depends -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db import gen_dbsession -from cms_backend.db.warehouse_path import get_warehouse_paths as db_get_warehouse_paths -from cms_backend.schemas.orms import WarehousePathSchema - -router = APIRouter(prefix="/warehouse-paths", tags=["warehouse-paths"]) - - -@router.get("") -async def get_warehouse_paths( - session: Annotated[OrmSession, Depends(gen_dbsession)], -) -> list[WarehousePathSchema]: - """Get all warehouse paths with warehouse information""" - - return db_get_warehouse_paths(session) diff --git a/backend/src/cms_backend/db/book.py b/backend/src/cms_backend/db/book.py index 8fceaca..183fab0 100644 --- a/backend/src/cms_backend/db/book.py +++ b/backend/src/cms_backend/db/book.py @@ -1,10 +1,11 @@ +from pathlib import Path from typing import Any from uuid import UUID from sqlalchemy import select from sqlalchemy.orm import Session as OrmSession -from cms_backend.db.models import Book, BookLocation, WarehousePath, ZimfarmNotification +from cms_backend.db.models import Book, BookLocation, Warehouse, ZimfarmNotification from 
cms_backend.utils.datetime import getnow @@ -18,9 +19,6 @@ def create_book( zim_metadata: dict[str, Any], zimcheck_result: dict[str, Any], zimfarm_notification: ZimfarmNotification, - producer_display_name: str, - producer_display_url: str, - producer_unique_id: str, ) -> Book: """Create a new book""" @@ -41,9 +39,6 @@ def create_book( date=date, flavour=flavour, zimfarm_notification=zimfarm_notification, - producer_display_name=producer_display_name, - producer_display_url=producer_display_url, - producer_unique_id=producer_unique_id, ) session.add(book) zimfarm_notification.events.append( @@ -60,7 +55,8 @@ def create_book_location( session: OrmSession, *, book: Book, - warehouse_path_id: UUID, + warehouse_id: UUID, + path: Path, filename: str, status: str = "current", ) -> BookLocation: @@ -69,32 +65,33 @@ def create_book_location( Args: session: SQLAlchemy session book: Book instance - warehouse_path_id: ID of the warehouse path + warehouse_id: ID of the warehouse + path: Folder path within the warehouse (e.g., "dev-zim") filename: Filename in warehouse status: Location status ('current' or 'target'), defaults to 'current' Returns: Created BookLocation instance """ - # Get warehouse path info for event message - warehouse_path = session.get(WarehousePath, warehouse_path_id) - if not warehouse_path: - raise ValueError(f"WarehousePath with id {warehouse_path_id} not found") + # Get warehouse info for event message + warehouse = session.get(Warehouse, warehouse_id) + if not warehouse: + raise ValueError(f"Warehouse with id {warehouse_id} not found") - warehouse_name = warehouse_path.warehouse.name - folder_name = warehouse_path.folder_name + warehouse_name = warehouse.name location = BookLocation( book_id=book.id, + warehouse_id=warehouse_id, + path=path, status=status, filename=filename, ) - location.warehouse_path_id = warehouse_path_id session.add(location) book.locations.append(location) book.events.append( f"{getnow()}: added {status} location: {filename} in 
{warehouse_name}: " - f"{folder_name} ({warehouse_path_id})" + f"{path} ({warehouse_id})" ) return location diff --git a/backend/src/cms_backend/db/collection.py b/backend/src/cms_backend/db/collection.py new file mode 100644 index 0000000..9acca41 --- /dev/null +++ b/backend/src/cms_backend/db/collection.py @@ -0,0 +1,86 @@ +from uuid import UUID + +from sqlalchemy import and_, select +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.exceptions import RecordDoesNotExistError +from cms_backend.db.models import ( + Book, + BookLocation, + Collection, + CollectionTitle, + Title, +) + + +def get_collection_or_none(session: OrmSession, library_id: UUID) -> Collection | None: + """Get a collection by ID if possible else None""" + return session.scalars( + select(Collection).where(Collection.id == library_id) + ).one_or_none() + + +def get_collection(session: OrmSession, library_id: UUID) -> Collection: + """Get a collection by ID if possible else raise an exception""" + if (collection := get_collection_or_none(session, library_id=library_id)) is None: + raise RecordDoesNotExistError(f"Collection with ID {library_id} does not exist") + return collection + + +def get_collection_by_name_or_none( + session: OrmSession, collection_name: str +) -> Collection | None: + """Get a collection by name if possible else None""" + return session.scalars( + select(Collection).where(Collection.name == collection_name) + ).one_or_none() + + +def get_latest_books_for_collection( + session: OrmSession, collection_id: UUID +) -> list[Book]: + """ + Get the latest published book for each name+flavour combination in a collection. + + A collection contains many books; this function returns only the most recently + published book (by created_at) for each name+flavour combination. 
+ + Args: + session: ORM session + collection_id: ID of the collection + + Returns: + List of Book objects, one per name+flavour combination + """ + # Get all published books of the collection's titles that currently sit in + # the collection's warehouse, at the path set for the title in the collection + stmt = ( + select(Book) + .join(BookLocation) + .join(Title, Book.title_id == Title.id) + .join(CollectionTitle) + .join(Collection) + .where( + and_( + BookLocation.status == "current", + BookLocation.warehouse_id == Collection.warehouse_id, + BookLocation.path == CollectionTitle.path, + Book.status == "published", + Collection.id == collection_id, + ) + ) + .order_by(Book.name, Book.flavour, Book.created_at.desc()) + ) + + books = session.scalars(stmt).all() + + # Filter to keep only the latest book per name+flavour combination + seen: set[tuple[str | None, str | None]] = set() + latest_books: list[Book] = [] + for book in books: + key = (book.name, book.flavour) + if key not in seen: + seen.add(key) + latest_books.append(book) + + return latest_books diff --git a/backend/src/cms_backend/db/library.py b/backend/src/cms_backend/db/library.py deleted file mode 100644 index 14022f6..0000000 --- a/backend/src/cms_backend/db/library.py +++ /dev/null @@ -1,78 +0,0 @@ -from uuid import UUID - -from sqlalchemy import and_, select -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db.exceptions import RecordDoesNotExistError -from cms_backend.db.models import Book, BookLocation, Library, LibraryWarehousePath - - -def get_library_or_none(session: OrmSession, library_id: UUID) -> Library | None: - """Get a library by ID if possible else None""" - return session.scalars( - select(Library).where(Library.id == library_id) - ).one_or_none() - - -def get_library(session: OrmSession, library_id: UUID) -> Library: - """Get a library by ID if possible else raise an exception""" - if (library := get_library_or_none(session, library_id=library_id)) is None: - raise RecordDoesNotExistError(f"Library with ID {library_id} 
does not exist") - return library - - -def get_library_by_name_or_none( - session: OrmSession, library_name: str -) -> Library | None: - """Get a library by name if possible else None""" - return session.scalars( - select(Library).where(Library.name == library_name) - ).one_or_none() - - -def get_latest_books_for_library(session: OrmSession, library_id: UUID) -> list[Book]: - """ - Get the latest book for each name+flavour combination in a library. - - A library contains multiple warehouse paths. For each unique name+flavour - combination found in the library's warehouse paths, return only the most - recent published book (by created_at). - - Args: - session: ORM session - library_id: ID of the library - - Returns: - List of Book objects, one per name+flavour combination - """ - # Get all books in the library's warehouse paths that are published - # and currently located there - stmt = ( - select(Book) - .join(BookLocation) - .join( - LibraryWarehousePath, - BookLocation.warehouse_path_id == LibraryWarehousePath.warehouse_path_id, - ) - .where( - and_( - LibraryWarehousePath.library_id == library_id, - BookLocation.status == "current", - Book.status == "published", - ) - ) - .order_by(Book.name, Book.flavour, Book.created_at.desc()) - ) - - books = session.scalars(stmt).all() - - # Filter to keep only the latest book per name+flavour combination - seen: set[tuple[str | None, str | None]] = set() - latest_books: list[Book] = [] - for book in books: - key = (book.name, book.flavour) - if key not in seen: - seen.add(key) - latest_books.append(book) - - return latest_books diff --git a/backend/src/cms_backend/db/models.py b/backend/src/cms_backend/db/models.py index 5ddaba1..ea62020 100644 --- a/backend/src/cms_backend/db/models.py +++ b/backend/src/cms_backend/db/models.py @@ -1,3 +1,4 @@ +import typing from datetime import datetime from ipaddress import IPv4Address from pathlib import Path @@ -21,6 +22,29 @@ relationship, ) from sqlalchemy.sql.schema import MetaData 
+from sqlalchemy.types import TypeDecorator + + +class PathType(TypeDecorator[Path]): + """A SQLAlchemy TypeDecorator that converts between Python Path objects + and strings.""" + + impl = String + cache_ok = True + + @typing.override + def process_bind_param(self, value: Path | None, dialect: Any) -> str | None: + """Convert Path to string for storage in the database.""" + if value is not None: + return str(value) + return value + + @typing.override + def process_result_value(self, value: str | None, dialect: Any) -> Path | None: + """Convert string back to Path when retrieving from the database.""" + if value is not None: + return Path(value) + return value class Base(MappedAsDataclass, DeclarativeBase): @@ -41,6 +65,7 @@ class Base(MappedAsDataclass, DeclarativeBase): ARRAY(item_type=String) ), # transform Python List[str] into PostgreSQL Array of strings IPv4Address: INET, # transform Python IPV4Address into PostgreSQL INET + Path: PathType, } # This metadata specifies some naming conventions that will be used by @@ -96,9 +121,6 @@ class Book(Base): name: Mapped[str | None] date: Mapped[str | None] flavour: Mapped[str | None] - producer_display_name: Mapped[str] - producer_display_url: Mapped[str] - producer_unique_id: Mapped[str] status: Mapped[str] = mapped_column( init=False, default="pending_processing", server_default="pending_processing" ) @@ -127,9 +149,9 @@ class Book(Base): ) Index( - "idx_book_status_qa_failed", + "idx_book_status_bad_book", Book.status, - postgresql_where=text("status = 'qa_failed'"), + postgresql_where=text("status = 'bad_book'"), ) Index( @@ -157,41 +179,54 @@ class Title(Base): init=False, primary_key=True, server_default=text("uuid_generate_v4()") ) name: Mapped[str] = mapped_column(unique=True, index=True) - producer_unique_id: Mapped[str] + maturity: Mapped[str] = mapped_column(init=False, index=True, default="dev") events: Mapped[list[str]] = mapped_column(init=False, default_factory=list) - producer_display_name: Mapped[str | 
None] = mapped_column(init=False, default=None) - producer_display_url: Mapped[str | None] = mapped_column(init=False, default=None) - # Warehouse paths via junction table - warehouse_paths: Mapped[list["TitleWarehousePath"]] = relationship( + books: Mapped[list["Book"]] = relationship( + back_populates="title", + cascade="save-update, merge, refresh-expire", + init=False, + foreign_keys=[Book.title_id], + ) + + collections: Mapped[list["CollectionTitle"]] = relationship( back_populates="title", cascade="all, delete-orphan", init=False, ) - in_prod: Mapped[bool] = mapped_column( - init=False, default=False, server_default=text("false") + + +class Collection(Base): + __tablename__ = "collection" + id: Mapped[UUID] = mapped_column( + init=False, primary_key=True, server_default=text("uuid_generate_v4()") ) + name: Mapped[str] = mapped_column(unique=True, index=True) + warehouse_id: Mapped[UUID] = mapped_column(ForeignKey("warehouse.id")) - books: Mapped[list["Book"]] = relationship( - back_populates="title", - cascade="save-update, merge, refresh-expire", + titles: Mapped[list["CollectionTitle"]] = relationship( + back_populates="collection", + cascade="all, delete-orphan", init=False, - foreign_keys=[Book.title_id], + ) + + warehouse: Mapped["Warehouse"] = relationship( + back_populates="collections", init=False ) -class TitleWarehousePath(Base): - __tablename__ = "title_warehouse_path" +class CollectionTitle(Base): + __tablename__ = "collection_title" title_id: Mapped[UUID] = mapped_column( ForeignKey("title.id"), primary_key=True, init=False ) - warehouse_path_id: Mapped[UUID] = mapped_column( - ForeignKey("warehouse_path.id"), primary_key=True, init=False + collection_id: Mapped[UUID] = mapped_column( + ForeignKey("collection.id"), primary_key=True, init=False ) - path_type: Mapped[str] = mapped_column(primary_key=True) # 'dev' or 'prod' + path: Mapped[Path] = mapped_column() - title: Mapped["Title"] = relationship(back_populates="warehouse_paths", init=False) - 
warehouse_path: Mapped["WarehousePath"] = relationship(init=False) + title: Mapped["Title"] = relationship(back_populates="collections", init=False) + collection: Mapped["Collection"] = relationship(back_populates="titles", init=False) class Warehouse(Base): @@ -200,79 +235,34 @@ class Warehouse(Base): init=False, primary_key=True, server_default=text("uuid_generate_v4()") ) name: Mapped[str] - configuration: Mapped[dict[str, Any]] - warehouse_paths: Mapped[list["WarehousePath"]] = relationship( + + collections: Mapped[list["Collection"]] = relationship( back_populates="warehouse", cascade="all, delete-orphan", init=False, ) -class WarehousePath(Base): - __tablename__ = "warehouse_path" - id: Mapped[UUID] = mapped_column( - init=False, primary_key=True, server_default=text("uuid_generate_v4()") - ) - folder_name: Mapped[str] - warehouse_id: Mapped[UUID] = mapped_column(ForeignKey("warehouse.id"), init=False) - warehouse: Mapped["Warehouse"] = relationship( - back_populates="warehouse_paths", init=False - ) - - class BookLocation(Base): __tablename__ = "book_location" book_id: Mapped[UUID] = mapped_column(ForeignKey("book.id"), primary_key=True) - warehouse_path_id: Mapped[UUID] = mapped_column( - ForeignKey("warehouse_path.id"), primary_key=True, init=False + warehouse_id: Mapped[UUID] = mapped_column( + ForeignKey("warehouse.id"), primary_key=True ) + path: Mapped[Path] = mapped_column(primary_key=True) status: Mapped[str] = mapped_column(primary_key=True) # 'current' or 'target' - filename: Mapped[str] book: Mapped["Book"] = relationship(back_populates="locations", init=False) - warehouse_path: Mapped["WarehousePath"] = relationship(init=False) + warehouse: Mapped["Warehouse"] = relationship(init=False) - def full_local_path(self, warehouse_local_folders_map: dict[UUID, str]) -> Path: - folder_in_warehouse = Path(self.warehouse_path.folder_name) / self.filename - warehouse_folder = Path( - warehouse_local_folders_map[self.warehouse_path.warehouse.id] - ) - return 
warehouse_folder / folder_in_warehouse + def full_local_path(self, warehouse_local_folders_map: dict[UUID, Path]) -> Path: + return warehouse_local_folders_map[self.warehouse.id] / self.path_in_warehouse @property - def full_str(self) -> str: - return ( - f"{self.warehouse_path.warehouse.name}:" - f"{self.warehouse_path.folder_name}/{self.filename}" - ) - - -class Library(Base): - __tablename__ = "library" - id: Mapped[UUID] = mapped_column( - init=False, primary_key=True, server_default=text("uuid_generate_v4()") - ) - name: Mapped[str] = mapped_column(unique=True, index=True) - - # Warehouse paths via junction table - warehouse_paths: Mapped[list["LibraryWarehousePath"]] = relationship( - back_populates="library", - cascade="all, delete-orphan", - init=False, - ) - + def path_in_warehouse(self) -> Path: + return self.path / self.filename -class LibraryWarehousePath(Base): - __tablename__ = "library_warehouse_path" - library_id: Mapped[UUID] = mapped_column( - ForeignKey("library.id"), primary_key=True, init=False - ) - warehouse_path_id: Mapped[UUID] = mapped_column( - ForeignKey("warehouse_path.id"), primary_key=True, init=False - ) - - library: Mapped["Library"] = relationship( - back_populates="warehouse_paths", init=False - ) - warehouse_path: Mapped["WarehousePath"] = relationship(init=False) + @property + def full_str(self) -> str: + return f"{self.warehouse.name}:{self.path_in_warehouse}" diff --git a/backend/src/cms_backend/db/title.py b/backend/src/cms_backend/db/title.py index 56004f3..85a7e86 100644 --- a/backend/src/cms_backend/db/title.py +++ b/backend/src/cms_backend/db/title.py @@ -8,7 +8,7 @@ from cms_backend import logger from cms_backend.db import count_from_stmt from cms_backend.db.exceptions import RecordAlreadyExistsError -from cms_backend.db.models import Title, TitleWarehousePath +from cms_backend.db.models import Title from cms_backend.schemas.orms import ListResult, TitleLightSchema from cms_backend.utils.datetime import getnow @@ -30,18 
+30,6 @@ def get_title_by_name_or_none(session: OrmSession, *, name: str) -> Title | None return session.scalars(select(Title).where(Title.name == name)).one_or_none() -def get_title_by_name_and_producer_or_none( - session: OrmSession, *, name: str, producer_unique_id: str -) -> Title | None: - """Get a title by name and producer_unique_id if possible else None""" - - return session.scalars( - select(Title).where( - Title.name == name, Title.producer_unique_id == producer_unique_id - ) - ).one_or_none() - - def get_titles( session: OrmSession, *, @@ -56,9 +44,7 @@ def get_titles( select( Title.id.label("title_id"), Title.name.label("title_name"), - Title.producer_unique_id.label("producer_unique_id"), - Title.producer_display_name.label("producer_display_name"), - Title.producer_display_url.label("producer_display_url"), + Title.maturity.label("title_maturity"), ) .order_by(Title.name) .where( @@ -80,16 +66,12 @@ def get_titles( TitleLightSchema( id=title_id, name=title_name, - producer_unique_id=producer_unique_id, - producer_display_name=producer_display_name, - producer_display_url=producer_display_url, + maturity=title_maturity, ) for ( title_id, title_name, - producer_unique_id, - producer_display_name, - producer_display_url, + title_maturity, ) in session.execute(stmt.offset(skip).limit(limit)).all() ], ) @@ -99,48 +81,24 @@ def create_title( session: OrmSession, *, name: str, - producer_unique_id: str, - producer_display_name: str | None, - producer_display_url: str | None, - dev_warehouse_path_ids: list[UUID], - prod_warehouse_path_ids: list[UUID], - in_prod: bool, + maturity: str | None, ) -> Title: - """Create a new title with multiple warehouse paths + """Create a new title Args: - dev_warehouse_path_ids: List of warehouse path IDs for dev environment - prod_warehouse_path_ids: List of warehouse path IDs for prod environment + name: name of the title Raises: - ValueError: If dev_warehouse_path_ids or prod_warehouse_path_ids is empty 
RecordAlreadyExistsError: If title with same name already exists """ - # Validate that at least one path of each type is provided - if not dev_warehouse_path_ids: - raise ValueError("At least one dev warehouse path is required") - if not prod_warehouse_path_ids: - raise ValueError("At least one prod warehouse path is required") title = Title( name=name, - producer_unique_id=producer_unique_id, ) - title.producer_display_name = producer_display_name - title.producer_display_url = producer_display_url - title.in_prod = in_prod + if maturity: + title.maturity = maturity title.events.append(f"{getnow()}: title created") - # Add warehouse path associations - for path_id in dev_warehouse_path_ids: - twp = TitleWarehousePath(path_type="dev") - twp.warehouse_path_id = path_id - title.warehouse_paths.append(twp) - for path_id in prod_warehouse_path_ids: - twp = TitleWarehousePath(path_type="prod") - twp.warehouse_path_id = path_id - title.warehouse_paths.append(twp) - session.add(title) try: session.flush() diff --git a/backend/src/cms_backend/db/warehouse_path.py b/backend/src/cms_backend/db/warehouse_path.py deleted file mode 100644 index b18e0b1..0000000 --- a/backend/src/cms_backend/db/warehouse_path.py +++ /dev/null @@ -1,32 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db.models import Warehouse, WarehousePath -from cms_backend.schemas.orms import WarehousePathSchema - - -def get_warehouse_paths(session: OrmSession) -> list[WarehousePathSchema]: - """Get all warehouse paths with their warehouse information""" - - stmt = ( - select( - WarehousePath.id, - WarehousePath.folder_name, - Warehouse.id, - Warehouse.name, - ) - .join(Warehouse, WarehousePath.warehouse_id == Warehouse.id) - .order_by(Warehouse.name, WarehousePath.folder_name) - ) - - results = session.execute(stmt).all() - - return [ - WarehousePathSchema( - path_id=path_id, - folder_name=folder_name, - warehouse_id=warehouse_id, - 
warehouse_name=warehouse_name, - ) - for path_id, folder_name, warehouse_id, warehouse_name in results - ] diff --git a/backend/src/cms_backend/migrations/versions/40b58eace3fb_use_alter.py b/backend/src/cms_backend/migrations/versions/40b58eace3fb_use_alter.py deleted file mode 100644 index 669f2c5..0000000 --- a/backend/src/cms_backend/migrations/versions/40b58eace3fb_use_alter.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Use alter - -Revision ID: 40b58eace3fb -Revises: a37d94d59b8b -Create Date: 2025-10-24 09:38:03.364905 - -""" - -# revision identifiers, used by Alembic. -revision = "40b58eace3fb" -down_revision = "a37d94d59b8b" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/5376af219730_initial_schema.py b/backend/src/cms_backend/migrations/versions/5376af219730_initial_schema.py new file mode 100644 index 0000000..4257231 --- /dev/null +++ b/backend/src/cms_backend/migrations/versions/5376af219730_initial_schema.py @@ -0,0 +1,243 @@ +"""initial schema + +Revision ID: 5376af219730 +Revises: +Create Date: 2025-12-19 16:29:46.586314 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "5376af219730" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "book", + sa.Column("id", sa.Uuid(), nullable=False), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("article_count", sa.Integer(), nullable=False), + sa.Column("media_count", sa.Integer(), nullable=False), + sa.Column("size", sa.Integer(), nullable=False), + sa.Column( + "zimcheck_result", postgresql.JSONB(astext_type=sa.Text()), nullable=False + ), + sa.Column( + "zim_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False + ), + sa.Column("name", sa.String(), nullable=True), + sa.Column("date", sa.String(), nullable=True), + sa.Column("flavour", sa.String(), nullable=True), + sa.Column( + "status", sa.String(), server_default="pending_processing", nullable=False + ), + sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False), + sa.Column("title_id", sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint( + ["title_id"], + ["title.id"], + name=op.f("fk_book_title_id_title"), + use_alter=True, + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_book")), + ) + op.create_index( + "idx_book_status_bad_book", + "book", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'bad_book'"), + ) + op.create_index( + "idx_book_status_errored", + "book", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'errored'"), + ) + op.create_index( + "idx_book_status_pending_move", + "book", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'pending_move'"), + ) + op.create_index( + "idx_book_status_pending_processing", + "book", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'pending_processing'"), + ) + op.create_index( + "idx_book_status_pending_title", + "book", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'pending_title'"), + ) + op.create_table( + "title", + sa.Column( + "id", + sa.Uuid(), + server_default=sa.text("uuid_generate_v4()"), + nullable=False, + ), + sa.Column("name", sa.String(), nullable=False), + 
sa.Column("maturity", sa.String(), nullable=False), + sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_title")), + ) + op.create_index(op.f("ix_title_maturity"), "title", ["maturity"], unique=False) + op.create_index(op.f("ix_title_name"), "title", ["name"], unique=True) + op.create_table( + "warehouse", + sa.Column( + "id", + sa.Uuid(), + server_default=sa.text("uuid_generate_v4()"), + nullable=False, + ), + sa.Column("name", sa.String(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_warehouse")), + ) + op.create_table( + "book_location", + sa.Column("book_id", sa.Uuid(), nullable=False), + sa.Column("warehouse_id", sa.Uuid(), nullable=False), + sa.Column("path", sa.String(), nullable=False), + sa.Column("status", sa.String(), nullable=False), + sa.Column("filename", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["book_id"], ["book.id"], name=op.f("fk_book_location_book_id_book") + ), + sa.ForeignKeyConstraint( + ["warehouse_id"], + ["warehouse.id"], + name=op.f("fk_book_location_warehouse_id_warehouse"), + ), + sa.PrimaryKeyConstraint( + "book_id", "warehouse_id", "path", "status", name=op.f("pk_book_location") + ), + ) + op.create_table( + "collection", + sa.Column( + "id", + sa.Uuid(), + server_default=sa.text("uuid_generate_v4()"), + nullable=False, + ), + sa.Column("name", sa.String(), nullable=False), + sa.Column("warehouse_id", sa.Uuid(), nullable=False), + sa.ForeignKeyConstraint( + ["warehouse_id"], + ["warehouse.id"], + name=op.f("fk_collection_warehouse_id_warehouse"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_collection")), + ) + op.create_index(op.f("ix_collection_name"), "collection", ["name"], unique=True) + op.create_table( + "zimfarm_notification", + sa.Column("id", sa.Uuid(), nullable=False), + sa.Column("received_at", sa.DateTime(), nullable=False), + sa.Column("content", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + 
sa.Column("status", sa.String(), server_default="pending", nullable=False), + sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False), + sa.Column("book_id", sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint( + ["book_id"], ["book.id"], name=op.f("fk_zimfarm_notification_book_id_book") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_zimfarm_notification")), + ) + op.create_index( + "idx_zimfarm_notification_status_bad_notification", + "zimfarm_notification", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'bad_notification'"), + ) + op.create_index( + "idx_zimfarm_notification_status_pending", + "zimfarm_notification", + ["status"], + unique=False, + postgresql_where=sa.text("status = 'pending'"), + ) + op.create_table( + "collection_title", + sa.Column("title_id", sa.Uuid(), nullable=False), + sa.Column("collection_id", sa.Uuid(), nullable=False), + sa.Column("path", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["collection_id"], + ["collection.id"], + name=op.f("fk_collection_title_collection_id_collection"), + ), + sa.ForeignKeyConstraint( + ["title_id"], ["title.id"], name=op.f("fk_collection_title_title_id_title") + ), + sa.PrimaryKeyConstraint( + "title_id", "collection_id", name=op.f("pk_collection_title") + ), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table("collection_title") + op.drop_index( + "idx_zimfarm_notification_status_pending", + table_name="zimfarm_notification", + postgresql_where=sa.text("status = 'pending'"), + ) + op.drop_index( + "idx_zimfarm_notification_status_bad_notification", + table_name="zimfarm_notification", + postgresql_where=sa.text("status = 'bad_notification'"), + ) + op.drop_table("zimfarm_notification") + op.drop_index(op.f("ix_collection_name"), table_name="collection") + op.drop_table("collection") + op.drop_table("book_location") + op.drop_table("warehouse") + op.drop_index(op.f("ix_title_name"), table_name="title") + op.drop_index(op.f("ix_title_maturity"), table_name="title") + op.drop_table("title") + op.drop_index( + "idx_book_status_pending_title", + table_name="book", + postgresql_where=sa.text("status = 'pending_title'"), + ) + op.drop_index( + "idx_book_status_pending_processing", + table_name="book", + postgresql_where=sa.text("status = 'pending_processing'"), + ) + op.drop_index( + "idx_book_status_pending_move", + table_name="book", + postgresql_where=sa.text("status = 'pending_move'"), + ) + op.drop_index( + "idx_book_status_errored", + table_name="book", + postgresql_where=sa.text("status = 'errored'"), + ) + op.drop_index( + "idx_book_status_bad_book", + table_name="book", + postgresql_where=sa.text("status = 'bad_book'"), + ) + op.drop_table("book") + # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/6c6181d36517_add_support_for_flavours.py b/backend/src/cms_backend/migrations/versions/6c6181d36517_add_support_for_flavours.py deleted file mode 100644 index b93d89d..0000000 --- a/backend/src/cms_backend/migrations/versions/6c6181d36517_add_support_for_flavours.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Add support for flavours - -Revision ID: 6c6181d36517 -Revises: 40b58eace3fb -Create Date: 2025-10-24 14:29:19.514445 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "6c6181d36517" -down_revision = "40b58eace3fb" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint("fk_title_last_book_id_book", "title", type_="foreignkey") - op.drop_column("title", "last_book_id") - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "title", - sa.Column("last_book_id", sa.UUID(), autoincrement=False, nullable=True), - ) - op.create_foreign_key( - "fk_title_last_book_id_book", "title", "book", ["last_book_id"], ["id"] - ) - # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/92d03596d8f7_title_name_is_unique.py b/backend/src/cms_backend/migrations/versions/92d03596d8f7_title_name_is_unique.py deleted file mode 100644 index c610e96..0000000 --- a/backend/src/cms_backend/migrations/versions/92d03596d8f7_title_name_is_unique.py +++ /dev/null @@ -1,90 +0,0 @@ -"""title name is unique - -Revision ID: 92d03596d8f7 -Revises: add_producer_fields -Create Date: 2025-11-07 14:25:11.815803 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision = "92d03596d8f7" -down_revision = "add_producer_fields" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index( - "idx_book_status_pending_notification", - table_name="book", - postgresql_where="((status)::text = 'pending_notification'::text)", - ) - op.create_index( - "idx_book_status_pending_processing", - "book", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'pending_processing'"), - ) - op.create_index(op.f("ix_title_name"), "title", ["name"], unique=True) - op.drop_index( - "ix_zimfarm_notification_status_pending_bad", - table_name="zimfarm_notification", - postgresql_where="((status)::text = ANY ((ARRAY['pending'::character varying, " - "'bad_notification'::character varying])::text[]))", - ) - op.create_index( - "idx_zimfarm_notification_status_bad_notification", - "zimfarm_notification", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'bad_notification'"), - ) - op.create_index( - "idx_zimfarm_notification_status_pending", - "zimfarm_notification", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'pending'"), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index( - "idx_zimfarm_notification_status_pending", - table_name="zimfarm_notification", - postgresql_where=sa.text("status = 'pending'"), - ) - op.drop_index( - "idx_zimfarm_notification_status_bad_notification", - table_name="zimfarm_notification", - postgresql_where=sa.text("status = 'bad_notification'"), - ) - op.create_index( - "ix_zimfarm_notification_status_pending_bad", - "zimfarm_notification", - ["status"], - unique=False, - postgresql_where="((status)::text = ANY ((ARRAY['pending'::character varying, " - "'bad_notification'::character varying])::text[]))", - ) - op.drop_index(op.f("ix_title_name"), table_name="title") - op.drop_index( - "idx_book_status_pending_processing", - table_name="book", - postgresql_where=sa.text("status = 'pending_processing'"), - ) - op.create_index( - "idx_book_status_pending_notification", - "book", - ["status"], - unique=False, - postgresql_where="((status)::text = 'pending_notification'::text)", - ) - # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/a37d94d59b8b_add_more_columns.py b/backend/src/cms_backend/migrations/versions/a37d94d59b8b_add_more_columns.py deleted file mode 100644 index 4248d61..0000000 --- a/backend/src/cms_backend/migrations/versions/a37d94d59b8b_add_more_columns.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Add more columns - -Revision ID: a37d94d59b8b -Revises: e7b49a8907c1 -Create Date: 2025-10-24 09:36:18.311092 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "a37d94d59b8b" -down_revision = "e7b49a8907c1" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - "book", sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False) - ) - op.add_column( - "title", sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False) - ) - op.add_column( - "zimfarm_notification", - sa.Column("events", postgresql.ARRAY(sa.String()), nullable=False), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("zimfarm_notification", "events") - op.drop_column("title", "events") - op.drop_column("book", "events") - # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/add_book_location_table.py b/backend/src/cms_backend/migrations/versions/add_book_location_table.py deleted file mode 100644 index f9cfe4d..0000000 --- a/backend/src/cms_backend/migrations/versions/add_book_location_table.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Add book_location table to store current and target file locations - -Revision ID: add_book_location_table -Revises: title_warehouse_paths -Create Date: 2025-11-11 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import UUID - -# revision identifiers, used by Alembic. 
-revision = "add_book_location_table" -down_revision = "title_warehouse_paths" -branch_labels = None -depends_on = None - - -def upgrade(): - # Create the book_location table with composite primary key - op.create_table( - "book_location", - sa.Column("book_id", UUID(), nullable=False), - sa.Column("warehouse_path_id", UUID(), nullable=False), - sa.Column("status", sa.String(), nullable=False), - sa.Column("filename", sa.String(), nullable=False), - sa.ForeignKeyConstraint( - ["book_id"], ["book.id"], name="fk_book_location_book_id_book" - ), - sa.ForeignKeyConstraint( - ["warehouse_path_id"], - ["warehouse_path.id"], - name="fk_book_location_warehouse_path_id_warehouse_path", - ), - sa.PrimaryKeyConstraint( - "book_id", "warehouse_path_id", "status", name="pk_book_location" - ), - ) - - -def downgrade(): - op.drop_table("book_location") diff --git a/backend/src/cms_backend/migrations/versions/add_book_metadata_fields.py b/backend/src/cms_backend/migrations/versions/add_book_metadata_fields.py deleted file mode 100644 index 30bc11e..0000000 --- a/backend/src/cms_backend/migrations/versions/add_book_metadata_fields.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Add metadata fields to book table - -Revision ID: add_book_metadata_fields -Revises: make_warehouse_paths_required -Create Date: 2025-11-10 00:00:01.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "add_book_metadata_fields" -down_revision = "make_warehouse_paths_required" -branch_labels = None -depends_on = None - - -def upgrade(): - # Add created_at field to book table (mandatory) - op.add_column( - "book", - sa.Column("created_at", sa.DateTime(), nullable=False), - ) - # Add metadata fields to book table (optional) - op.add_column( - "book", - sa.Column("name", sa.String(), nullable=True), - ) - op.add_column( - "book", - sa.Column("date", sa.String(), nullable=True), - ) - op.add_column( - "book", - sa.Column("flavour", sa.String(), nullable=True), - ) - - -def downgrade(): - # Drop metadata fields from book table - op.drop_column("book", "flavour") - op.drop_column("book", "date") - op.drop_column("book", "name") - op.drop_column("book", "created_at") diff --git a/backend/src/cms_backend/migrations/versions/add_library_tables.py b/backend/src/cms_backend/migrations/versions/add_library_tables.py deleted file mode 100644 index aeddc6a..0000000 --- a/backend/src/cms_backend/migrations/versions/add_library_tables.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Add library and library_warehouse_path tables - -Revision ID: add_library_tables -Revises: add_pending_move_index -Create Date: 2025-11-18 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import UUID - -# revision identifiers, used by Alembic. 
-revision = "add_library_tables" -down_revision = "add_pending_move_index" -branch_labels = None -depends_on = None - - -def upgrade(): - # Create library table - op.create_table( - "library", - sa.Column( - "id", UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False - ), - sa.Column("name", sa.String(), nullable=False), - sa.PrimaryKeyConstraint("id", name="pk_library"), - sa.UniqueConstraint("name", name="uq_library_name"), - ) - - # Create unique index on library name for fast lookups - op.create_index("ix_library_name", "library", ["name"], unique=True) - - # Create library_warehouse_path junction table - op.create_table( - "library_warehouse_path", - sa.Column("library_id", UUID(), nullable=False), - sa.Column("warehouse_path_id", UUID(), nullable=False), - sa.ForeignKeyConstraint( - ["library_id"], - ["library.id"], - name="fk_library_warehouse_path_library_id_library", - ), - sa.ForeignKeyConstraint( - ["warehouse_path_id"], - ["warehouse_path.id"], - name="fk_library_warehouse_path_warehouse_path_id_warehouse_path", - ), - sa.PrimaryKeyConstraint( - "library_id", "warehouse_path_id", name="pk_library_warehouse_path" - ), - ) - - -def downgrade(): - # Drop tables in reverse order due to foreign key constraints - op.drop_table("library_warehouse_path") - op.drop_table("library") diff --git a/backend/src/cms_backend/migrations/versions/add_pending_move_index.py b/backend/src/cms_backend/migrations/versions/add_pending_move_index.py deleted file mode 100644 index e9c2907..0000000 --- a/backend/src/cms_backend/migrations/versions/add_pending_move_index.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Add partial index for pending_move book status - -Revision ID: add_pending_move_index -Revises: add_book_location_table -Create Date: 2025-11-13 00:00:00.000000 - -""" - -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "add_pending_move_index" -down_revision = "add_book_location_table" -branch_labels = None -depends_on = None - - -def upgrade(): - # Create partial index for pending_move status - op.create_index( - "idx_book_status_pending_move", - "book", - ["status"], - postgresql_where="status = 'pending_move'", - ) - - -def downgrade(): - # Drop the partial index - op.drop_index("idx_book_status_pending_move", table_name="book") diff --git a/backend/src/cms_backend/migrations/versions/add_producer_fields.py b/backend/src/cms_backend/migrations/versions/add_producer_fields.py deleted file mode 100644 index 78a5436..0000000 --- a/backend/src/cms_backend/migrations/versions/add_producer_fields.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Add producer fields to book and title - -Revision ID: add_producer_fields -Revises: add_warehouse_paths_to_title -Create Date: 2025-11-07 00:00:01.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision = "add_producer_fields" -down_revision = "add_warehouse_paths_to_title" -branch_labels = None -depends_on = None - - -def upgrade(): - # Add producer fields to book table (mandatory) - op.add_column( - "book", - sa.Column("producer_display_name", sa.String(), nullable=False), - ) - op.add_column( - "book", - sa.Column("producer_display_url", sa.String(), nullable=False), - ) - op.add_column( - "book", - sa.Column("producer_unique_id", sa.String(), nullable=False), - ) - - # Add producer fields to title table - op.add_column( - "title", - sa.Column("producer_display_name", sa.String(), nullable=True), - ) - op.add_column( - "title", - sa.Column("producer_display_url", sa.String(), nullable=True), - ) - op.add_column( - "title", - sa.Column("producer_unique_id", sa.String(), nullable=False), - ) - - -def downgrade(): - # Drop producer fields from title table - op.drop_column("title", "producer_unique_id") - op.drop_column("title", "producer_display_url") - op.drop_column("title", 
"producer_display_name") - - # Drop producer fields from book table - op.drop_column("book", "producer_unique_id") - op.drop_column("book", "producer_display_url") - op.drop_column("book", "producer_display_name") diff --git a/backend/src/cms_backend/migrations/versions/add_status_to_book.py b/backend/src/cms_backend/migrations/versions/add_status_to_book.py deleted file mode 100644 index 851a381..0000000 --- a/backend/src/cms_backend/migrations/versions/add_status_to_book.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Add status column to book - -Revision ID: add_status_to_book -Revises: add_status_to_notif -Create Date: 2025-11-03 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision = "add_status_to_book" -down_revision = "add_status_to_notif" -branch_labels = None -depends_on = None - - -def upgrade(): - # Add status column with default value 'pending' - op.add_column( - "book", - sa.Column("status", sa.String(), nullable=False, server_default="pending"), - ) - - # For existing books, set status based on title_id - # - If book has title_id, it was processed successfully -> "processed" - # - If book has no title_id, it's pending a title -> "pending_title" - conn = op.get_bind() - - # Set status to "processed" where title_id is not null - conn.execute( - sa.text("UPDATE book SET status = 'processed' WHERE title_id IS NOT NULL") - ) - - # Set status to "pending_title" where title_id is null - conn.execute( - sa.text("UPDATE book SET status = 'pending_title' WHERE title_id IS NULL") - ) - - # Create partial indexes for specific status values - # These indexes help efficiently query for books that need attention - op.create_index( - "idx_book_status_qa_failed", - "book", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'qa_failed'"), - ) - - op.create_index( - "idx_book_status_pending_title", - "book", - ["status"], - unique=False, - postgresql_where=sa.text("status = 
'pending_title'"), - ) - - op.create_index( - "idx_book_status_errored", - "book", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'errored'"), - ) - - -def downgrade(): - # Drop the partial indexes - op.drop_index("idx_book_status_errored", table_name="book") - op.drop_index("idx_book_status_pending_title", table_name="book") - op.drop_index("idx_book_status_qa_failed", table_name="book") - - # Drop status column - op.drop_column("book", "status") diff --git a/backend/src/cms_backend/migrations/versions/add_status_to_zimfarm_notification.py b/backend/src/cms_backend/migrations/versions/add_status_to_zimfarm_notification.py deleted file mode 100644 index 4fbbaa4..0000000 --- a/backend/src/cms_backend/migrations/versions/add_status_to_zimfarm_notification.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Add status column to zimfarm_notification - -Revision ID: add_status_to_notif -Revises: 6c6181d36517 -Create Date: 2025-11-03 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "add_status_to_notif" -down_revision = "6c6181d36517" -branch_labels = None -depends_on = None - - -def upgrade(): - # Add status column with default value 'pending' - op.add_column( - "zimfarm_notification", - sa.Column("status", sa.String(), nullable=False, server_default="pending"), - ) - - # Migrate existing data to status column based on processed/errored logic - # Status logic: - # - errored=true -> "errored" - # - processed=true, errored=false, book_id is null -> "bad_notification" - # - processed=true, errored=false, book_id is not null -> "processed" - # - processed=false -> "pending" - - conn = op.get_bind() - - # Set status to "errored" where errored=true - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET status = 'errored' WHERE errored = true" - ) - ) - - # Set status to "bad_notification" where processed=true, errored=false, book_id - # is null - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET status = 'bad_notification' " - "WHERE processed = true AND errored = false AND book_id IS NULL" - ) - ) - - # Set status to "processed" where processed=true, errored=false, book_id is not - # null - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET status = 'processed' " - "WHERE processed = true AND errored = false AND book_id IS NOT NULL" - ) - ) - - # Set status to "pending" where processed=false (should already be default, but - # explicit) - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET status = 'pending' WHERE processed = false" - ) - ) - - # Drop the old columns - op.drop_column("zimfarm_notification", "processed") - op.drop_column("zimfarm_notification", "errored") - - # Create partial indexes for pending and bad_notification status values - # These indexes help efficiently query for notifications that need attention - op.create_index( - "idx_zimfarm_notification_status_pending", - "zimfarm_notification", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'pending'"), - ) - - 
op.create_index( - "idx_zimfarm_notification_status_bad_notification", - "zimfarm_notification", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'bad_notification'"), - ) - - -def downgrade(): - # Drop the partial indexes - op.drop_index( - "idx_zimfarm_notification_status_bad_notification", - table_name="zimfarm_notification", - ) - op.drop_index( - "idx_zimfarm_notification_status_pending", - table_name="zimfarm_notification", - ) - - # Add back processed and errored columns - op.add_column( - "zimfarm_notification", - sa.Column("processed", sa.Boolean(), nullable=False, server_default=sa.false()), - ) - op.add_column( - "zimfarm_notification", - sa.Column("errored", sa.Boolean(), nullable=False, server_default=sa.false()), - ) - - # Migrate data back from status to processed/errored - conn = op.get_bind() - - # pending -> processed=false, errored=false - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET processed = false, errored = false " - "WHERE status = 'pending'" - ) - ) - - # errored -> processed=true, errored=true - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET processed = true, errored = true " - "WHERE status = 'errored'" - ) - ) - - # bad_notification -> processed=true, errored=false - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET processed = true, errored = false " - "WHERE status = 'bad_notification'" - ) - ) - - # processed -> processed=true, errored=false - conn.execute( - sa.text( - "UPDATE zimfarm_notification SET processed = true, errored = false " - "WHERE status = 'processed'" - ) - ) - - # Drop status column - op.drop_column("zimfarm_notification", "status") diff --git a/backend/src/cms_backend/migrations/versions/add_warehouse_paths_to_title.py b/backend/src/cms_backend/migrations/versions/add_warehouse_paths_to_title.py deleted file mode 100644 index f903bee..0000000 --- a/backend/src/cms_backend/migrations/versions/add_warehouse_paths_to_title.py +++ /dev/null @@ -1,69 +0,0 @@ 
-"""Add warehouse paths and in_prod to title - -Revision ID: add_warehouse_paths_to_title -Revises: add_warehouse_tables -Create Date: 2025-11-07 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import UUID - -# revision identifiers, used by Alembic. -revision = "add_warehouse_paths_to_title" -down_revision = "add_warehouse_tables" -branch_labels = None -depends_on = None - - -def upgrade(): - # Add warehouse path references and in_prod flag to title table - op.add_column( - "title", - sa.Column("dev_warehouse_path_id", UUID(), nullable=True), - ) - op.add_column( - "title", - sa.Column("prod_warehouse_path_id", UUID(), nullable=True), - ) - op.add_column( - "title", - sa.Column( - "in_prod", - sa.Boolean(), - nullable=False, - server_default=sa.text("false"), - ), - ) - - # Add foreign key constraints - op.create_foreign_key( - "fk_title_dev_warehouse_path_id_warehouse_path", - "title", - "warehouse_path", - ["dev_warehouse_path_id"], - ["id"], - ) - op.create_foreign_key( - "fk_title_prod_warehouse_path_id_warehouse_path", - "title", - "warehouse_path", - ["prod_warehouse_path_id"], - ["id"], - ) - - -def downgrade(): - # Drop foreign key constraints first - op.drop_constraint( - "fk_title_prod_warehouse_path_id_warehouse_path", "title", type_="foreignkey" - ) - op.drop_constraint( - "fk_title_dev_warehouse_path_id_warehouse_path", "title", type_="foreignkey" - ) - - # Drop columns - op.drop_column("title", "in_prod") - op.drop_column("title", "prod_warehouse_path_id") - op.drop_column("title", "dev_warehouse_path_id") diff --git a/backend/src/cms_backend/migrations/versions/add_warehouse_tables.py b/backend/src/cms_backend/migrations/versions/add_warehouse_tables.py deleted file mode 100644 index 43e7a38..0000000 --- a/backend/src/cms_backend/migrations/versions/add_warehouse_tables.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Add warehouse and warehouse_path tables - -Revision ID: add_warehouse_tables 
-Revises: rename_book_pending -Create Date: 2025-11-03 02:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import JSONB, UUID - -# revision identifiers, used by Alembic. -revision = "add_warehouse_tables" -down_revision = "rename_book_pending" -branch_labels = None -depends_on = None - - -def upgrade(): - # Create warehouse table - op.create_table( - "warehouse", - sa.Column( - "id", UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False - ), - sa.Column("name", sa.String(), nullable=False), - sa.Column("configuration", JSONB(), nullable=False), - sa.PrimaryKeyConstraint("id", name="pk_warehouse"), - ) - - # Create warehouse_path table - op.create_table( - "warehouse_path", - sa.Column( - "id", UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False - ), - sa.Column("folder_name", sa.String(), nullable=False), - sa.Column("warehouse_id", UUID(), nullable=False), - sa.ForeignKeyConstraint( - ["warehouse_id"], - ["warehouse.id"], - name="fk_warehouse_path_warehouse_id_warehouse", - ), - sa.PrimaryKeyConstraint("id", name="pk_warehouse_path"), - ) - - -def downgrade(): - # Drop tables in reverse order due to foreign key constraint - op.drop_table("warehouse_path") - op.drop_table("warehouse") diff --git a/backend/src/cms_backend/migrations/versions/e7b49a8907c1_initial_database_schema.py b/backend/src/cms_backend/migrations/versions/e7b49a8907c1_initial_database_schema.py deleted file mode 100644 index 7235f0b..0000000 --- a/backend/src/cms_backend/migrations/versions/e7b49a8907c1_initial_database_schema.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Initial database schema - -Revision ID: e7b49a8907c1 -Revises: -Create Date: 2025-10-23 13:59:04.894412 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision = "e7b49a8907c1" -down_revision = None -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "book", - sa.Column("id", sa.Uuid(), nullable=False), - sa.Column("article_count", sa.Integer(), nullable=False), - sa.Column("media_count", sa.Integer(), nullable=False), - sa.Column("size", sa.Integer(), nullable=False), - sa.Column( - "zimcheck_result", postgresql.JSONB(astext_type=sa.Text()), nullable=False - ), - sa.Column( - "zim_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False - ), - sa.Column("title_id", sa.Uuid(), nullable=True), - sa.PrimaryKeyConstraint("id", name=op.f("pk_book")), - ) - op.create_table( - "title", - sa.Column( - "id", - sa.Uuid(), - server_default=sa.text("uuid_generate_v4()"), - nullable=False, - ), - sa.Column("name", sa.String(), nullable=False), - sa.Column("last_book_id", sa.Uuid(), nullable=True), - sa.ForeignKeyConstraint( - ["last_book_id"], ["book.id"], name=op.f("fk_title_last_book_id_book") - ), - sa.PrimaryKeyConstraint("id", name=op.f("pk_title")), - ) - op.create_foreign_key( - "fk_book_title_id_title", "book", "title", ["title_id"], ["id"] - ) - op.create_table( - "zimfarm_notification", - sa.Column("id", sa.Uuid(), nullable=False), - sa.Column("received_at", sa.DateTime(), nullable=False), - sa.Column("content", postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.Column( - "processed", sa.Boolean(), server_default=sa.text("false"), nullable=False - ), - sa.Column( - "errored", sa.Boolean(), server_default=sa.text("false"), nullable=False - ), - sa.Column("book_id", sa.Uuid(), nullable=True), - sa.ForeignKeyConstraint( - ["book_id"], ["book.id"], name=op.f("fk_zimfarm_notification_book_id_book") - ), - sa.PrimaryKeyConstraint("id", name=op.f("pk_zimfarm_notification")), - ) - op.create_index( - "idx_zimfarm_notification_errored_false", - "zimfarm_notification", - ["errored"], - unique=False, - 
postgresql_where=sa.text("errored IS true"), - ) - op.create_index( - "idx_zimfarm_notification_processed_false", - "zimfarm_notification", - ["processed"], - unique=False, - postgresql_where=sa.text("processed IS false"), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_index( - "idx_zimfarm_notification_processed_false", - table_name="zimfarm_notification", - postgresql_where=sa.text("processed IS false"), - ) - op.drop_index( - "idx_zimfarm_notification_errored_false", - table_name="zimfarm_notification", - postgresql_where=sa.text("errored IS true"), - ) - op.drop_table("zimfarm_notification") - op.drop_table("title") - op.drop_table("book") - # ### end Alembic commands ### diff --git a/backend/src/cms_backend/migrations/versions/make_warehouse_paths_required.py b/backend/src/cms_backend/migrations/versions/make_warehouse_paths_required.py deleted file mode 100644 index c443a4d..0000000 --- a/backend/src/cms_backend/migrations/versions/make_warehouse_paths_required.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Make warehouse paths required on title - -Revision ID: make_warehouse_paths_required -Revises: 92d03596d8f7 -Create Date: 2025-11-10 00:00:00.000000 - -""" - -from alembic import op -from sqlalchemy.dialects.postgresql import UUID - -# revision identifiers, used by Alembic. 
-revision = "make_warehouse_paths_required" -down_revision = "92d03596d8f7" -branch_labels = None -depends_on = None - - -def upgrade(): - # Make warehouse path columns non-nullable - op.alter_column( - "title", - "dev_warehouse_path_id", - existing_type=UUID(), - nullable=False, - ) - op.alter_column( - "title", - "prod_warehouse_path_id", - existing_type=UUID(), - nullable=False, - ) - - -def downgrade(): - # Revert warehouse path columns to nullable - op.alter_column( - "title", - "prod_warehouse_path_id", - existing_type=UUID(), - nullable=True, - ) - op.alter_column( - "title", - "dev_warehouse_path_id", - existing_type=UUID(), - nullable=True, - ) diff --git a/backend/src/cms_backend/migrations/versions/rename_book_pending_status.py b/backend/src/cms_backend/migrations/versions/rename_book_pending_status.py deleted file mode 100644 index 8cac6bd..0000000 --- a/backend/src/cms_backend/migrations/versions/rename_book_pending_status.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Rename book pending status to pending_processing - -Revision ID: rename_book_pending -Revises: add_status_to_book -Create Date: 2025-11-03 01:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "rename_book_pending" -down_revision = "add_status_to_book" -branch_labels = None -depends_on = None - - -def upgrade(): - # Update existing books with status='pending' to status='pending_processing' - conn = op.get_bind() - conn.execute( - sa.text( - "UPDATE book SET status = 'pending_processing' WHERE status = 'pending'" - ) - ) - - # Update the default value for the status column - op.alter_column( - "book", - "status", - server_default="pending_processing", - existing_type=sa.String(), - existing_nullable=False, - ) - - # Create partial index for pending_processing status - op.create_index( - "idx_book_status_pending_processing", - "book", - ["status"], - unique=False, - postgresql_where=sa.text("status = 'pending_processing'"), - ) - - -def downgrade(): - # Drop the partial index - op.drop_index( - "idx_book_status_pending_processing", - table_name="book", - ) - - # Restore the old default value - op.alter_column( - "book", - "status", - server_default="pending", - existing_type=sa.String(), - existing_nullable=False, - ) - - # Update existing books with status='pending_processing' back to status='pending' - conn = op.get_bind() - conn.execute( - sa.text( - "UPDATE book SET status = 'pending' WHERE status = 'pending_processing'" - ) - ) diff --git a/backend/src/cms_backend/migrations/versions/title_warehouse_paths.py b/backend/src/cms_backend/migrations/versions/title_warehouse_paths.py deleted file mode 100644 index 72f29b4..0000000 --- a/backend/src/cms_backend/migrations/versions/title_warehouse_paths.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Add title_warehouse_path junction table to support multiple warehouse paths - -Revision ID: title_warehouse_paths -Revises: add_book_metadata_fields -Create Date: 2025-11-11 00:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import UUID - -# revision identifiers, used by Alembic. 
-revision = "title_warehouse_paths" -down_revision = "add_book_metadata_fields" -branch_labels = None -depends_on = None - - -def upgrade(): - # Create the junction table with composite primary key - op.create_table( - "title_warehouse_path", - sa.Column("title_id", UUID(), nullable=False), - sa.Column("warehouse_path_id", UUID(), nullable=False), - sa.Column("path_type", sa.String(), nullable=False), - sa.ForeignKeyConstraint( - ["title_id"], ["title.id"], name="fk_title_warehouse_path_title_id_title" - ), - sa.ForeignKeyConstraint( - ["warehouse_path_id"], - ["warehouse_path.id"], - name="fk_title_warehouse_path_warehouse_path_id_warehouse_path", - ), - sa.PrimaryKeyConstraint( - "title_id", "warehouse_path_id", "path_type", name="pk_title_warehouse_path" - ), - ) - - # Migrate data from old columns to junction table - op.execute( - """ - INSERT INTO title_warehouse_path (title_id, warehouse_path_id, path_type) - SELECT id, dev_warehouse_path_id, 'dev' FROM title - WHERE dev_warehouse_path_id IS NOT NULL - """ - ) - - op.execute( - """ - INSERT INTO title_warehouse_path (title_id, warehouse_path_id, path_type) - SELECT id, prod_warehouse_path_id, 'prod' FROM title - WHERE prod_warehouse_path_id IS NOT NULL - """ - ) - - # Drop the old columns and foreign keys - op.drop_constraint( - "fk_title_prod_warehouse_path_id_warehouse_path", "title", type_="foreignkey" - ) - op.drop_constraint( - "fk_title_dev_warehouse_path_id_warehouse_path", "title", type_="foreignkey" - ) - - op.drop_column("title", "dev_warehouse_path_id") - op.drop_column("title", "prod_warehouse_path_id") - - -def downgrade(): - # Add back the old columns - op.add_column( - "title", - sa.Column("dev_warehouse_path_id", UUID(), nullable=True), - ) - op.add_column( - "title", - sa.Column("prod_warehouse_path_id", UUID(), nullable=True), - ) - - # Migrate data back from junction table to old columns - op.execute( - """ - UPDATE title - SET dev_warehouse_path_id = ( - SELECT warehouse_path_id FROM 
title_warehouse_path - WHERE title_warehouse_path.title_id = title.id AND path_type = 'dev' - LIMIT 1 - ) - WHERE EXISTS ( - SELECT 1 FROM title_warehouse_path - WHERE title_warehouse_path.title_id = title.id AND path_type = 'dev' - ) - """ - ) - - op.execute( - """ - UPDATE title - SET prod_warehouse_path_id = ( - SELECT warehouse_path_id FROM title_warehouse_path - WHERE title_warehouse_path.title_id = title.id AND path_type = 'prod' - LIMIT 1 - ) - WHERE EXISTS ( - SELECT 1 FROM title_warehouse_path - WHERE title_warehouse_path.title_id = title.id AND path_type = 'prod' - ) - """ - ) - - # Add back foreign key constraints - op.create_foreign_key( - "fk_title_dev_warehouse_path_id_warehouse_path", - "title", - "warehouse_path", - ["dev_warehouse_path_id"], - ["id"], - ) - op.create_foreign_key( - "fk_title_prod_warehouse_path_id_warehouse_path", - "title", - "warehouse_path", - ["prod_warehouse_path_id"], - ["id"], - ) - - # Drop the junction table - op.drop_table("title_warehouse_path") diff --git a/backend/src/cms_backend/mill/context.py b/backend/src/cms_backend/mill/context.py index 683b4ca..858923d 100644 --- a/backend/src/cms_backend/mill/context.py +++ b/backend/src/cms_backend/mill/context.py @@ -1,14 +1,18 @@ -import dataclasses import os +from dataclasses import dataclass, field from datetime import timedelta +from pathlib import Path from typing import TypeVar +from uuid import UUID from humanfriendly import parse_timespan +from cms_backend.context import get_mandatory_env + T = TypeVar("T") -@dataclasses.dataclass(kw_only=True) +@dataclass(kw_only=True) class Context: """Class holding every contextual / configuration bits which can be moved @@ -25,3 +29,13 @@ class Context: os.getenv("PROCESS_ZIMFARM_NOTIFICATIONS_INTERVAL", default="1m") ) ) + + jail_warehouse_id: UUID = field( + default=UUID(get_mandatory_env("JAIL_WAREHOUSE_ID")) + ) + jail_base_path: Path = field(default=Path(os.getenv("JAIL_BASE_PATH", ""))) + + staging_warehouse_id: UUID = 
field( + default=UUID(get_mandatory_env("STAGING_WAREHOUSE_ID")) + ) + staging_base_path: Path = field(default=Path(os.getenv("STAGING_BASE_PATH", ""))) diff --git a/backend/src/cms_backend/mill/process_zimfarm_notifications.py b/backend/src/cms_backend/mill/process_zimfarm_notifications.py index 5c16404..de32f4a 100644 --- a/backend/src/cms_backend/mill/process_zimfarm_notifications.py +++ b/backend/src/cms_backend/mill/process_zimfarm_notifications.py @@ -2,7 +2,7 @@ from cms_backend import logger from cms_backend.db.zimfarm_notification import get_next_notification_to_process_or_none -from cms_backend.processors.zimfarm_notification import process_notification +from cms_backend.mill.processors.zimfarm_notification import process_notification def process_zimfarm_notifications(session: OrmSession): diff --git a/backend/src/cms_backend/mill/processors/book.py b/backend/src/cms_backend/mill/processors/book.py new file mode 100644 index 0000000..557553a --- /dev/null +++ b/backend/src/cms_backend/mill/processors/book.py @@ -0,0 +1,85 @@ +from sqlalchemy.orm import Session as ORMSession + +from cms_backend import logger +from cms_backend.db.models import Book, Title +from cms_backend.db.title import get_title_by_name_or_none +from cms_backend.mill.processors.title import add_book_to_title +from cms_backend.utils.datetime import getnow + + +def process_book(session: ORMSession, book: Book): + if not check_book_zim_spec(book): + return + + title = get_matching_title(session, book) + + if not title: + return + + add_book_to_title(session, book, title) + + +def check_book_zim_spec(book: Book) -> bool: + try: + missing_metadata_keys = [ + key + for key in sorted( + [ + "Name", + "Title", + "Creator", + "Publisher", + "Date", + "Description", + "Language", + ] + ) + if key not in book.zim_metadata or not book.zim_metadata.get(key) + ] + + if missing_metadata_keys: + book.events.append( + f"{getnow()}: book is missing mandatory metadata: " + 
f"{','.join(missing_metadata_keys)}" + ) + book.status = "bad_book" + return False + + book.events.append(f"{getnow()}: book passed ZIM specification checks") + return True + + except Exception as exc: + book.events.append( + f"{getnow()}: error encountered while checking ZIM specification\n{exc}" + ) + logger.exception(f"Failed to check ZIM specification for book {book.id}") + book.status = "errored" + return False + + +def get_matching_title(session: ORMSession, book: Book) -> Title | None: + try: + if not book.name: + book.events.append( + f"{getnow()}: no title can be found because name is missing" + ) + book.status = "bad_book" + return None + + title = get_title_by_name_or_none(session, name=book.name) + + if not title: + book.events.append(f"{getnow()}: no matching title found for book") + book.status = "pending_title" + return None + + book.events.append(f"{getnow()}: found matching title {title.id}") + return title + + except Exception as exc: + book.events.append( + f"{getnow()}: error encountered while get matching title\n{exc}" + ) + logger.exception(f"Failed to get matching title for {book.id}") + book.status = "errored" + return None diff --git a/backend/src/cms_backend/mill/processors/title.py b/backend/src/cms_backend/mill/processors/title.py new file mode 100644 index 0000000..ed84a8f --- /dev/null +++ b/backend/src/cms_backend/mill/processors/title.py @@ -0,0 +1,164 @@ +from dataclasses import dataclass +from pathlib import Path +from uuid import UUID + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend import logger +from cms_backend.db.book import create_book_location +from cms_backend.db.models import Book, Title +from cms_backend.mill.context import Context as MillContext +from cms_backend.utils.datetime import getnow +from cms_backend.utils.filename import compute_target_filename + + +@dataclass(eq=True, frozen=True) +class FileLocation: + warehouse_id: UUID + path: Path + filename: str + + +def add_book_to_title(session: 
OrmSession, book: Book, title: Title): + try: + # Retrieve name from book.name directly + if not book.name: + raise Exception("book name is missing or invalid") + + # Validate book.date is also present and valid + if not book.date: + raise Exception("book date is missing or invalid") + + title.books.append(book) + book.events.append(f"{getnow()}: book added to title {title.id}") + title.events.append(f"{getnow()}: book {book.id} added to title") + + # Update title name should it have changed (e.g. stackexchange domain updated + # leading to ZIM name automatically updated as well) + if title.name != book.name: + title.events.append(f"{getnow()}: updating title name to {book.name}") + title.name = book.name + + # Compute target filename once for this book + target_filename = compute_target_filename( + session, + name=book.name, + flavour=book.flavour, + date=book.date, + book_id=book.id, + ) + + # For now, only 'robust' maturity move straight to prod, + # other maturity moves through staging first + target_locations = ( + [ + FileLocation( + MillContext.staging_warehouse_id, + MillContext.staging_base_path, + target_filename, + ) + ] + if title.maturity != "robust" + else [ + FileLocation(tc.collection.warehouse_id, tc.path, target_filename) + for tc in title.collections + ] + ) + + # Create target locations if not already at expected locations + create_book_target_locations( + session=session, + book=book, + target_locations=target_locations, + ) + + except Exception as exc: + book.events.append( + f"{getnow()}: error encountered while adding to title {title.id}\n{exc}" + ) + title.events.append( + f"{getnow()}: error encountered while adding book {book.id}\n{exc}" + ) + book.status = "errored" + logger.exception(f"Failed to add book {book.id} to title {title.id}") + + +def _current_locations_match_targets( + book: Book, + target_locations: list[FileLocation], +) -> bool: + """Check if book's current locations exactly match the target locations. 
+ + Args: + book: The book to check + target_locations: List of file locations representing target locations + + Returns: + True if the set of current locations is strictly identical to target locations + """ + # Extract current locations as set of (warehouse_id, path, filename) tuples + current_set = { + FileLocation( + warehouse_id=loc.warehouse_id, path=loc.path, filename=loc.filename + ) + for loc in book.locations + if loc.status == "current" + } + + # Convert target list to set + target_set = set(target_locations) + + # Must be strictly identical + return current_set == target_set + + +def create_book_target_locations( + session: OrmSession, + book: Book, + target_locations: list[FileLocation], +) -> None: + """Create target locations for a book if not already at expected locations. + + Computes target locations based on the provided warehouse paths and filename, + then checks if the book's current locations already match. If they do, no new + target locations are created. Otherwise, target locations are created for each + warehouse path. 
+ + Args: + session: SQLAlchemy session + book: Book to create target locations for + target_locations: List of FileLocation where the book should be + + Side effects: + - Adds event to book if targets already match current locations + - Creates BookLocation records if targets don't match current locations + """ + + if not book.name: + raise Exception("book name is missing or invalid") + + if not book.date: + raise Exception("book date is missing or invalid") + + # Check if current locations already match targets exactly + if _current_locations_match_targets(book, target_locations): + # Book is already at all expected locations - skip creating targets + book.events.append( + f"{getnow()}: book already at all target locations, skipping target " + "creation" + ) + book.status = "published" + return + + # Create target locations for each applicable warehouse path + for target_location in target_locations: + create_book_location( + session=session, + book=book, + warehouse_id=target_location.warehouse_id, + path=target_location.path, + filename=target_location.filename, + status="target", + ) + + book.status = "pending_move" diff --git a/backend/src/cms_backend/processors/zimfarm_notification.py b/backend/src/cms_backend/mill/processors/zimfarm_notification.py similarity index 54% rename from backend/src/cms_backend/processors/zimfarm_notification.py rename to backend/src/cms_backend/mill/processors/zimfarm_notification.py index 12e40cd..7994925 100644 --- a/backend/src/cms_backend/processors/zimfarm_notification.py +++ b/backend/src/cms_backend/mill/processors/zimfarm_notification.py @@ -1,13 +1,10 @@ -from typing import cast - -from sqlalchemy import select from sqlalchemy.orm import Session as ORMSession from cms_backend import logger from cms_backend.db.book import create_book, create_book_location -from cms_backend.db.models import Warehouse, WarehousePath, ZimfarmNotification -from cms_backend.processors.book import check_book_qa, get_matching_title -from 
cms_backend.processors.title import add_book_to_title +from cms_backend.db.models import ZimfarmNotification +from cms_backend.mill.context import Context as MillContext +from cms_backend.mill.processors.book import process_book from cms_backend.utils.datetime import getnow @@ -16,8 +13,9 @@ def process_notification(session: ORMSession, notification: ZimfarmNotification) - check all mandatory fields are present in notification - create a book - - check book for QA rules + - check book matches ZIM specification requirements - associate book with matching title if it already exists + - move book from jail to staging """ try: missing_notification_keys = [ @@ -28,10 +26,8 @@ def process_notification(session: ORMSession, notification: ZimfarmNotification) "size", "metadata", "zimcheck", - "warehouse_name", "folder_name", "filename", - "producer", ] if key not in notification.content ] @@ -44,34 +40,6 @@ def process_notification(session: ORMSession, notification: ZimfarmNotification) notification.status = "bad_notification" return - # Validate producer information - producer = notification.content.get("producer") - if not isinstance(producer, dict) or not all( - isinstance(k, str) and isinstance(v, str) - for k, v in producer.items() # pyright: ignore[reportUnknownVariableType] - ): - notification.events.append(f"{getnow()}: producer must be a dict[str, str]") - notification.status = "bad_notification" - return - else: - producer = cast(dict[str, str], producer) - - missing_producer_keys = [ - key - for key in ["displayName", "displayUrl", "uniqueId"] - if key not in producer - ] - - if missing_producer_keys: - notification.events.append( - f"{getnow()}: producer is missing mandatory keys: " - f"{','.join(missing_producer_keys)}" - ) - notification.status = "bad_notification" - return - - # Look up warehouse path by warehouse_name and folder_name - warehouse_name = notification.content.get("warehouse_name") folder_name = notification.content.get("folder_name") filename = 
notification.content.get("filename") @@ -84,19 +52,11 @@ def process_notification(session: ORMSession, notification: ZimfarmNotification) notification.status = "bad_notification" return - stmt = ( - select(WarehousePath) - .join(Warehouse) - .where( - Warehouse.name == warehouse_name, - WarehousePath.folder_name == folder_name, - ) - ) - warehouse_path = session.scalars(stmt).one_or_none() - - if not warehouse_path: + # Validate folder_name is a non-empty string + if not isinstance(folder_name, str) or not folder_name: notification.events.append( - f"{getnow()}: warehouse path not found: {warehouse_name}/{folder_name}" + f"{getnow()}: folder_name must be a non-empty string, got " + f"{type(folder_name).__name__}: {folder_name}" ) notification.status = "bad_notification" return @@ -110,31 +70,22 @@ def process_notification(session: ORMSession, notification: ZimfarmNotification) zim_metadata=notification.content["metadata"], zimcheck_result=notification.content["zimcheck"], zimfarm_notification=notification, - producer_display_name=producer["displayName"], - producer_display_url=producer["displayUrl"], - producer_unique_id=producer["uniqueId"], ) # Create current book location create_book_location( session=session, book=book, - warehouse_path_id=warehouse_path.id, + warehouse_id=MillContext.jail_warehouse_id, + path=MillContext.jail_base_path / folder_name, filename=filename, status="current", ) notification.status = "processed" - if not check_book_qa(book): - return - - title = get_matching_title(session, book) - - if not title: - return - - add_book_to_title(session, book, title) + # Try to move book to staging + process_book(session, book) except Exception as exc: notification.events.append( diff --git a/backend/src/cms_backend/processors/book.py b/backend/src/cms_backend/processors/book.py deleted file mode 100644 index baf93d6..0000000 --- a/backend/src/cms_backend/processors/book.py +++ /dev/null @@ -1,173 +0,0 @@ -from uuid import UUID - -from sqlalchemy.orm 
import Session as ORMSession - -from cms_backend import logger -from cms_backend.db.book import create_book_location -from cms_backend.db.models import Book, Title, TitleWarehousePath -from cms_backend.db.title import get_title_by_name_and_producer_or_none -from cms_backend.utils.datetime import getnow -from cms_backend.utils.filename import compute_target_filename - - -def check_book_qa(book: Book) -> bool: - try: - missing_metadata_keys = [ - key - for key in sorted( - [ - "Name", - "Title", - "Creator", - "Publisher", - "Date", - "Description", - "Language", - ] - ) - if key not in book.zim_metadata - ] - - if missing_metadata_keys: - book.events.append( - f"{getnow()}: book is missing mandatory metadata: " - f"{','.join(missing_metadata_keys)}" - ) - book.status = "qa_failed" - return False - - book.events.append(f"{getnow()}: book passed QA checks") - return True - - except Exception as exc: - book.events.append( - f"{getnow()}: error encountered while checking book QA\n{exc}" - ) - logger.exception(f"Failed to check book QA for {book.id}") - book.status = "errored" - return False - - -def get_matching_title(session: ORMSession, book: Book) -> Title | None: - try: - if not book.name: - book.events.append( - f"{getnow()}: no title can be found because name is missing" - ) - book.status = "qa_failed" - return None - - title = get_title_by_name_and_producer_or_none( - session, name=book.name, producer_unique_id=book.producer_unique_id - ) - - if not title: - book.events.append(f"{getnow()}: no matching title found for book") - book.status = "pending_title" - return None - - book.events.append(f"{getnow()}: found matching title {title.id}") - return title - - except Exception as exc: - book.events.append( - f"{getnow()}: error encountered while get matching title\n{exc}" - ) - logger.exception(f"Failed to get matching title for {book.id}") - book.status = "errored" - return None - - -def _current_locations_match_targets( - book: Book, - target_locations: 
list[tuple[UUID, str]], -) -> bool: - """Check if book's current locations exactly match the target locations. - - Args: - book: The book to check - target_locations: List of (warehouse_path_id, filename) tuples representing - target locations - - Returns: - True if the set of current locations is strictly identical to target locations - """ - # Extract current locations as set of (warehouse_path_id, filename) tuples - current_set = { - (loc.warehouse_path_id, loc.filename) - for loc in book.locations - if loc.status == "current" - } - - # Convert target list to set - target_set = set(target_locations) - - # Must be strictly identical - return current_set == target_set - - -def create_book_target_locations( - session: ORMSession, - book: Book, - target_warehouse_paths: list[TitleWarehousePath], -) -> None: - """Create target locations for a book if not already at expected locations. - - Computes target locations based on the provided warehouse paths and filename, - then checks if the book's current locations already match. If they do, no new - target locations are created. Otherwise, target locations are created for each - warehouse path. 
- - Args: - session: SQLAlchemy session - book: Book to create target locations for - target_warehouse_paths: List of TitleWarehousePath objects defining where the - book should be - - Side effects: - - Adds event to book if targets already match current locations - - Creates BookLocation records if targets don't match current locations - """ - - if not book.name: - raise Exception("book name is missing or invalid") - - if not book.date: - raise Exception("book date is missing or invalid") - - # Compute target filename once for this book - target_filename = compute_target_filename( - session, - name=book.name, - flavour=book.flavour, - date=book.date, - book_id=book.id, - ) - - # Compute all target locations as (warehouse_path_id, filename) tuples - target_locations = [ - (title_warehouse_path.warehouse_path_id, target_filename) - for title_warehouse_path in target_warehouse_paths - ] - - # Check if current locations already match targets exactly - if _current_locations_match_targets(book, target_locations): - # Book is already at all expected locations - skip creating targets - book.events.append( - f"{getnow()}: book already at all target locations, skipping target " - "creation" - ) - book.status = "published" - return - - # Create target locations for each applicable warehouse path - for title_warehouse_path in target_warehouse_paths: - create_book_location( - session=session, - book=book, - warehouse_path_id=title_warehouse_path.warehouse_path_id, - filename=target_filename, - status="target", - ) - - book.status = "pending_move" diff --git a/backend/src/cms_backend/processors/title.py b/backend/src/cms_backend/processors/title.py deleted file mode 100644 index 87536d1..0000000 --- a/backend/src/cms_backend/processors/title.py +++ /dev/null @@ -1,68 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.orm import Session as OrmSession - -from cms_backend import logger -from cms_backend.db.models import Book, Title, TitleWarehousePath -from 
cms_backend.processors.book import create_book_target_locations -from cms_backend.utils.datetime import getnow - - -def add_book_to_title(session: OrmSession, book: Book, title: Title): - try: - # Retrieve name from book.name directly - if not book.name: - raise Exception("book name is missing or invalid") - - # Validate book.date is also present and valid - if not book.date: - raise Exception("book date is missing or invalid") - - title.books.append(book) - book.events.append(f"{getnow()}: book added to title {title.id}") - title.events.append(f"{getnow()}: book {book.id} added to title") - - if title.name != book.name: - title.events.append(f"{getnow()}: updating title name to {book.name}") - title.name = book.name - - # Update title producer display fields from book - if title.producer_display_name != book.producer_display_name: - title.events.append( - f"{getnow()}: updating title producer_display_name to " - f"{book.producer_display_name}" - ) - title.producer_display_name = book.producer_display_name - - if title.producer_display_url != book.producer_display_url: - title.events.append( - f"{getnow()}: updating title producer_display_url to " - f"{book.producer_display_url}" - ) - title.producer_display_url = book.producer_display_url - - # Determine which warehouse paths to use based on title.in_prod - path_type = "prod" if title.in_prod else "dev" - - # Get all warehouse paths for this title and path_type - stmt = select(TitleWarehousePath).where( - TitleWarehousePath.title_id == title.id, - TitleWarehousePath.path_type == path_type, - ) - target_warehouse_paths = session.scalars(stmt).all() - - # Create target locations if not already at expected locations - create_book_target_locations( - session=session, - book=book, - target_warehouse_paths=list(target_warehouse_paths), - ) - - except Exception as exc: - book.events.append( - f"{getnow()}: error encountered while adding to title {title.id}\n{exc}" - ) - title.events.append( - f"{getnow()}: error 
encountered while adding book {book.id}\n{exc}" - ) - book.status = "errored" - logger.exception(f"Failed to add book {book.id} to title {title.id}") diff --git a/backend/src/cms_backend/schemas/orms.py b/backend/src/cms_backend/schemas/orms.py index 568e3c7..49ecfd8 100644 --- a/backend/src/cms_backend/schemas/orms.py +++ b/backend/src/cms_backend/schemas/orms.py @@ -19,19 +19,13 @@ class TitleLightSchema(BaseModel): id: UUID name: str - producer_unique_id: str - producer_display_name: str | None - producer_display_url: str | None + maturity: str -class WarehousePathInfoSchema(BaseModel): - """ - Schema for warehouse path information (with warehouse details) - """ - - path_id: UUID - folder_name: str - warehouse_name: str +class TitleCollectionSchema(BaseModel): + collection_id: UUID + collection_name: str + path: str class TitleFullSchema(TitleLightSchema): @@ -39,11 +33,9 @@ class TitleFullSchema(TitleLightSchema): Schema for reading a title model with all fields including books """ - dev_warehouse_paths: list[WarehousePathInfoSchema] - prod_warehouse_paths: list[WarehousePathInfoSchema] - in_prod: bool events: list[str] books: list["BookLightSchema"] + collections: list["TitleCollectionSchema"] class ZimfarmNotificationLightSchema(BaseModel): @@ -62,24 +54,13 @@ class ZimfarmNotificationFullSchema(ZimfarmNotificationLightSchema): events: list[str] -class ProducerSchema(BaseModel): - """ - Schema for producer information - """ - - display_name: str - display_url: str - unique_id: str - - class BookLocationSchema(BaseModel): """ Schema for book location information """ - warehouse_path_id: UUID warehouse_name: str - folder_name: str + path: str filename: str status: str # 'current' or 'target' @@ -105,7 +86,6 @@ class BookFullSchema(BookLightSchema): zimcheck_result: dict[str, Any] zim_metadata: dict[str, Any] events: list[str] - producer: ProducerSchema current_locations: list[BookLocationSchema] target_locations: list[BookLocationSchema] diff --git 
a/backend/src/cms_backend/shuttle/context.py b/backend/src/cms_backend/shuttle/context.py index 40d42a4..77ce30c 100644 --- a/backend/src/cms_backend/shuttle/context.py +++ b/backend/src/cms_backend/shuttle/context.py @@ -1,6 +1,7 @@ import os from dataclasses import dataclass from datetime import timedelta +from pathlib import Path from typing import ClassVar from uuid import UUID @@ -10,12 +11,12 @@ LocalWarehousePath = str -def _parse_local_warehouse_paths() -> dict[UUID, str]: +def _parse_local_warehouse_paths() -> dict[UUID, Path]: env_value = os.getenv("LOCAL_WAREHOUSE_PATHS", default="") if not env_value: return {} return { - UUID(warehouse_id): local_path + UUID(warehouse_id): Path(local_path) for item in env_value.split(",") if item for (warehouse_id, local_path) in [item.split(":", 1)] @@ -38,4 +39,4 @@ class Context: seconds=parse_timespan(os.getenv("MOVE_FILES_INTERVAL", default="1m")) ) - local_warehouse_paths: ClassVar[dict[UUID, str]] = _parse_local_warehouse_paths() + local_warehouse_paths: ClassVar[dict[UUID, Path]] = _parse_local_warehouse_paths() diff --git a/backend/src/cms_backend/shuttle/move_files.py b/backend/src/cms_backend/shuttle/move_files.py index 7a28d5a..d9ea70b 100644 --- a/backend/src/cms_backend/shuttle/move_files.py +++ b/backend/src/cms_backend/shuttle/move_files.py @@ -26,10 +26,9 @@ def move_files(session: OrmSession): def move_book_files(session: OrmSession, book: Book): inaccessible_warehouse_names = { - loc.warehouse_path.warehouse.name + loc.warehouse.name for loc in book.locations - if loc.warehouse_path.warehouse_id - not in ShuttleContext.local_warehouse_paths.keys() + if loc.warehouse_id not in ShuttleContext.local_warehouse_paths.keys() } # if any warehouse is not accessible, we do not proceed (complex scenarii not yet diff --git a/backend/tests/api/routes/test_books.py b/backend/tests/api/routes/test_books.py index 171b93b..ca440a6 100644 --- a/backend/tests/api/routes/test_books.py +++ 
b/backend/tests/api/routes/test_books.py @@ -222,14 +222,7 @@ def test_get_book_by_id( assert response_doc["zim_metadata"] == book.zim_metadata assert "events" in response_doc assert response_doc["events"] == book.events - assert "producer" in response_doc - assert isinstance(response_doc["producer"], dict) - assert "display_name" in response_doc["producer"] - assert response_doc["producer"]["display_name"] == book.producer_display_name - assert "display_url" in response_doc["producer"] - assert response_doc["producer"]["display_url"] == book.producer_display_url - assert "unique_id" in response_doc["producer"] - assert response_doc["producer"]["unique_id"] == book.producer_unique_id + # Note: producer fields are no longer part of the Book model def test_get_book_by_id_not_found( diff --git a/backend/tests/api/routes/test_library.py b/backend/tests/api/routes/test_library.py index 3ff0b4e..8d28ee1 100644 --- a/backend/tests/api/routes/test_library.py +++ b/backend/tests/api/routes/test_library.py @@ -7,16 +7,24 @@ from fastapi.testclient import TestClient from sqlalchemy.orm import Session as OrmSession -from cms_backend.db.models import Book, BookLocation, Library, WarehousePath +from cms_backend.db.models import ( + Book, + BookLocation, + Collection, + CollectionTitle, + Title, + Warehouse, +) from cms_backend.utils.datetime import getnow -def test_get_library_catalog_xml_not_found_by_id( +def test_get_collection_catalog_xml_not_found_by_id( client: TestClient, ): - """Test that requesting a non-existent library by ID returns 404 with empty XML""" - non_existent_library_id = uuid4() - response = client.get(f"/v1/libraries/{non_existent_library_id}/catalog.xml") + """Test that requesting a non-existent collection by ID returns 404 + with empty XML""" + non_existent_collection_id = uuid4() + response = client.get(f"/v1/collections/{non_existent_collection_id}/catalog.xml") assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] 
== "application/xml" # Should return valid empty XML @@ -26,11 +34,12 @@ def test_get_library_catalog_xml_not_found_by_id( assert len(list(root)) == 0 -def test_get_library_catalog_xml_not_found_by_name( +def test_get_collection_catalog_xml_not_found_by_name( client: TestClient, ): - """Test that requesting a non-existent library by name returns 404 with empty XML""" - response = client.get("/v1/libraries/nonexistent_library/catalog.xml") + """Test that requesting a non-existent collection by name returns 404 + with empty XML""" + response = client.get("/v1/collections/nonexistent_collection/catalog.xml") assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] == "application/xml" # Should return valid empty XML @@ -40,14 +49,14 @@ def test_get_library_catalog_xml_not_found_by_name( assert len(list(root)) == 0 -def test_get_library_catalog_xml_empty( +def test_get_collection_catalog_xml_empty( client: TestClient, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], ): - """Test that an empty library returns valid XML with no books""" - library = create_library(name="empty_library") + """Test that an empty collection returns valid XML with no books""" + collection = create_collection(name="empty_collection") - response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK assert response.headers["content-type"] == "application/xml" @@ -57,22 +66,42 @@ def test_get_library_catalog_xml_empty( assert len(list(root)) == 0 -def test_get_library_catalog_xml_by_name( +def _add_title_to_collection( + dbsession: OrmSession, + collection: Collection, + title: Title, + path: str, +) -> None: + """Helper to add a title to a collection""" + from pathlib import Path + + ct = CollectionTitle(path=Path(path)) + ct.title = title + ct.collection = collection + dbsession.add(ct) + 
dbsession.flush() + + +def test_get_collection_catalog_xml_by_name( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): - """Test that library can be queried by name""" + """Test that collection can be queried by name""" # Setup - warehouse_path = create_warehouse_path() - create_library(name="my_library", warehouse_path_ids=[warehouse_path.id]) + warehouse = create_warehouse() + collection = create_collection(name="my_collection", warehouse=warehouse) + title = create_title(name="test_title") + + # Add title to collection with a path + path = "wikipedia" + _add_title_to_collection(dbsession, collection, title, path) book = create_book( - name="test_title", - flavour="full", zim_metadata={ "Name": "test_title", "Title": "Test Title", @@ -83,12 +112,15 @@ def test_get_library_catalog_xml_by_name( "Date": "2025-01-01", }, ) + book.title = title book.status = "published" - create_book_location(book=book, warehouse_path=warehouse_path, status="current") + create_book_location( + book=book, warehouse_id=warehouse.id, path=path, status="current" + ) dbsession.flush() # Test by name - response = client.get("/v1/libraries/my_library/catalog.xml") + response = client.get("/v1/collections/my_collection/catalog.xml") assert response.status_code == HTTPStatus.OK assert response.headers["content-type"] == "application/xml" @@ -98,22 +130,25 @@ def test_get_library_catalog_xml_by_name( assert books[0].get("title") == "Test Title" -def test_get_library_catalog_xml_single_book( +def test_get_collection_catalog_xml_single_book( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: 
Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): - """Test library XML with a single book""" + """Test collection XML with a single book""" # Setup - warehouse_path = create_warehouse_path() - library = create_library(warehouse_path_ids=[warehouse_path.id]) + warehouse = create_warehouse() + collection = create_collection(warehouse=warehouse) + title = create_title(name="test_title") + + path = "wikipedia" + _add_title_to_collection(dbsession, collection, title, path) book = create_book( - name="test_title", - flavour="full", zim_metadata={ "Name": "test_title", "Title": "Test Title", @@ -125,12 +160,15 @@ def test_get_library_catalog_xml_single_book( "Tags": "_category:test;_pictures:yes", }, ) + book.title = title book.status = "published" - create_book_location(book=book, warehouse_path=warehouse_path, status="current") + create_book_location( + book=book, warehouse_id=warehouse.id, path=path, status="current" + ) dbsession.flush() # Test - response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK assert response.headers["content-type"] == "application/xml" @@ -156,89 +194,96 @@ def test_get_library_catalog_xml_single_book( assert book_elem.get("tags") == "_category:test;_pictures:yes" -def test_get_library_catalog_xml_multiple_books_different_formats( +def test_get_collection_catalog_xml_multiple_books_different_formats( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): - 
"""Test that books with different flavours are included""" + """Test that books with different flavours in same name are properly handled""" # Setup - warehouse_path = create_warehouse_path() - library = create_library(warehouse_path_ids=[warehouse_path.id]) + warehouse = create_warehouse() + collection = create_collection(warehouse=warehouse) + title = create_title(name="wiki") + + path = "wikipedia" + _add_title_to_collection(dbsession, collection, title, path) - # Create books with same name but different flavours - book_full = create_book( - name="wiki", - flavour="full", + # Create old book that will be superseded + book_old = create_book( zim_metadata={ "Name": "wiki", - "Title": "Wikipedia Full", - "Description": "Full version", + "Title": "Wikipedia Old", + "Description": "Old version", "Language": "eng", "Creator": "Kiwix", "Publisher": "Kiwix", - "Date": "2025-01-15", + "Date": "2025-01-01", + "Flavour": "full", }, ) - book_full.status = "published" + book_old.title = title + book_old.status = "published" + dbsession.flush() + create_book_location( + book=book_old, warehouse_id=warehouse.id, path=path, status="current" + ) - book_nopic = create_book( - name="wiki", - flavour="nopic", + # Create newer book with same flavour (should supersede old) + book_new = create_book( zim_metadata={ "Name": "wiki", - "Title": "Wikipedia No Pics", - "Description": "No pictures version", + "Title": "Wikipedia New", + "Description": "New version", "Language": "eng", "Creator": "Kiwix", "Publisher": "Kiwix", - "Date": "2025-01-10", + "Date": "2025-01-15", + "Flavour": "full", }, ) - book_nopic.status = "published" - - create_book_location( - book=book_full, warehouse_path=warehouse_path, status="current" - ) + book_new.title = title + book_new.status = "published" + dbsession.flush() create_book_location( - book=book_nopic, warehouse_path=warehouse_path, status="current" + book=book_new, warehouse_id=warehouse.id, path=path, status="current" ) dbsession.flush() # Test - 
response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK root = ET.fromstring(response.text) books = list(root.findall("book")) - assert len(books) == 2 - - # Check that we have both variants (full and nopic) - book_titles = {book.get("title") for book in books} - assert "Wikipedia Full" in book_titles - assert "Wikipedia No Pics" in book_titles + # Only the newest book should be returned + assert len(books) == 1 + assert books[0].get("title") == "Wikipedia New" -def test_get_library_catalog_xml_skips_unpublished_books( +def test_get_collection_catalog_xml_skips_unpublished_books( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): """Test that unpublished books are not included""" # Setup - warehouse_path = create_warehouse_path() - library = create_library(warehouse_path_ids=[warehouse_path.id]) + warehouse = create_warehouse() + collection = create_collection(warehouse=warehouse) + title = create_title(name="test") + + path = "test" + _add_title_to_collection(dbsession, collection, title, path) # Create a published book published_book = create_book( - name="published", - flavour="full", zim_metadata={ "Name": "published", "Title": "Published Book", @@ -249,12 +294,11 @@ def test_get_library_catalog_xml_skips_unpublished_books( "Date": "2025-01-01", }, ) + published_book.title = title published_book.status = "published" # Create an unpublished book unpublished_book = create_book( - name="unpublished", - flavour="full", zim_metadata={ "Name": "unpublished", "Title": "Unpublished Book", @@ -265,18 +309,19 @@ def 
test_get_library_catalog_xml_skips_unpublished_books( "Date": "2025-01-01", }, ) + unpublished_book.title = title unpublished_book.status = "pending_processing" create_book_location( - book=published_book, warehouse_path=warehouse_path, status="current" + book=published_book, warehouse_id=warehouse.id, path=path, status="current" ) create_book_location( - book=unpublished_book, warehouse_path=warehouse_path, status="current" + book=unpublished_book, warehouse_id=warehouse.id, path=path, status="current" ) dbsession.flush() # Test - response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK root = ET.fromstring(response.text) @@ -285,26 +330,25 @@ def test_get_library_catalog_xml_skips_unpublished_books( assert books[0].get("title") == "Published Book" -def test_get_library_catalog_xml_multiple_warehouse_paths( +def test_get_collection_catalog_xml_single_warehouse( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): - """Test that books from all warehouse paths in the library are included""" - # Setup - warehouse_path_1 = create_warehouse_path() - warehouse_path_2 = create_warehouse_path() - library = create_library( - warehouse_path_ids=[warehouse_path_1.id, warehouse_path_2.id] - ) + """Test that a collection with a single warehouse returns correct books""" + # Each collection is tied to exactly one warehouse, so this tests that design + warehouse = create_warehouse(name="warehouse1") + collection = create_collection(warehouse=warehouse) + title = create_title(name="book1") + + path = "book1" + _add_title_to_collection(dbsession, collection, 
title, path) - # Create books in different warehouse paths book_1 = create_book( - name="book1", - flavour="full", zim_metadata={ "Name": "book1", "Title": "Book 1", @@ -315,57 +359,44 @@ def test_get_library_catalog_xml_multiple_warehouse_paths( "Date": "2025-01-01", }, ) + book_1.title = title book_1.status = "published" - book_2 = create_book( - name="book2", - flavour="full", - zim_metadata={ - "Name": "book2", - "Title": "Book 2", - "Description": "In warehouse 2", - "Language": "eng", - "Creator": "Author", - "Publisher": "Publisher", - "Date": "2025-01-01", - }, + create_book_location( + book=book_1, warehouse_id=warehouse.id, path=path, status="current" ) - book_2.status = "published" - - create_book_location(book=book_1, warehouse_path=warehouse_path_1, status="current") - create_book_location(book=book_2, warehouse_path=warehouse_path_2, status="current") dbsession.flush() # Test - response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK root = ET.fromstring(response.text) books = list(root.findall("book")) - assert len(books) == 2 - - titles = {book.get("title") for book in books} - assert "Book 1" in titles - assert "Book 2" in titles + assert len(books) == 1 + assert books[0].get("title") == "Book 1" -def test_get_library_catalog_xml_latest_book_per_name_flavour( +def test_get_collection_catalog_xml_latest_book_per_name_flavour( client: TestClient, dbsession: OrmSession, - create_library: Callable[..., Library], + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], ): """Test that only the latest book per name+flavour combination is returned""" # Setup - warehouse_path = create_warehouse_path() - library = 
create_library(warehouse_path_ids=[warehouse_path.id]) + warehouse = create_warehouse() + collection = create_collection(warehouse=warehouse) + title = create_title(name="wiki") + + path = "wikipedia" + _add_title_to_collection(dbsession, collection, title, path) # Create older book older_book = create_book( - name="wiki", - flavour="full", created_at=getnow() - timedelta(days=30), zim_metadata={ "Name": "wiki", @@ -377,12 +408,11 @@ def test_get_library_catalog_xml_latest_book_per_name_flavour( "Date": "2024-12-01", }, ) + older_book.title = title older_book.status = "published" # Create newer book with same name+flavour newer_book = create_book( - name="wiki", - flavour="full", created_at=getnow(), zim_metadata={ "Name": "wiki", @@ -394,18 +424,19 @@ def test_get_library_catalog_xml_latest_book_per_name_flavour( "Date": "2025-01-01", }, ) + newer_book.title = title newer_book.status = "published" create_book_location( - book=older_book, warehouse_path=warehouse_path, status="current" + book=older_book, warehouse_id=warehouse.id, path=path, status="current" ) create_book_location( - book=newer_book, warehouse_path=warehouse_path, status="current" + book=newer_book, warehouse_id=warehouse.id, path=path, status="current" ) dbsession.flush() # Test - response = client.get(f"/v1/libraries/{library.id}/catalog.xml") + response = client.get(f"/v1/collections/{collection.id}/catalog.xml") assert response.status_code == HTTPStatus.OK root = ET.fromstring(response.text) diff --git a/backend/tests/api/routes/test_titles.py b/backend/tests/api/routes/test_titles.py index abb9c20..2d3fc86 100644 --- a/backend/tests/api/routes/test_titles.py +++ b/backend/tests/api/routes/test_titles.py @@ -5,7 +5,7 @@ from fastapi.testclient import TestClient from sqlalchemy.orm import Session as OrmSession -from cms_backend.db.models import Book, Title, WarehousePath +from cms_backend.db.models import Book, Title def test_get_titles_empty(client: TestClient): @@ -41,9 +41,7 @@ def 
test_get_titles( assert set(data["items"][0].keys()) == { "id", "name", - "producer_unique_id", - "producer_display_name", - "producer_display_url", + "maturity", } assert data["items"][0]["name"] == "wikipedia_fr_all" @@ -51,17 +49,10 @@ def test_get_titles( def test_create_title_required_fields_only( client: TestClient, dbsession: OrmSession, - create_warehouse_path: Callable[..., WarehousePath], ): """Test creating a title with only required fields""" - dev_warehouse_path = create_warehouse_path() - prod_warehouse_path = create_warehouse_path() - title_data = { "name": "wikipedia_en_test", - "producer_unique_id": "550e8400-e29b-41d4-a716-446655440000", - "dev_warehouse_path_ids": [str(dev_warehouse_path.id)], - "prod_warehouse_path_ids": [str(prod_warehouse_path.id)], } response = client.post("/v1/titles", json=title_data) @@ -76,107 +67,14 @@ def test_create_title_required_fields_only( title = dbsession.get(Title, data["id"]) assert title is not None assert title.name == "wikipedia_en_test" - assert title.producer_unique_id == "550e8400-e29b-41d4-a716-446655440000" - assert title.producer_display_name is None - assert title.producer_display_url is None - - # Verify warehouse paths - dev_paths = [ - twp.warehouse_path_id for twp in title.warehouse_paths if twp.path_type == "dev" - ] - prod_paths = [ - twp.warehouse_path_id - for twp in title.warehouse_paths - if twp.path_type == "prod" - ] - assert dev_warehouse_path.id in dev_paths - assert prod_warehouse_path.id in prod_paths - assert title.in_prod is False - - -def test_create_title_with_optional_fields( - client: TestClient, - dbsession: OrmSession, - create_warehouse_path: Callable[..., WarehousePath], -): - """Test creating a title with all fields including optional ones""" - dev_warehouse_path = create_warehouse_path() - prod_warehouse_path = create_warehouse_path() - - title_data = { - "name": "wikipedia_fr_test", - "producer_unique_id": "550e8400-e29b-41d4-a716-446655440001", - "producer_display_name": 
"farm.openzim.org: wikipedia_fr_test", - "producer_display_url": "https://farm.openzim.org/recipes/wikipedia_fr_test", - "dev_warehouse_path_ids": [str(dev_warehouse_path.id)], - "prod_warehouse_path_ids": [str(prod_warehouse_path.id)], - "in_prod": True, - } - - response = client.post("/v1/titles", json=title_data) - assert response.status_code == HTTPStatus.OK - data = response.json() - - assert "id" in data - assert "name" in data - assert data["name"] == "wikipedia_fr_test" - - # Verify all fields were stored correctly - title = dbsession.get(Title, data["id"]) - assert title is not None - assert title.name == "wikipedia_fr_test" - assert title.producer_unique_id == "550e8400-e29b-41d4-a716-446655440001" - assert title.producer_display_name == "farm.openzim.org: wikipedia_fr_test" - assert ( - title.producer_display_url - == "https://farm.openzim.org/recipes/wikipedia_fr_test" - ) - - # Verify warehouse paths - dev_paths = [ - twp.warehouse_path_id for twp in title.warehouse_paths if twp.path_type == "dev" - ] - prod_paths = [ - twp.warehouse_path_id - for twp in title.warehouse_paths - if twp.path_type == "prod" - ] - assert dev_warehouse_path.id in dev_paths - assert prod_warehouse_path.id in prod_paths - assert title.in_prod is True - - -def test_create_title_missing_required_field( - client: TestClient, - create_warehouse_path: Callable[..., WarehousePath], -): - """Test creating a title with missing required field returns validation error""" - dev_warehouse_path = create_warehouse_path() - - title_data = { - "name": "wikipedia_en_incomplete", - "producer_unique_id": "550e8400-e29b-41d4-a716-446655440002", - "dev_warehouse_path_ids": [str(dev_warehouse_path.id)], - # Missing prod_warehouse_path_ids - } - - response = client.post("/v1/titles", json=title_data) - assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY def test_create_title_duplicate_name( client: TestClient, - create_warehouse_path: Callable[..., WarehousePath], ): """Test creating a 
title with duplicate name returns conflict error""" - dev_warehouse_path = create_warehouse_path() - prod_warehouse_path = create_warehouse_path() - title_data = { "name": "wikipedia_en_duplicate", - "producer_unique_id": "550e8400-e29b-41d4-a716-446655440003", - "dev_warehouse_path_ids": [str(dev_warehouse_path.id)], - "prod_warehouse_path_ids": [str(prod_warehouse_path.id)], } # Create the first title @@ -196,53 +94,29 @@ def test_get_title_by_id( """Test retrieving a title by ID returns full details""" title = create_title( name="wikipedia_en_test", - producer_unique_id="550e8400-e29b-41d4-a716-446655440000", - producer_display_name="farm.openzim.org: wikipedia_en_test", - producer_display_url="https://farm.openzim.org/recipes/wikipedia_en_test", ) response = client.get(f"/v1/titles/{title.id}") assert response.status_code == HTTPStatus.OK data = response.json() - # Verify all TitleFullSchema fields are present + # Verify TitleFullSchema fields assert set(data.keys()) == { "id", "name", - "producer_unique_id", - "producer_display_name", - "producer_display_url", - "dev_warehouse_paths", - "prod_warehouse_paths", - "in_prod", + "maturity", "events", "books", + "collections", } # Verify field values assert data["id"] == str(title.id) assert data["name"] == "wikipedia_en_test" - assert data["producer_unique_id"] == "550e8400-e29b-41d4-a716-446655440000" - assert data["producer_display_name"] == "farm.openzim.org: wikipedia_en_test" - assert ( - data["producer_display_url"] - == "https://farm.openzim.org/recipes/wikipedia_en_test" - ) - assert isinstance(data["dev_warehouse_paths"], list) - assert isinstance(data["prod_warehouse_paths"], list) - assert len(data["dev_warehouse_paths"]) >= 1 - assert len(data["prod_warehouse_paths"]) >= 1 - assert data["in_prod"] == title.in_prod assert isinstance(data["events"], list) assert isinstance(data["books"], list) assert len(data["books"]) == 0 - # Verify warehouse path structure - dev_path = data["dev_warehouse_paths"][0] - 
assert set(dev_path.keys()) == {"path_id", "folder_name", "warehouse_name"} - prod_path = data["prod_warehouse_paths"][0] - assert set(prod_path.keys()) == {"path_id", "folder_name", "warehouse_name"} - def test_get_title_by_id_with_books( client: TestClient, @@ -253,27 +127,20 @@ def test_get_title_by_id_with_books( """Test retrieving a title with associated books""" title = create_title( name="wikipedia_en_test", - producer_unique_id="550e8400-e29b-41d4-a716-446655440000", ) # Create books associated with this title book1 = create_book( zim_metadata={"Name": "wikipedia_en_test"}, - producer_unique_id=title.producer_unique_id, - producer_display_name="farm.openzim.org: wikipedia_en_test", - producer_display_url="https://farm.openzim.org/recipes/wikipedia_en_test", ) book2 = create_book( zim_metadata={"Name": "wikipedia_en_test"}, - producer_unique_id=title.producer_unique_id, - producer_display_name="farm.openzim.org: wikipedia_en_test", - producer_display_url="https://farm.openzim.org/recipes/wikipedia_en_test", ) # Associate books with title title.books.append(book1) title.books.append(book2) - dbsession.flush() # Flush to update title_id on books + dbsession.flush() response = client.get(f"/v1/titles/{title.id}") assert response.status_code == HTTPStatus.OK diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index c3f5f88..b6ed000 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -1,6 +1,7 @@ # ruff: noqa: E501 from collections.abc import Callable, Generator from datetime import datetime +from pathlib import Path from typing import Any from uuid import UUID, uuid4 @@ -13,12 +14,10 @@ Base, Book, BookLocation, - Library, - LibraryWarehousePath, + Collection, + CollectionTitle, Title, - TitleWarehousePath, Warehouse, - WarehousePath, ZimfarmNotification, ) from cms_backend.utils.datetime import getnow @@ -95,10 +94,22 @@ def _create_book( date: str | None = None, flavour: str | None = None, zimfarm_notification: 
ZimfarmNotification | None = None, - producer_display_name: str | None = None, - producer_display_url: str | None = None, - producer_unique_id: str | None = None, ) -> Book: + if zim_metadata is None: + zim_metadata = {} + + # Extract name from zim_metadata if not explicitly provided + if name is None: + name = zim_metadata.get("Name") + + # Extract date from zim_metadata if not explicitly provided + if date is None: + date = zim_metadata.get("Date") + + # Extract flavour from zim_metadata if not explicitly provided + if flavour is None: + flavour = zim_metadata.get("Flavour") + book = Book( id=_id if _id is not None else uuid4(), created_at=created_at if created_at is not None else getnow(), @@ -107,29 +118,13 @@ def _create_book( ), media_count=media_count if media_count is not None else faker.random_int(), size=size if size is not None else faker.random_int(), - zim_metadata=zim_metadata if zim_metadata else {}, + zim_metadata=zim_metadata, zimcheck_result=zimcheck_result if zimcheck_result else {}, name=name, date=date, flavour=flavour, zimfarm_notification=zimfarm_notification, - producer_display_name=( - producer_display_name - if producer_display_name is not None - else faker.company() - ), - producer_display_url=( - producer_display_url - if producer_display_url is not None - else faker.url() - ), - producer_unique_id=( - producer_unique_id - if producer_unique_id is not None - else str(faker.uuid4()) - ), ) - # book.events = [] dbsession.add(book) dbsession.flush() return book @@ -147,50 +142,14 @@ def book( @pytest.fixture def create_title( dbsession: OrmSession, - faker: Faker, - create_warehouse_path: Callable[..., WarehousePath], ) -> Callable[..., Title]: def _create_title( *, name: str = "test_en_all", - producer_unique_id: str | None = None, - producer_display_name: str | None = None, - producer_display_url: str | None = None, - dev_warehouse_path_ids: list[UUID] | None = None, - prod_warehouse_path_ids: list[UUID] | None = None, - in_prod: bool = 
False, ) -> Title: title = Title( name=name, - producer_unique_id=( - producer_unique_id - if producer_unique_id is not None - else str(faker.uuid4()) - ), ) - title.producer_display_name = producer_display_name - title.producer_display_url = producer_display_url - title.in_prod = in_prod - - # Create default warehouse paths if not provided (None means create default) - # Empty list means explicitly no paths - if dev_warehouse_path_ids is None: - dev_warehouse_path = create_warehouse_path() - dev_warehouse_path_ids = [dev_warehouse_path.id] - if prod_warehouse_path_ids is None: - prod_warehouse_path = create_warehouse_path() - prod_warehouse_path_ids = [prod_warehouse_path.id] - - # Add warehouse path associations - for path_id in dev_warehouse_path_ids: - twp = TitleWarehousePath(path_type="dev") - twp.warehouse_path_id = path_id - title.warehouse_paths.append(twp) - for path_id in prod_warehouse_path_ids: - twp = TitleWarehousePath(path_type="prod") - twp.warehouse_path_id = path_id - title.warehouse_paths.append(twp) - dbsession.add(title) dbsession.flush() return title @@ -212,11 +171,9 @@ def create_warehouse( ) -> Callable[..., Warehouse]: def _create_warehouse( name: str | None = None, - configuration: dict[str, Any] | None = None, ) -> Warehouse: warehouse = Warehouse( name=name if name is not None else faker.company(), - configuration=configuration if configuration is not None else {}, ) dbsession.add(warehouse) dbsession.flush() @@ -232,62 +189,43 @@ def warehouse( return create_warehouse() -@pytest.fixture -def create_warehouse_path( - dbsession: OrmSession, - faker: Faker, - create_warehouse: Callable[..., Warehouse], -) -> Callable[..., WarehousePath]: - def _create_warehouse_path( - folder_name: str | None = None, - warehouse: Warehouse | None = None, - ) -> WarehousePath: - warehouse_path = WarehousePath( - folder_name=folder_name if folder_name is not None else faker.file_path(), - ) - warehouse_path.warehouse = ( - warehouse if warehouse is not 
None else create_warehouse() - ) - dbsession.add(warehouse_path) - dbsession.flush() - return warehouse_path - - return _create_warehouse_path - - -@pytest.fixture -def warehouse_path( - create_warehouse_path: Callable[..., WarehousePath], - warehouse: Warehouse, -) -> WarehousePath: - return create_warehouse_path(warehouse=warehouse) - - @pytest.fixture def create_book_location( dbsession: OrmSession, + faker: Faker, create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], + create_warehouse: Callable[..., Warehouse], ) -> Callable[..., BookLocation]: def _create_book_location( book: Book | None = None, - warehouse_path: WarehousePath | None = None, + warehouse_id: UUID | None = None, + path: str | Path | None = None, filename: str | None = None, status: str = "current", ) -> BookLocation: if book is None: book = create_book() - if warehouse_path is None: - warehouse_path = create_warehouse_path() + + if warehouse_id is None: + warehouse = create_warehouse() + warehouse_id = warehouse.id + + if path is None: + path = Path(faker.file_path()) + else: + # Convert string paths to Path objects + path = Path(path) if isinstance(path, str) else path + if filename is None: filename = "test_file.zim" location = BookLocation( book_id=book.id, + warehouse_id=warehouse_id, + path=path, status=status, filename=filename, ) - location.warehouse_path_id = warehouse_path.id dbsession.add(location) dbsession.flush() return location @@ -296,34 +234,47 @@ def _create_book_location( @pytest.fixture -def create_library( +def book_location( + create_book_location: Callable[..., BookLocation], +) -> BookLocation: + return create_book_location() + + +@pytest.fixture +def create_collection( dbsession: OrmSession, faker: Faker, -) -> Callable[..., Library]: - def _create_library( + create_warehouse: Callable[..., Warehouse], +) -> Callable[..., Collection]: + def _create_collection( name: str | None = None, - warehouse_path_ids: list[UUID] | None = None, - ) 
-> Library: - library = Library( + warehouse: Warehouse | None = None, + title_ids_with_paths: list[tuple[UUID, str]] | None = None, + ) -> Collection: + if warehouse is None: + warehouse = create_warehouse() + + collection = Collection( name=name if name is not None else faker.slug(), + warehouse_id=warehouse.id, ) - # Add warehouse path associations if provided - if warehouse_path_ids: - for path_id in warehouse_path_ids: - lwp = LibraryWarehousePath() - lwp.warehouse_path_id = path_id - library.warehouse_paths.append(lwp) + # Add title associations if provided + if title_ids_with_paths: + for title_id, path in title_ids_with_paths: + ct = CollectionTitle(path=Path(path)) + ct.title_id = title_id + collection.titles.append(ct) - dbsession.add(library) + dbsession.add(collection) dbsession.flush() - return library + return collection - return _create_library + return _create_collection @pytest.fixture -def library( - create_library: Callable[..., Library], -) -> Library: - return create_library() +def collection( + create_collection: Callable[..., Collection], +) -> Collection: + return create_collection() diff --git a/backend/tests/db/test_book.py b/backend/tests/db/test_book.py index 069d9c5..4a909b4 100644 --- a/backend/tests/db/test_book.py +++ b/backend/tests/db/test_book.py @@ -1,7 +1,7 @@ from faker import Faker from sqlalchemy.orm import Session as OrmSession -from cms_backend.db.book import create_book +from cms_backend.db.book import create_book as db_create_book from cms_backend.db.models import ZimfarmNotification @@ -9,18 +9,16 @@ def test_create_book( dbsession: OrmSession, zimfarm_notification: ZimfarmNotification, faker: Faker ): """Create a book from a zimfarm notification""" - book = create_book( + book_id = zimfarm_notification.id # Use zimfarm notification ID as book ID + book = db_create_book( dbsession, - book_id=zimfarm_notification.id, + book_id=book_id, article_count=faker.random_int(), media_count=faker.random_int(), 
size=faker.random_int(), zim_metadata={"key": "value"}, zimcheck_result={"check_key": "check_value"}, zimfarm_notification=zimfarm_notification, - producer_display_name=faker.company(), - producer_display_url=faker.url(), - producer_unique_id=str(faker.uuid4()), ) dbsession.flush() assert book.zimfarm_notification == zimfarm_notification diff --git a/backend/tests/db/test_zimfarm_notification.py b/backend/tests/db/test_zimfarm_notification.py index 4180b7e..8d1dd8e 100644 --- a/backend/tests/db/test_zimfarm_notification.py +++ b/backend/tests/db/test_zimfarm_notification.py @@ -5,7 +5,6 @@ import pytest from faker import Faker from sqlalchemy.orm import Session as OrmSession -from tests.processors.test_zimfarm_notification import GOOD_NOTIFICATION_CONTENT from cms_backend.db.exceptions import RecordDoesNotExistError from cms_backend.db.models import Book, ZimfarmNotification @@ -20,6 +19,12 @@ ) from cms_backend.utils.datetime import getnow +# Example notification content for testing +GOOD_NOTIFICATION_CONTENT = { + "requested_task_id": "test-task-123", + "status": "success", +} + def test_get_zimfarm_notification_or_none_( dbsession: OrmSession, diff --git a/backend/tests/mill/__init__.py b/backend/tests/mill/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/mill/processors/__init__.py b/backend/tests/mill/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/mill/processors/conftest.py b/backend/tests/mill/processors/conftest.py new file mode 100644 index 0000000..2ac4f77 --- /dev/null +++ b/backend/tests/mill/processors/conftest.py @@ -0,0 +1,10 @@ +"""Fixtures for mill processors tests.""" + +import os +from uuid import uuid4 + +# Set up environment variables for MillContext before any imports +os.environ.setdefault("JAIL_WAREHOUSE_ID", str(uuid4())) +os.environ.setdefault("STAGING_WAREHOUSE_ID", str(uuid4())) +os.environ.setdefault("JAIL_BASE_PATH", "/jail") 
+os.environ.setdefault("STAGING_BASE_PATH", "/staging") diff --git a/backend/tests/mill/processors/test_book.py b/backend/tests/mill/processors/test_book.py new file mode 100644 index 0000000..d402dc0 --- /dev/null +++ b/backend/tests/mill/processors/test_book.py @@ -0,0 +1,190 @@ +"""Tests for book processor.""" + +from collections.abc import Callable +from unittest.mock import patch + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.models import Book, Title +from cms_backend.mill.processors.book import ( + check_book_zim_spec, + get_matching_title, + process_book, +) + +MINIMUM_ZIM_METADATA = { + "Name": "test_en_all", + "Title": "A superb ZIM", + "Creator": "openZIM", + "Publisher": "openZIM", + "Date": "2025-10-01", + "Description": "About a super content", + "Language": "eng", +} + +GOOD_ZIM_METADATA = { + **MINIMUM_ZIM_METADATA, + "Long description": "X" * 100, + "Flavour": "nopic", +} + + +def test_check_book_zim_spec_with_good_metadata( + create_book: Callable[..., Book], +): + """Test that book with all mandatory metadata passes ZIM spec check""" + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + + assert check_book_zim_spec(book) is True + assert any("passed ZIM specification checks" in event for event in book.events) + + +def test_check_book_zim_spec_missing_metadata( + create_book: Callable[..., Book], +): + """Test that book with missing mandatory metadata fails ZIM spec check""" + # Missing "Title" + metadata = {k: v for k, v in GOOD_ZIM_METADATA.items() if k != "Title"} + book = create_book(zim_metadata=metadata) + + assert check_book_zim_spec(book) is False + assert book.status == "bad_book" + assert any( + "missing mandatory metadata" in event and "Title" in event + for event in book.events + ) + + +def test_check_book_zim_spec_empty_metadata_value( + create_book: Callable[..., Book], +): + """Test that book with empty mandatory metadata value fails ZIM spec check""" + metadata = GOOD_ZIM_METADATA.copy() + 
metadata["Title"] = "" # Empty string + book = create_book(zim_metadata=metadata) + + assert check_book_zim_spec(book) is False + assert book.status == "bad_book" + assert any( + "missing mandatory metadata" in event and "Title" in event + for event in book.events + ) + + +def test_get_matching_title_with_matching_title( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_title: Callable[..., Title], +): + """Test that get_matching_title finds a title by book name""" + title = create_title(name="test_en_all") + dbsession.flush() + + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + + found_title = get_matching_title(dbsession, book) + + assert found_title is not None + assert found_title.id == title.id + assert any(f"found matching title {title.id}" in event for event in book.events) + + +def test_get_matching_title_no_match( + dbsession: OrmSession, + create_book: Callable[..., Book], +): + """Test that get_matching_title returns None when no title matches""" + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + # No title with this name exists + found_title = get_matching_title(dbsession, book) + + assert found_title is None + assert book.status == "pending_title" + assert any("no matching title found for book" in event for event in book.events) + + +def test_get_matching_title_missing_book_name( + dbsession: OrmSession, + create_book: Callable[..., Book], +): + """Test that get_matching_title handles missing book name""" + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + book.name = None # Simulate missing name + dbsession.flush() + + found_title = get_matching_title(dbsession, book) + + assert found_title is None + assert book.status == "bad_book" + assert any( + "no title can be found because name is missing" in event + for event in book.events + ) + + +def test_process_book_with_matching_title( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_title: Callable[..., Title], +): + """Test that 
process_book calls add_book_to_title when title is found""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + with patch("cms_backend.mill.processors.book.add_book_to_title") as mock_add: + process_book(dbsession, book) + + # Should have passed ZIM spec check + assert any("passed ZIM specification checks" in event for event in book.events) + + # Should have found title + assert any(f"found matching title {title.id}" in event for event in book.events) + + # Should have called add_book_to_title + mock_add.assert_called_once() + assert mock_add.call_args[0][1] == book + assert mock_add.call_args[0][2].id == title.id + + +def test_process_book_bad_zim_spec( + dbsession: OrmSession, + create_book: Callable[..., Book], +): + """Test that process_book stops if ZIM spec check fails""" + # Missing Title + metadata = {k: v for k, v in GOOD_ZIM_METADATA.items() if k != "Title"} + book = create_book(zim_metadata=metadata) + dbsession.flush() + + with patch("cms_backend.mill.processors.book.add_book_to_title") as mock_add: + process_book(dbsession, book) + + # Should have failed ZIM spec check + assert book.status == "bad_book" + + # Should NOT have called add_book_to_title + mock_add.assert_not_called() + + +def test_process_book_no_matching_title( + dbsession: OrmSession, + create_book: Callable[..., Book], +): + """Test that process_book stops if no matching title is found""" + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + with patch("cms_backend.mill.processors.book.add_book_to_title") as mock_add: + process_book(dbsession, book) + + # Should have passed ZIM spec check + assert any("passed ZIM specification checks" in event for event in book.events) + + # Should have no matching title + assert book.status == "pending_title" + + # Should NOT have called add_book_to_title + mock_add.assert_not_called() diff --git a/backend/tests/mill/processors/test_title.py 
b/backend/tests/mill/processors/test_title.py new file mode 100644 index 0000000..7b820cb --- /dev/null +++ b/backend/tests/mill/processors/test_title.py @@ -0,0 +1,212 @@ +"""Tests for title processor.""" + +from collections.abc import Callable +from unittest.mock import patch + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.models import Book, Collection, Title, Warehouse +from cms_backend.mill.processors.title import add_book_to_title + +GOOD_ZIM_METADATA = { + "Name": "test_en_all", + "Title": "Test Article", + "Creator": "openZIM", + "Publisher": "openZIM", + "Date": "2025-01-01", + "Description": "Test description", + "Language": "eng", + "Flavour": "full", +} + + +def test_add_book_to_title_sets_events( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title sets events on both book and title""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + with patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + # Check book events + assert any(f"book added to title {title.id}" in event for event in book.events) + + # Check title events + assert any(f"book {book.id} added to title" in event for event in title.events) + + +def test_add_book_to_title_adds_book_to_relationship( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title adds book to title.books relationship""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + with patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + assert book in title.books + assert book.title_id == title.id + + +def test_add_book_to_title_updates_title_name_when_different( + dbsession: 
OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title updates title name when book has different name""" + title = create_title(name="old_name") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) # Name is "test_en_all" + dbsession.flush() + + assert title.name == "old_name" + assert book.name == "test_en_all" + + with patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + # Title name should be updated + assert title.name == "test_en_all" + + # Event should be logged + assert any("updating title name to test_en_all" in event for event in title.events) + + +def test_add_book_to_title_no_update_when_name_same( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title doesn't update title name when it's the same""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) # Name is "test_en_all" + dbsession.flush() + + with patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + # Title name should remain the same + assert title.name == "test_en_all" + + # No name update event should be logged + assert not any("updating title name" in event for event in title.events) + + +def test_add_book_to_title_dev_maturity_staging( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that books for dev maturity titles are sent to staging""" + title = create_title(name="test_en_all") + title.maturity = "dev" # Default, but explicit + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + dbsession.flush() + + with patch( + "cms_backend.mill.processors.title.create_book_target_locations" + ) as mock_create: + add_book_to_title(dbsession, book, title) + + # Should have called create_book_target_locations with 
staging location + mock_create.assert_called_once() + target_locations = mock_create.call_args[1]["target_locations"] + + # Should be a single staging location + assert len(target_locations) == 1 + assert target_locations[0].path.name == "staging" # MillContext.staging_base_path + + +def test_add_book_to_title_robust_maturity_collections( + dbsession: OrmSession, + create_collection: Callable[..., Collection], + create_title: Callable[..., Title], + create_book: Callable[..., Book], + create_warehouse: Callable[..., Warehouse], +): + """Test that books for robust maturity titles go directly to collection + warehouses""" + warehouse = create_warehouse() + collection = create_collection(warehouse=warehouse) + + # Create a title and associate it with collection + title = create_title(name="test_en_all") + title.maturity = "robust" + from pathlib import Path + + from cms_backend.db.models import CollectionTitle + + ct = CollectionTitle(path=Path("wikipedia")) + ct.title = title + ct.collection = collection + dbsession.add(ct) + dbsession.flush() + + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + + with patch( + "cms_backend.mill.processors.title.create_book_target_locations" + ) as mock_create: + add_book_to_title(dbsession, book, title) + + # Should have called create_book_target_locations with collection locations + mock_create.assert_called_once() + target_locations = mock_create.call_args[1]["target_locations"] + + # Should have one location per collection + assert len(target_locations) == 1 + assert target_locations[0].warehouse_id == collection.warehouse_id + + +def test_add_book_to_title_missing_book_name( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title handles missing book name""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + book.name = None # Simulate missing name + dbsession.flush() + + with 
patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + # Should have error events + assert any( + f"error encountered while adding to title {title.id}" in event + for event in book.events + ) + assert any( + f"error encountered while adding book {book.id}" in event + for event in title.events + ) + + +def test_add_book_to_title_missing_book_date( + dbsession: OrmSession, + create_title: Callable[..., Title], + create_book: Callable[..., Book], +): + """Test that add_book_to_title handles missing book date""" + title = create_title(name="test_en_all") + book = create_book(zim_metadata=GOOD_ZIM_METADATA) + book.date = None # Simulate missing date + dbsession.flush() + + with patch("cms_backend.mill.processors.title.create_book_target_locations"): + add_book_to_title(dbsession, book, title) + + # Should have error events + assert any( + f"error encountered while adding to title {title.id}" in event + for event in book.events + ) diff --git a/backend/tests/mill/processors/test_zimfarm_notification.py b/backend/tests/mill/processors/test_zimfarm_notification.py new file mode 100644 index 0000000..4131453 --- /dev/null +++ b/backend/tests/mill/processors/test_zimfarm_notification.py @@ -0,0 +1,172 @@ +"""Tests for zimfarm notification processor.""" + +from collections.abc import Callable +from pathlib import Path +from unittest.mock import MagicMock, patch + +from sqlalchemy.orm import Session as OrmSession + +from cms_backend.db.models import Warehouse, ZimfarmNotification +from cms_backend.mill.processors.zimfarm_notification import process_notification + +VALID_NOTIFICATION_CONTENT = { + "article_count": 1000, + "media_count": 500, + "size": 1000000, + "metadata": { + "Name": "test_en_all", + "Title": "Test Article", + "Creator": "Test Creator", + "Publisher": "Test Publisher", + "Date": "2025-01-01", + "Description": "Test description", + "Language": "eng", + }, + "zimcheck": {"status": "passed"}, + 
"folder_name": "test_folder", + "filename": "test.zim", +} + + +def test_process_notification_missing_mandatory_keys( + dbsession: OrmSession, + create_zimfarm_notification: Callable[..., ZimfarmNotification], +): + """Test that notification with missing mandatory keys is marked as + bad_notification""" + # Missing article_count and media_count + content = { + "size": 1000000, + "metadata": {"Name": "test"}, + "zimcheck": {}, + "folder_name": "test", + "filename": "test.zim", + } + + notification = create_zimfarm_notification(content=content) + dbsession.flush() + + with patch("cms_backend.mill.processors.zimfarm_notification.process_book"): + process_notification(dbsession, notification) + + assert notification.status == "bad_notification" + assert any("missing mandatory keys" in event for event in notification.events) + assert "article_count" in notification.events[0] + assert "media_count" in notification.events[0] + + +def test_process_notification_invalid_filename( + dbsession: OrmSession, + create_zimfarm_notification: Callable[..., ZimfarmNotification], +): + """Test that notification with invalid filename is marked as bad_notification""" + content = VALID_NOTIFICATION_CONTENT.copy() + content["filename"] = 123 # Invalid: should be string + + notification = create_zimfarm_notification(content=content) + dbsession.flush() + + with patch("cms_backend.mill.processors.zimfarm_notification.process_book"): + process_notification(dbsession, notification) + + assert notification.status == "bad_notification" + assert any( + "filename must be a non-empty string" in event for event in notification.events + ) + + +def test_process_notification_invalid_folder_name( + dbsession: OrmSession, + create_zimfarm_notification: Callable[..., ZimfarmNotification], +): + """Test that notification with invalid folder_name is marked as bad_notification""" + content = VALID_NOTIFICATION_CONTENT.copy() + content["folder_name"] = None # type: ignore[assignment] # Invalid: should be 
string + + notification = create_zimfarm_notification(content=content) + dbsession.flush() + + with patch("cms_backend.mill.processors.zimfarm_notification.process_book"): + process_notification(dbsession, notification) + + assert notification.status == "bad_notification" + assert any( + "folder_name must be a non-empty string" in event + for event in notification.events + ) + + +def test_process_notification_valid_creates_book_and_location( + dbsession: OrmSession, + create_zimfarm_notification: Callable[..., ZimfarmNotification], + create_warehouse: Callable[..., Warehouse], +): + """Test that valid notification creates book and location, + then calls process_book""" + # Create the jail warehouse that MillContext expects + jail_warehouse = create_warehouse(name="jail") + dbsession.flush() + + notification = create_zimfarm_notification(content=VALID_NOTIFICATION_CONTENT) + dbsession.flush() + + # Mock MillContext to use our test warehouse + mock_context = MagicMock() + mock_context.jail_warehouse_id = jail_warehouse.id + mock_context.jail_base_path = Path("/jail") + + with patch( + "cms_backend.mill.processors.zimfarm_notification.MillContext", mock_context + ): + with patch( + "cms_backend.mill.processors.zimfarm_notification.process_book" + ) as mock_process_book: + process_notification(dbsession, notification) + + # Notification should be marked as processed + assert notification.status == "processed" + + # process_book should have been called + mock_process_book.assert_called_once() + called_book = mock_process_book.call_args[0][1] + + # Book should be created with correct properties + assert called_book.id == notification.id + assert called_book.article_count == VALID_NOTIFICATION_CONTENT["article_count"] + assert called_book.media_count == VALID_NOTIFICATION_CONTENT["media_count"] + assert called_book.size == VALID_NOTIFICATION_CONTENT["size"] + assert called_book.zim_metadata == VALID_NOTIFICATION_CONTENT["metadata"] + assert called_book.zimcheck_result == 
VALID_NOTIFICATION_CONTENT["zimcheck"] + + # Book should have a location in jail warehouse + assert len(called_book.locations) == 1 + location = called_book.locations[0] + assert location.filename == VALID_NOTIFICATION_CONTENT["filename"] + assert location.status == "current" + assert location.warehouse_id == jail_warehouse.id + + # Book should have a creation event + assert any( + "created from Zimfarm notification" in event for event in called_book.events + ) + + +def test_process_notification_error_handling( + dbsession: OrmSession, + create_zimfarm_notification: Callable[..., ZimfarmNotification], +): + """Test that errors during processing are caught and status is set to errored""" + notification = create_zimfarm_notification(content=VALID_NOTIFICATION_CONTENT) + dbsession.flush() + + with patch( + "cms_backend.mill.processors.zimfarm_notification.process_book", + side_effect=Exception("Test error"), + ): + process_notification(dbsession, notification) + + assert notification.status == "errored" + assert any( + "error encountered while processing notification" in event + for event in notification.events + ) diff --git a/backend/tests/processors/test_book.py b/backend/tests/processors/test_book.py deleted file mode 100644 index e7823bf..0000000 --- a/backend/tests/processors/test_book.py +++ /dev/null @@ -1,422 +0,0 @@ -import re -from collections.abc import Callable -from typing import Any - -import pytest -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db.models import Book, BookLocation, Title, WarehousePath -from cms_backend.processors.book import ( - _current_locations_match_targets, # pyright: ignore[reportPrivateUsage] - check_book_qa, - get_matching_title, -) - -MINIMUM_ZIM_METADATA = { - "Name": "test_en_all", - "Title": "A superb ZIM", - "Creator": "openZIM", - "Publisher": "openZIM", - "Date": "2025-10-01", - "Description": "About a super content", - "Language": "eng", -} - -# minimum set of metadata + a long description + a 
flavour + a random one -GOOD_ZIM_METADATA = { - **MINIMUM_ZIM_METADATA, - "Long description": "X" * 100, - "Flavour": "nopic", - "Foo": "Bar", -} - - -def test_check_book_qa_ok(create_book: Callable[..., Book]): - """Check book QA - all is good""" - - book = create_book(zim_metadata=GOOD_ZIM_METADATA) - assert len(book.events) == 0 - book_qa = check_book_qa(book=book) - assert book_qa is True - assert any( - event for event in book.events if re.match(".*: book passed QA checks", event) - ) - - -@pytest.mark.parametrize( - "missing_key, book_metadata", - [ - pytest.param( - missing_key, - { - key: value - for key, value in GOOD_ZIM_METADATA.items() - if key != missing_key - }, - id=f"missing-{missing_key}", - ) - for missing_key in MINIMUM_ZIM_METADATA.keys() - ], -) -def test_check_book_qa_missing_mandatory( - create_book: Callable[..., Book], missing_key: str, book_metadata: dict[str, Any] -): - """Check book QA - one mandatory key is missing""" - - book = create_book(zim_metadata=book_metadata) - assert len(book.events) == 0 - book_qa = check_book_qa(book=book) - assert book_qa is False - assert any( - event - for event in book.events - if re.match(f".*: book is missing mandatory metadata: {missing_key}", event) - ) - - -def test_check_book_qa_missing_multiple_mandatory(create_book: Callable[..., Book]): - """Check book QA - one mandatory key is missing""" - - book = create_book( - zim_metadata={ - key: value - for key, value in GOOD_ZIM_METADATA.items() - if key not in ("Description", "Title") - } - ) - assert len(book.events) == 0 - book_qa = check_book_qa(book=book) - assert book_qa is False - assert any( - event - for event in book.events - if re.match(".*: book is missing mandatory metadata: Description,Title", event) - ) - - -def test_check_book_qa_bad_error(create_book: Callable[..., Book]): - """Get matching title for a given book - bad error occurs""" - - book = create_book(zim_metadata=GOOD_ZIM_METADATA) - assert len(book.events) == 0 - # simulate a 
very bad error by dropping an expected property (and adding it back so - # that SQLAlchemy does not choke) - save_metadata = book.zim_metadata - del book.zim_metadata - book_qa = check_book_qa(book=book) - book.zim_metadata = save_metadata - assert book_qa is False - assert any( - event - for event in book.events - if re.match(".*: error encountered while checking book QA", event) - ) - - -def test_get_matching_title_found( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Get matching title for a given book - title exist""" - - book = create_book(name=title.name, producer_unique_id=title.producer_unique_id) - assert len(book.events) == 0 - assert len(title.events) == 0 - matching_title = get_matching_title(dbsession, book=book) - assert matching_title == title - assert any( - event for event in book.events if re.match(".*: found matching title .*", event) - ) - assert len(title.events) == 0 - - -def test_get_matching_title_not_found( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Get matching title for a given book - title does not exist""" - - book_name = "test2_fr_all" - assert book_name != title.name - book = create_book(name=book_name) - assert len(book.events) == 0 - assert len(title.events) == 0 - matching_title = get_matching_title(dbsession, book=book) - assert matching_title is None - assert any( - event - for event in book.events - if re.match(".*: no matching title found for book", event) - ) - assert len(title.events) == 0 - - -def test_get_matching_title_no_name( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Get matching title for a given book - book has an empty Name""" - - book_name = "" - assert book_name != title.name - book = create_book(name=book_name) - assert len(book.events) == 0 - assert len(title.events) == 0 - matching_title = get_matching_title(dbsession, book=book) - assert matching_title is None - assert any( - event - for event in 
book.events - if re.match(".*: no title can be found because name is missing", event) - ) - assert len(title.events) == 0 - - -def test_get_matching_title_bad_error( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Get matching title for a given book - bad error occurs""" - - book = create_book(name=title.name) - assert len(book.events) == 0 - assert len(title.events) == 0 - # simulate a very bad error by dropping an expected property (and adding it back so - # that SQLAlchemy does not choke) - save_producer_unique_id = book.producer_unique_id - del book.producer_unique_id - matching_title = get_matching_title(dbsession, book=book) - book.producer_unique_id = save_producer_unique_id - assert matching_title is None - assert any( - event - for event in book.events - if re.match(".*: error encountered while get matching title", event) - ) - assert len(title.events) == 0 - - -class TestCurrentLocationsMatchTargets: - """Test the _current_locations_match_targets helper function.""" - - def test_exact_match_single_location( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book with single current location matching single target should return - True.""" - book = create_book() - warehouse_path = create_warehouse_path() - - # Add current location - current_location = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - current_location.warehouse_path_id = warehouse_path.id - book.locations.append(current_location) - dbsession.add(current_location) - dbsession.flush() - - # Target matches current - target_locations = [(warehouse_path.id, "test_book_2024-01.zim")] - - assert _current_locations_match_targets(book, target_locations) is True - - def test_exact_match_multiple_locations( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book 
with multiple locations matching all targets should return True.""" - book = create_book() - path1 = create_warehouse_path(folder_name="path1") - path2 = create_warehouse_path(folder_name="path2") - - # Add current locations - loc1 = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - loc1.warehouse_path_id = path1.id - loc2 = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - loc2.warehouse_path_id = path2.id - book.locations.extend([loc1, loc2]) - dbsession.add(loc1) - dbsession.add(loc2) - dbsession.flush() - - # Targets match all currents - target_locations = [ - (path1.id, "test_book_2024-01.zim"), - (path2.id, "test_book_2024-01.zim"), - ] - - assert _current_locations_match_targets(book, target_locations) is True - - def test_no_match_different_filenames( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book with different filename than target should return False.""" - book = create_book() - warehouse_path = create_warehouse_path() - - # Add current location with different filename - current_location = BookLocation( - book_id=book.id, - status="current", - filename="old_filename_2024-01.zim", - ) - current_location.warehouse_path_id = warehouse_path.id - book.locations.append(current_location) - dbsession.add(current_location) - dbsession.flush() - - # Target has different filename - target_locations = [(warehouse_path.id, "test_book_2024-01.zim")] - - assert _current_locations_match_targets(book, target_locations) is False - - def test_no_match_different_warehouse_path( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book at different warehouse than target should return False.""" - book = create_book() - path1 = create_warehouse_path(folder_name="path1") - path2 = 
create_warehouse_path(folder_name="path2") - - # Add current location at path1 - current_location = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - current_location.warehouse_path_id = path1.id - book.locations.append(current_location) - dbsession.add(current_location) - dbsession.flush() - - # Target specifies path2 - target_locations = [(path2.id, "test_book_2024-01.zim")] - - assert _current_locations_match_targets(book, target_locations) is False - - def test_no_match_subset_current_locations( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book with only 1 current location should not match 2 target locations.""" - book = create_book() - path1 = create_warehouse_path(folder_name="path1") - path2 = create_warehouse_path(folder_name="path2") - - # Add current location at only path1 - current_location = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - current_location.warehouse_path_id = path1.id - book.locations.append(current_location) - dbsession.add(current_location) - dbsession.flush() - - # Targets specify both paths - target_locations = [ - (path1.id, "test_book_2024-01.zim"), - (path2.id, "test_book_2024-01.zim"), - ] - - assert _current_locations_match_targets(book, target_locations) is False - - def test_no_match_superset_current_locations( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book with more current locations than targets should return False.""" - book = create_book() - path1 = create_warehouse_path(folder_name="path1") - path2 = create_warehouse_path(folder_name="path2") - - # Add current locations at both paths - loc1 = BookLocation( - book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - loc1.warehouse_path_id = path1.id - loc2 = BookLocation( - 
book_id=book.id, - status="current", - filename="test_book_2024-01.zim", - ) - loc2.warehouse_path_id = path2.id - book.locations.extend([loc1, loc2]) - dbsession.add(loc1) - dbsession.add(loc2) - dbsession.flush() - - # Targets specify only one path - target_locations = [(path1.id, "test_book_2024-01.zim")] - - assert _current_locations_match_targets(book, target_locations) is False - - def test_no_match_empty_current_locations( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Book with no current locations should not match non-empty targets.""" - book = create_book() - warehouse_path = create_warehouse_path() - - # Book has no locations - dbsession.flush() - - # Targets specify locations - target_locations = [(warehouse_path.id, "test_book_2024-01.zim")] - - assert _current_locations_match_targets(book, target_locations) is False - - def test_ignores_target_status_locations( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_warehouse_path: Callable[..., WarehousePath], - ): - """Helper should ignore target status locations and only check current.""" - book = create_book() - warehouse_path = create_warehouse_path() - - # Add only target location (no current locations) - target_location = BookLocation( - book_id=book.id, - status="target", - filename="test_book_2024-01.zim", - ) - target_location.warehouse_path_id = warehouse_path.id - book.locations.append(target_location) - dbsession.add(target_location) - dbsession.flush() - - # Targets specify locations - target_locations = [(warehouse_path.id, "test_book_2024-01.zim")] - - # Should return False because there are no current locations - assert _current_locations_match_targets(book, target_locations) is False diff --git a/backend/tests/processors/test_book_location_integration.py b/backend/tests/processors/test_book_location_integration.py deleted file mode 100644 index 9f3bf92..0000000 --- 
a/backend/tests/processors/test_book_location_integration.py +++ /dev/null @@ -1,633 +0,0 @@ -"""Integration tests for book location workflow.""" - -from collections.abc import Callable -from typing import Any - -import pytest -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db.models import ( - Book, - BookLocation, - Title, - Warehouse, - WarehousePath, - ZimfarmNotification, -) -from cms_backend.processors.zimfarm_notification import process_notification - -GOOD_ZIM_METADATA = { - "Name": "test_en_all", - "Title": "Test Title", - "Creator": "Test", - "Publisher": "Test", - "Date": "2024-01-15", - "Description": "Test Description", - "Language": "eng", - "Flavour": None, -} - -GOOD_PRODUCER = { - "displayName": "farm.openzim.org: test_en_all", - "displayUrl": "https://farm.openzim.org/recipes/test_en_all", - "uniqueId": "550e8400-e29b-41d4-a716-446655440000", -} - - -@pytest.fixture -def warehouse_setup( - dbsession: OrmSession, # noqa: ARG001 - create_warehouse: Callable[..., Warehouse], - create_warehouse_path: Callable[..., WarehousePath], -) -> dict[str, Any]: - """Set up warehouse and warehouse paths for testing.""" - dev_warehouse = create_warehouse(name="dev-warehouse") - prod_warehouse = create_warehouse(name="prod-warehouse") - - dev_path = create_warehouse_path( - warehouse=dev_warehouse, - folder_name="dev-zim", - ) - prod_path = create_warehouse_path( - warehouse=prod_warehouse, - folder_name="prod-zim", - ) - - return { - "dev_warehouse": dev_warehouse, - "prod_warehouse": prod_warehouse, - "dev_path": dev_path, - "prod_path": prod_path, - } - - -@pytest.fixture -def good_notification_content( - warehouse_setup: dict[str, Any], # noqa: ARG001 -) -> dict[str, Any]: - """Create good notification content with warehouse info.""" - return { - "article_count": 100, - "media_count": 50, - "size": 1024000, - "metadata": GOOD_ZIM_METADATA, - "zimcheck": {"status": "pass"}, - "warehouse_name": "dev-warehouse", - "folder_name": "dev-zim", - 
"filename": "test_en_all_2024-01-15.zim", - "producer": GOOD_PRODUCER, - } - - -class TestBookLocationCreation: - """Test book location creation during notification processing.""" - - def test_book_gets_current_location_from_notification( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - good_notification_content: dict[str, Any], - ): - """Book created from notification should have current location.""" - notification = create_zimfarm_notification(content=good_notification_content) - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - assert notification.book is not None - - book = notification.book - current_locations = [loc for loc in book.locations if loc.status == "current"] - - assert len(current_locations) == 1 - assert current_locations[0].filename == "test_en_all_2024-01-15.zim" - assert current_locations[0].warehouse_path.warehouse.name == "dev-warehouse" - assert current_locations[0].warehouse_path.folder_name == "dev-zim" - - def test_notification_without_warehouse_path_fails( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - ): - """Notification with non-existent warehouse path should fail.""" - bad_content = { - "article_count": 100, - "media_count": 50, - "size": 1024000, - "metadata": GOOD_ZIM_METADATA, - "zimcheck": {"status": "pass"}, - "warehouse_name": "non-existent-warehouse", - "folder_name": "non-existent-folder", - "filename": "test.zim", - "producer": GOOD_PRODUCER, - } - notification = create_zimfarm_notification(content=bad_content) - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert any("warehouse path not found" in event for event in notification.events) - - -class TestTargetLocationCreation: - """Test target location creation when book is added to 
title.""" - - def test_target_locations_created_for_dev_title( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - good_notification_content: dict[str, Any], - ): - """When book is added to dev title, target locations should use dev paths.""" - dev_path = warehouse_setup["dev_path"] - - # Create dev title - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.book is not None - - book = notification.book - target_locations = [loc for loc in book.locations if loc.status == "target"] - - assert len(target_locations) == 1 - assert target_locations[0].warehouse_path_id == dev_path.id - # Target filename should be computed - assert "2024-01" in target_locations[0].filename - assert target_locations[0].filename.endswith(".zim") - - def test_target_locations_created_for_prod_title( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - good_notification_content: dict[str, Any], - ): - """When book is added to prod title, target locations should use prod paths.""" - prod_path = warehouse_setup["prod_path"] - - # Create prod title - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[], - prod_warehouse_path_ids=[prod_path.id], - in_prod=True, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.book is not None - - book = notification.book - target_locations = [loc 
for loc in book.locations if loc.status == "target"] - - assert len(target_locations) == 1 - assert target_locations[0].warehouse_path_id == prod_path.id - - def test_multiple_target_locations_for_multiple_paths( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - create_warehouse_path: Callable[..., WarehousePath], - good_notification_content: dict[str, Any], - ): - """Title with multiple warehouse paths - - This should create multiple target locations. - """ - dev_path_1 = warehouse_setup["dev_path"] - dev_path_2 = create_warehouse_path( - warehouse=warehouse_setup["dev_warehouse"], - folder_name="dev-zim-backup", - ) - - # Create title with multiple dev paths - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path_1.id, dev_path_2.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.book is not None - - book = notification.book - target_locations = [loc for loc in book.locations if loc.status == "target"] - - assert len(target_locations) == 2 - target_warehouse_path_ids = {loc.warehouse_path_id for loc in target_locations} - assert target_warehouse_path_ids == {dev_path_1.id, dev_path_2.id} - - -class TestTargetFilenameComputation: - """Test that target filenames are computed correctly.""" - - def test_target_filename_basic_format( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - good_notification_content: dict[str, Any], - ): - """Target filename should follow pattern {name}_{period}.zim.""" - dev_path = warehouse_setup["dev_path"] - - create_title( - name="test_en_all", - 
producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - - assert notification.book - - target_locations = [ - loc for loc in notification.book.locations if loc.status == "target" - ] - - assert len(target_locations) == 1 - # Should match pattern: test_en_all_2024-01.zim - assert target_locations[0].filename == "test_en_all_2024-01.zim" - - def test_target_filename_with_flavour( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - ): - """Target filename should include flavour if present.""" - dev_path = warehouse_setup["dev_path"] - - metadata_with_flavour = { - "Name": "wikipedia_en_all", - "Title": "Wikipedia EN All", - "Creator": "Test", - "Publisher": "Test", - "Date": "2024-02-20", - "Description": "Test Description", - "Language": "eng", - "Flavour": "maxi", - } - - content = { - "article_count": 100, - "media_count": 50, - "size": 1024000, - "metadata": metadata_with_flavour, - "zimcheck": {"status": "pass"}, - "warehouse_name": "dev-warehouse", - "folder_name": "dev-zim", - "filename": "wikipedia_en_all_maxi_2024-02-20.zim", - "producer": GOOD_PRODUCER, - } - - create_title( - name="wikipedia_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=content) - process_notification(dbsession, notification) - - dbsession.flush() - - assert notification.book - - target_locations = [ - loc for loc in notification.book.locations if loc.status == "target" - ] - - assert len(target_locations) == 1 - assert target_locations[0].filename == 
"wikipedia_en_all_maxi_2024-02.zim" - - def test_target_filename_collision_handling( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_book_location: Callable[..., BookLocation], - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - good_notification_content: dict[str, Any], - ): - """Target filename should get suffix if collision exists.""" - dev_path = warehouse_setup["dev_path"] - - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - # Create existing book with target location to cause collision - existing_book = create_book( - name="test_en_all", - date="2024-01-10", - flavour=None, - ) - create_book_location( - book=existing_book, - warehouse_path=dev_path, - filename="test_en_all_2024-01.zim", - status="target", - ) - - # Process new notification for same period - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - - assert notification.book - - target_locations = [ - loc for loc in notification.book.locations if loc.status == "target" - ] - - assert len(target_locations) == 1 - # Should get letter suffix to avoid collision - assert target_locations[0].filename == "test_en_all_2024-01a.zim" - - -class TestTargetLocationOptimization: - """Test that target locations are skipped when they match current locations.""" - - def test_no_target_when_current_matches_single_path( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_book_location: Callable[..., BookLocation], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - ): - """When book current location exactly matches target, no target should - be created.""" - dev_path = warehouse_setup["dev_path"] - - # Create a book manually 
with a current location that will match the computed - # target - book = create_book( - name="wikipedia_fr_all", - date="2024-02-15", - flavour=None, - producer_unique_id=GOOD_PRODUCER["uniqueId"], - ) - - # Add current location with target-style filename - create_book_location( - book=book, - warehouse_path=dev_path, - filename="wikipedia_fr_all_2024-02.zim", - status="current", - ) - - # Create title with same warehouse path - title = create_title( - name="wikipedia_fr_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - dbsession.flush() - - # Add book to title - should skip target creation - from cms_backend.processors.title import add_book_to_title - - add_book_to_title(dbsession, book, title) - dbsession.flush() - - current_locations = [loc for loc in book.locations if loc.status == "current"] - target_locations = [loc for loc in book.locations if loc.status == "target"] - - # Should have current location - assert len(current_locations) == 1 - assert current_locations[0].warehouse_path_id == dev_path.id - assert current_locations[0].filename == "wikipedia_fr_all_2024-02.zim" - - # Should NOT have target location (optimization applied) - assert len(target_locations) == 0 - - # Check event log - assert any( - "book already at all target locations" in event for event in book.events - ) - - # Book should be marked as published (no move needed) - assert book.status == "published" - - def test_no_target_when_current_matches_multiple_paths( - self, - dbsession: OrmSession, - create_book: Callable[..., Book], - create_title: Callable[..., Title], - create_book_location: Callable[..., BookLocation], - warehouse_setup: dict[str, Any], - create_warehouse_path: Callable[..., WarehousePath], - ): - dev_path_1 = warehouse_setup["dev_path"] - dev_path_2 = create_warehouse_path( - warehouse=warehouse_setup["dev_warehouse"], - folder_name="dev-zim-backup", - ) - - # Create a book 
with current locations at both paths - book = create_book( - name="wiktionary_es_all", - date="2024-03-10", - flavour="maxi", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - ) - - # Add current locations at both paths with target-style filename - create_book_location( - book=book, - warehouse_path=dev_path_1, - filename="wiktionary_es_all_maxi_2024-03.zim", - status="current", - ) - create_book_location( - book=book, - warehouse_path=dev_path_2, - filename="wiktionary_es_all_maxi_2024-03.zim", - status="current", - ) - - # Create title with multiple dev paths - title = create_title( - name="wiktionary_es_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path_1.id, dev_path_2.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - dbsession.flush() - - # Add book to title - should skip target creation - from cms_backend.processors.title import add_book_to_title - - add_book_to_title(dbsession, book, title) - dbsession.flush() - - current_locations = [loc for loc in book.locations if loc.status == "current"] - target_locations = [loc for loc in book.locations if loc.status == "target"] - - # Should have 2 current locations - assert len(current_locations) == 2 - current_path_ids = {loc.warehouse_path_id for loc in current_locations} - assert current_path_ids == {dev_path_1.id, dev_path_2.id} - - # Should NOT have target locations (optimization applied) - assert len(target_locations) == 0 - - # Check event log - assert any( - "book already at all target locations" in event for event in book.events - ) - - # Book should be marked as published (no move needed) - assert book.status == "published" - - def test_target_created_when_partial_match( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - create_warehouse_path: Callable[..., WarehousePath], - good_notification_content: dict[str, Any], - ): - """When book 
only matches some target paths, all targets should be created.""" - dev_path_1 = warehouse_setup["dev_path"] - dev_path_2 = create_warehouse_path( - warehouse=warehouse_setup["dev_warehouse"], - folder_name="dev-zim-backup", - ) - - # Adjust filename to match computed target - good_notification_content["filename"] = "test_en_all_2024-01.zim" - - # Create title with multiple dev paths - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path_1.id, dev_path_2.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.book is not None - - book = notification.book - current_locations = [loc for loc in book.locations if loc.status == "current"] - target_locations = [loc for loc in book.locations if loc.status == "target"] - - # Should have 1 current location - assert len(current_locations) == 1 - assert current_locations[0].warehouse_path_id == dev_path_1.id - - # Should have 2 target locations (NO optimization, partial match) - assert len(target_locations) == 2 - target_path_ids = {loc.warehouse_path_id for loc in target_locations} - assert target_path_ids == {dev_path_1.id, dev_path_2.id} - - # Check event log - should NOT have optimization message - assert not any( - "book already at all target locations" in event for event in book.events - ) - - # Book should be marked as pending_move (needs file movement) - assert book.status == "pending_move" - - def test_target_created_when_filename_differs( - self, - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - warehouse_setup: dict[str, Any], - good_notification_content: dict[str, Any], - ): - """When book filename differs from computed target, target should be created.""" - dev_path = warehouse_setup["dev_path"] - 
- # Keep original filename that differs from computed target - # Original: test_en_all_2024-01-15.zim - # Computed target: test_en_all_2024-01.zim - - create_title( - name="test_en_all", - producer_unique_id=GOOD_PRODUCER["uniqueId"], - dev_warehouse_path_ids=[dev_path.id], - prod_warehouse_path_ids=[], - in_prod=False, - ) - - notification = create_zimfarm_notification(content=good_notification_content) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.book is not None - - book = notification.book - current_locations = [loc for loc in book.locations if loc.status == "current"] - target_locations = [loc for loc in book.locations if loc.status == "target"] - - # Should have current location with original filename - assert len(current_locations) == 1 - assert current_locations[0].filename == "test_en_all_2024-01-15.zim" - - # Should have target location with computed filename (NO optimization) - assert len(target_locations) == 1 - assert target_locations[0].filename == "test_en_all_2024-01.zim" - - # Check event log - should NOT have optimization message - assert not any( - "book already at all target locations" in event for event in book.events - ) - - # Book should be marked as pending_move (needs file movement) - assert book.status == "pending_move" diff --git a/backend/tests/processors/test_title.py b/backend/tests/processors/test_title.py deleted file mode 100644 index d03ecbf..0000000 --- a/backend/tests/processors/test_title.py +++ /dev/null @@ -1,268 +0,0 @@ -import re -from collections.abc import Callable - -from sqlalchemy.orm import Session as OrmSession - -from cms_backend.db.models import Book, Title -from cms_backend.processors.title import add_book_to_title - - -def test_add_book_to_title_same_name( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with same name""" - - book = create_book(name=title.name, date="2024-01-01") - assert len(book.events) 
== 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book.title == title - assert book.title_id == title.id - assert book in title.books - assert any( - event for event in title.events if re.match(".*: book .* added to title", event) - ) - assert any( - event for event in book.events if re.match(".*: book added to title .*", event) - ) - - -def test_add_book_to_title_different_name( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with different name""" - - book_name = "test2_fr_all" - assert book_name != title.name - book = create_book(name=book_name, date="2024-01-01") - assert len(book.events) == 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book.title == title - assert book.title_id == title.id - assert book in title.books - assert title.name == book_name # title name has been overriden - assert any( - event for event in title.events if re.match(".*: book .* added to title", event) - ) - assert any( - event - for event in title.events - if re.match(f".*: updating title name to {book_name}", event) - ) - assert any( - event for event in book.events if re.match(".*: book added to title .*", event) - ) - - -def test_add_book_to_title_empty_name( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with an empty name""" - - book_name = "" - assert book_name != title.name - book = create_book(name=book_name, date="2024-01-01") - assert len(book.events) == 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book not in title.books - assert book.title is None - assert book.title_id is None - assert [ - event for event in title.events if re.match(".*: book .* added to title", event) - ] == [] - assert [ - event for event 
in book.events if re.match(".*: book added to title .*", event) - ] == [] - assert any( - event - for event in title.events - if re.match(".*: error encountered while adding book .*", event) - ) - assert any( - event - for event in book.events - if re.match(".*: error encountered while adding to title .*", event) - ) - - -def test_add_book_to_title_missing_name( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with missing name""" - - book = create_book(name=None, date="2024-01-01") - assert len(book.events) == 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book not in title.books - assert book.title is None - assert book.title_id is None - assert [ - event for event in title.events if re.match(".*: book .* added to title", event) - ] == [] - assert [ - event for event in book.events if re.match(".*: book added to title .*", event) - ] == [] - assert any( - event - for event in title.events - if re.match(".*: error encountered while adding book .*", event) - ) - assert any( - event - for event in book.events - if re.match(".*: error encountered while adding to title .*", event) - ) - - -def test_add_book_to_title_bad_error( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title which encounters a bad error""" - - book = create_book(name=title.name, date="2024-01-01") - assert len(book.events) == 0 - assert len(title.events) == 0 - - # simulate a very bad error by dropping an expected property (and adding it back so - # that SQLAlchemy does not choke) - save_name = book.name - book.name = None - add_book_to_title(session=dbsession, book=book, title=title) - book.name = save_name - - dbsession.flush() - assert book not in title.books - assert book.title is None - assert book.title_id is None - assert [ - event for event in title.events if re.match(".*: book .* added to 
title", event) - ] == [] - assert [ - event for event in book.events if re.match(".*: book added to title .*", event) - ] == [] - assert any( - event - for event in title.events - if re.match(".*: error encountered while adding book .*", event) - ) - assert any( - event - for event in book.events - if re.match(".*: error encountered while adding to title .*", event) - ) - - -def test_add_book_to_title_updates_producer_fields( - dbsession: OrmSession, - create_book: Callable[..., Book], - create_title: Callable[..., Title], -): - """Add a book to a title updates producer display fields""" - - # Create a title without producer display fields (only unique_id) - title = create_title( - name="test_en_all", - producer_unique_id="550e8400-e29b-41d4-a716-446655440000", - producer_display_name=None, - producer_display_url=None, - ) - - # Create a book with complete producer information - book = create_book( - name=title.name, - date="2024-01-01", - producer_unique_id=title.producer_unique_id, - producer_display_name="farm.openzim.org: test_en_all", - producer_display_url="https://farm.openzim.org/recipes/test_en_all", - ) - - assert title.producer_display_name is None - assert title.producer_display_url is None - assert len(book.events) == 0 - assert len(title.events) == 0 - - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - - # Verify producer fields were updated - assert title.producer_display_name == book.producer_display_name - assert title.producer_display_url == book.producer_display_url - assert any( - event - for event in title.events - if re.match(".*: updating title producer_display_name to .*", event) - ) - assert any( - event - for event in title.events - if re.match(".*: updating title producer_display_url to .*", event) - ) - - -def test_add_book_to_title_missing_date( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with missing date""" - - book = 
create_book(name=title.name, date=None) - assert len(book.events) == 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book not in title.books - assert book.title is None - assert book.title_id is None - assert [ - event for event in title.events if re.match(".*: book .* added to title", event) - ] == [] - assert [ - event for event in book.events if re.match(".*: book added to title .*", event) - ] == [] - assert any( - event - for event in title.events - if re.match(".*: error encountered while adding book .*", event) - ) - assert any( - event - for event in book.events - if re.match(".*: error encountered while adding to title .*", event) - ) - - -def test_add_book_to_title_empty_date( - dbsession: OrmSession, create_book: Callable[..., Book], title: Title -): - """Add a book to an existing title with empty date""" - - book = create_book(name=title.name, date="") - assert len(book.events) == 0 - assert len(title.events) == 0 - add_book_to_title(session=dbsession, book=book, title=title) - dbsession.flush() - assert book not in title.books - assert book.title is None - assert book.title_id is None - assert [ - event for event in title.events if re.match(".*: book .* added to title", event) - ] == [] - assert [ - event for event in book.events if re.match(".*: book added to title .*", event) - ] == [] - assert any( - event - for event in title.events - if re.match(".*: error encountered while adding book .*", event) - ) - assert any( - event - for event in book.events - if re.match(".*: error encountered while adding to title .*", event) - ) diff --git a/backend/tests/processors/test_zimfarm_notification.py b/backend/tests/processors/test_zimfarm_notification.py deleted file mode 100644 index fb67460..0000000 --- a/backend/tests/processors/test_zimfarm_notification.py +++ /dev/null @@ -1,597 +0,0 @@ -import re -from collections.abc import Callable -from typing import Any - -import pytest 
-from sqlalchemy.orm import Session as OrmSession -from tests.processors.test_book import GOOD_ZIM_METADATA - -from cms_backend.db.models import Book, Title, ZimfarmNotification -from cms_backend.processors.zimfarm_notification import process_notification - -GOOD_PRODUCER = { - "displayName": "farm.openzim.org: test_en_all", - "displayUrl": "https://farm.openzim.org/recipes/test_en_all", - "uniqueId": "550e8400-e29b-41d4-a716-446655440000", -} - -GOOD_NOTIFICATION_CONTENT = { - "article_count": 100, - "media_count": 50, - "size": 1024000, - "metadata": GOOD_ZIM_METADATA, - "zimcheck": {"status": "pass"}, - "warehouse_name": "test_warehouse", - "folder_name": "test_folder", - "filename": "test.zim", - "producer": GOOD_PRODUCER, -} - - -def test_process_notification_success( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_title: Callable[..., Title], - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification successfully - all steps work""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Create title with matching producer_unique_id - title = create_title( - name="test_en_all", producer_unique_id=GOOD_PRODUCER["uniqueId"] - ) - - notification = create_zimfarm_notification(content=GOOD_NOTIFICATION_CONTENT) - assert len(notification.events) == 0 - assert len(title.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - assert notification.book is not None - assert notification.book.title == title - assert notification.book.title_id == title.id - assert notification.book.status == "pending_move" - assert any( - event - for event in notification.events - if re.match(".*: notification transformed 
into book", event) - ) - assert any( - event - for event in notification.book.events - if re.match(".*: created from Zimfarm notification", event) - ) - assert any( - event - for event in notification.book.events - if re.match(".*: book passed QA checks", event) - ) - assert any( - event - for event in notification.book.events - if re.match(".*: found matching title .*", event) - ) - assert any( - event - for event in notification.book.events - if re.match(".*: book added to title .*", event) - ) - assert any( - event for event in title.events if re.match(".*: book .* added to title", event) - ) - - -@pytest.mark.parametrize( - "missing_key, notification_content", - [ - pytest.param( - missing_key, - { - key: value - for key, value in GOOD_NOTIFICATION_CONTENT.items() - if key != missing_key - }, - id=f"missing-{missing_key}", - ) - for missing_key in [ - "article_count", - "media_count", - "size", - "metadata", - "zimcheck", - "warehouse_name", - "folder_name", - "filename", - "producer", - ] - ], -) -def test_process_notification_missing_mandatory_key( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - missing_key: str, - notification_content: dict[str, Any], -): - """Process notification with missing mandatory key in content""" - - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert notification.book is None - assert any( - event - for event in notification.events - if re.match(f".*: notification is missing mandatory keys: {missing_key}", event) - ) - - -def test_process_notification_missing_multiple_mandatory_keys( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], -): - """Process notification with multiple missing mandatory keys in content""" - - 
notification_content = { - key: value - for key, value in GOOD_NOTIFICATION_CONTENT.items() - if key not in ["article_count", "size", "filename"] - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - - assert notification.book is None - assert any( - event - for event in notification.events - if re.match( - ".*: notification is missing mandatory keys: article_count,size,filename", - event, - ) - ) - - -def test_process_notification_qa_check_fails( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification where book QA check fails due to missing metadata""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Create notification with metadata missing the Creator field - incomplete_metadata = { - key: value for key, value in GOOD_ZIM_METADATA.items() if key != "Creator" - } - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "metadata": incomplete_metadata, - "zim_metadata": incomplete_metadata, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - - assert notification.book is not None - # Book was created but not added to title because QA failed - assert notification.book.title is None - assert notification.book.title_id is None - assert notification.book.status == "qa_failed" - assert 
any( - event - for event in notification.book.events - if re.match(".*: book is missing mandatory metadata: Creator", event) - ) - # Should not have these events since QA failed - assert not any( - event - for event in notification.book.events - if re.match(".*: book passed QA checks", event) - ) - - -def test_process_notification_no_matching_title( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification where no matching title exists in database""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Use a different name that doesn't match the existing title - different_metadata = {**GOOD_ZIM_METADATA, "Name": "different_title_name"} - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "metadata": different_metadata, - "zim_metadata": different_metadata, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - - assert notification.book is not None - # Book was created and passed QA but not added to title - assert notification.book.title is None - assert notification.book.title_id is None - assert notification.book.status == "pending_title" - assert any( - event - for event in notification.book.events - if re.match(".*: book passed QA checks", event) - ) - assert any( - event - for event in notification.book.events - if re.match(".*: no matching title found for book", event) - ) - # Should not have this event since title was not found - assert not any( - event - for event in notification.book.events - if 
re.match(".*: book added to title .*", event) - ) - - -def test_process_notification_missing_name( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification where book metadata has no Name field""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Remove Name from metadata - no_name_metadata = { - key: value for key, value in GOOD_ZIM_METADATA.items() if key != "Name" - } - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "metadata": no_name_metadata, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - - assert notification.book is not None - # Book was created but QA check fails because Name is missing - assert notification.book.title is None - assert notification.book.title_id is None - assert notification.book.status == "qa_failed" - # Name is a mandatory metadata field, so QA check should fail - assert any( - event - for event in notification.book.events - if re.match(".*: book is missing mandatory metadata: Name", event) - ) - - -def test_process_notification_exception_handling( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 -): - """Process notification with unexpected exception during processing""" - - notification = create_zimfarm_notification(content=GOOD_NOTIFICATION_CONTENT) - assert len(notification.events) == 0 - assert notification.status == "pending" - - # Simulate a very bad error by dropping the content attribute - # 
(and adding it back so that SQLAlchemy does not choke during flush) - save_content = notification.content - del notification.content - - process_notification(dbsession, notification) - - notification.content = save_content - dbsession.flush() - - assert notification.status == "errored" - - assert any( - event - for event in notification.events - if re.match(".*: error encountered while processing notification", event) - ) - - -def test_process_notification_empty_name( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification where book metadata has empty Name field""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Set Name to empty string - empty_name_metadata = {**GOOD_ZIM_METADATA, "Name": ""} - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "metadata": empty_name_metadata, - "zim_metadata": empty_name_metadata, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - - assert notification.book is not None - # Book was created but not added to title because Name is empty - assert notification.book.title is None - assert notification.book.title_id is None - assert notification.book.status == "qa_failed" - assert any( - event - for event in notification.book.events - if re.match(".*: no title can be found because name is missing", event) - ) - - -def test_process_notification_with_existing_books( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - create_book: 
Callable[..., Book], - create_title: Callable[..., Title], - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification and add to title that already has books""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - # Create title with matching producer_unique_id - title = create_title( - name="test_en_all", producer_unique_id=GOOD_PRODUCER["uniqueId"] - ) - - # Add an existing book to the title - existing_book = create_book( - zim_metadata={"Name": title.name}, producer_unique_id=title.producer_unique_id - ) - title.books.append(existing_book) - dbsession.flush() - - assert len(title.books) == 1 - - # Now process a new notification - notification = create_zimfarm_notification(content=GOOD_NOTIFICATION_CONTENT) - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - - assert notification.book is not None - assert notification.book.title == title - assert notification.book.status == "pending_move" - assert len(title.books) == 2 - assert existing_book in title.books - assert notification.book in title.books - - -def test_process_notification_producer_not_dict( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], -): - """Process notification where producer is not a dictionary""" - - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "producer": "not a dict", - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert notification.book is None - assert any( - event - for event in notification.events - if re.match(r".*: producer must be a 
dict\[str, str\]", event) - ) - - -@pytest.mark.parametrize( - "missing_key, producer_content", - [ - pytest.param( - missing_key, - {key: value for key, value in GOOD_PRODUCER.items() if key != missing_key}, - id=f"missing-{missing_key}", - ) - for missing_key in ["displayName", "displayUrl", "uniqueId"] - ], -) -def test_process_notification_producer_missing_key( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - missing_key: str, - producer_content: dict[str, Any], -): - """Process notification with producer missing mandatory key""" - - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "producer": producer_content, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert notification.book is None - assert any( - event - for event in notification.events - if re.match(f".*: producer is missing mandatory keys: {missing_key}", event) - ) - - -def test_process_notification_producer_missing_multiple_keys( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], -): - """Process notification with producer missing multiple mandatory keys""" - - producer_content = { - key: value - for key, value in GOOD_PRODUCER.items() - if key not in ["displayName", "uniqueId"] - } - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "producer": producer_content, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert notification.book is None - assert any( - event - for event in notification.events - if re.match( - ".*: producer is 
missing mandatory keys: displayName,uniqueId", event - ) - ) - - -def test_process_notification_producer_stored_in_book( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - title: Title, # noqa: ARG001 - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification successfully and verify producer fields are stored""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", warehouse=warehouse) - - notification = create_zimfarm_notification(content=GOOD_NOTIFICATION_CONTENT) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "processed" - assert notification.book is not None - - # Verify producer fields are stored correctly - assert notification.book.producer_display_name == GOOD_PRODUCER["displayName"] - assert notification.book.producer_display_url == GOOD_PRODUCER["displayUrl"] - assert notification.book.producer_unique_id == GOOD_PRODUCER["uniqueId"] - - -@pytest.mark.parametrize( - "invalid_filename", - [ - pytest.param(123, id="int"), - pytest.param(None, id="None"), - pytest.param(["file.zim"], id="list"), - pytest.param({"name": "file.zim"}, id="dict"), - pytest.param("", id="empty-string"), - ], -) -def test_process_notification_filename_not_valid_string( - dbsession: OrmSession, - create_zimfarm_notification: Callable[..., ZimfarmNotification], - invalid_filename: Any, - create_warehouse: Callable[..., Any], - create_warehouse_path: Callable[..., Any], -): - """Process notification where filename is not a valid non-empty string""" - - # Create warehouse and warehouse path that match the notification - warehouse = create_warehouse(name="test_warehouse") - create_warehouse_path(folder_name="test_folder", 
warehouse=warehouse) - - notification_content = { - **GOOD_NOTIFICATION_CONTENT, - "filename": invalid_filename, - } - notification = create_zimfarm_notification(content=notification_content) - assert len(notification.events) == 0 - assert notification.status == "pending" - - process_notification(dbsession, notification) - - dbsession.flush() - assert notification.status == "bad_notification" - assert notification.book is None - assert any( - event - for event in notification.events - if re.match(r".*: filename must be a non-empty string, got \w+", event) - ) diff --git a/backend/tests/shuttle/conftest.py b/backend/tests/shuttle/conftest.py index 2fefc7f..235e1e9 100644 --- a/backend/tests/shuttle/conftest.py +++ b/backend/tests/shuttle/conftest.py @@ -6,7 +6,7 @@ import pytest from sqlalchemy.orm import Session as OrmSession -from cms_backend.db.models import Warehouse, WarehousePath +from cms_backend.db.models import Warehouse @pytest.fixture @@ -25,24 +25,17 @@ def temp_warehouse_dirs(tmp_path: Path) -> dict[str, Path]: @pytest.fixture -def create_warehouse_with_path( +def create_warehouse_simple( dbsession: OrmSession, -) -> Callable[..., tuple[Warehouse, WarehousePath]]: - """Factory to create a warehouse and its path for testing.""" +) -> Callable[..., Warehouse]: + """Factory to create a warehouse for testing.""" def _create( name: str, - folder_name: str = "zims", - ) -> tuple[Warehouse, WarehousePath]: - warehouse = Warehouse(name=name, configuration={}) + ) -> Warehouse: + warehouse = Warehouse(name=name) dbsession.add(warehouse) dbsession.flush() - - warehouse_path = WarehousePath(folder_name=folder_name) - warehouse_path.warehouse = warehouse - dbsession.add(warehouse_path) - dbsession.flush() - - return warehouse, warehouse_path + return warehouse return _create diff --git a/backend/tests/shuttle/test_move_files.py b/backend/tests/shuttle/test_move_files.py index ecba1f2..02108c9 100644 --- a/backend/tests/shuttle/test_move_files.py +++ 
b/backend/tests/shuttle/test_move_files.py @@ -1,717 +1,258 @@ -"""Tests for the move_files module.""" +"""Tests for shuttle move_files module.""" +from collections.abc import Callable +from contextlib import ExitStack from pathlib import Path -from typing import Any +from unittest.mock import patch from sqlalchemy.orm import Session as OrmSession -from cms_backend.shuttle.context import Context -from cms_backend.shuttle.move_files import move_book_files, move_files - - -class TestMoveFilesEndToEnd: - """End-to-end tests for the move_files function.""" - - def test_move_files_processes_multiple_books_in_order( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that move_files processes multiple books in created_at order.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", - folder_name="zims", - ) - warehouse_2, warehouse_path_2 = create_warehouse_with_path( - name="warehouse_2", - folder_name="zims", - ) - dbsession.flush() - - # Create test files - source_file_1 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book1.zim" - source_file_2 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book2.zim" - source_file_3 = temp_warehouse_dirs["warehouse_1"] / "zims" / "book3.zim" - for src_file in [source_file_1, source_file_2, source_file_3]: - src_file.parent.mkdir(parents=True, exist_ok=True) - source_file_1.write_text("book1 content") - source_file_2.write_text("book2 content") - source_file_3.write_text("book3 content") - - # Create books with locations - book_1 = create_book() - create_book_location( - book=book_1, - warehouse_path=warehouse_path_1, - filename="book1.zim", - status="current", - ) - create_book_location( - book=book_1, - warehouse_path=warehouse_path_2, - filename="book1.zim", - status="target", - ) - book_1.status = "pending_move" - - book_2 = create_book() - create_book_location( - book=book_2, - 
warehouse_path=warehouse_path_1, - filename="book2.zim", - status="current", - ) - create_book_location( - book=book_2, - warehouse_path=warehouse_path_2, - filename="book2.zim", - status="target", - ) - book_2.status = "pending_move" - - book_3 = create_book() - create_book_location( - book=book_3, - warehouse_path=warehouse_path_1, - filename="book3.zim", - status="current", - ) - create_book_location( - book=book_3, - warehouse_path=warehouse_path_2, - filename="book3.zim", - status="target", - ) - book_3.status = "pending_move" - - dbsession.flush() - - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), - } - - Context.local_warehouse_paths = warehouse_paths - move_files(dbsession) - dbsession.flush() - - # Verify all books processed - assert book_1.status == "published" - assert book_2.status == "published" - assert book_3.status == "published" - - # Verify files moved - target_file_1 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book1.zim" - target_file_2 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book2.zim" - target_file_3 = temp_warehouse_dirs["warehouse_2"] / "zims" / "book3.zim" - - assert target_file_1.read_text() == "book1 content" - assert target_file_2.read_text() == "book2 content" - assert target_file_3.read_text() == "book3 content" - - assert not source_file_1.exists() - assert not source_file_2.exists() - assert not source_file_3.exists() - - -class TestMoveBookFilesSuccess: - """Tests for successful move_book_files scenarios.""" - - def test_move_book_files_simple_move( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test simple move with 1 current and 1 target location.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", - folder_name="zims", - ) - dbsession.flush() - - source_dir = 
temp_warehouse_dirs["warehouse_1"] / "zims" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "old_book.zim" - source_file.write_text("test content") - - book = create_book() - current_loc = create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="old_book.zim", - status="current", - ) - target_loc = create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="new_book.zim", - status="target", - ) - dbsession.flush() +from cms_backend.db.models import Book, BookLocation, Warehouse +from cms_backend.shuttle.move_files import move_book_files + + +def test_move_book_files_inaccessible_warehouse( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files returns early if a warehouse is not accessible""" + warehouse = create_warehouse(name="inaccessible_warehouse") + book = create_book() + dbsession.flush() + + create_book_location(book=book, warehouse_id=warehouse.id) + dbsession.flush() + + # Mock ShuttleContext with empty warehouse paths (no warehouses accessible) + with patch("cms_backend.shuttle.move_files.ShuttleContext") as mock_context: + mock_context.local_warehouse_paths = {} - warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} - Context.local_warehouse_paths = warehouse_paths - move_book_files(dbsession, book) - dbsession.flush() - - target_file = source_dir / "new_book.zim" - assert target_file.exists() - assert target_file.read_text() == "test content" - assert not source_file.exists() - assert book.status == "published" - assert target_loc.status == "current" - assert current_loc not in book.locations - - def test_move_book_files_copy_then_move( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test copying to 
first target, then moving to second (1→2).""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="source" - ) - warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( - name="warehouse_2", folder_name="target_a" - ) - warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( - name="warehouse_2", folder_name="target_b" - ) - dbsession.flush() - - source_dir = temp_warehouse_dirs["warehouse_1"] / "source" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "source.zim" - source_file.write_text("book content") - - book = create_book() - # current_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="source.zim", - status="current", - ) - # target_loc_1 - create_book_location( - book=book, - warehouse_path=warehouse_path_2_a, - filename="target1.zim", - status="target", - ) - # target_loc_2 - create_book_location( - book=book, - warehouse_path=warehouse_path_2_b, - filename="target2.zim", - status="target", - ) - dbsession.flush() - - for subfolder in ["target_a", "target_b"]: - (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( - parents=True, exist_ok=True - ) - - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), - warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), - } - Context.local_warehouse_paths = warehouse_paths move_book_files(dbsession, book) - dbsession.flush() - - target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" / "target1.zim" - target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim" - assert target_file_1.exists() - assert target_file_1.read_text() == "book content" - assert target_file_2.exists() - assert target_file_2.read_text() == "book content" - assert not source_file.exists() - assert book.status == "published" - - def test_move_book_files_multiple_copies_one_move( - self, - dbsession: 
OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test 1→3: 2 copies then 1 move.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="source" - ) - warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( - name="warehouse_2", folder_name="target_a" - ) - warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( - name="warehouse_2", folder_name="target_b" - ) - warehouse_2_c, warehouse_path_2_c = create_warehouse_with_path( - name="warehouse_2", folder_name="target_c" - ) - dbsession.flush() - - source_dir = temp_warehouse_dirs["warehouse_1"] / "source" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "source.zim" - source_file.write_text("book content") - - book = create_book() - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="source.zim", - status="current", - ) - create_book_location( - book=book, - warehouse_path=warehouse_path_2_a, - filename="target1.zim", - status="target", - ) - create_book_location( - book=book, - warehouse_path=warehouse_path_2_b, - filename="target2.zim", - status="target", - ) - create_book_location( - book=book, - warehouse_path=warehouse_path_2_c, - filename="target3.zim", - status="target", - ) - dbsession.flush() - - for subfolder in ["target_a", "target_b", "target_c"]: - (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( - parents=True, exist_ok=True - ) - - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), - warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), - warehouse_2_c.id: str(temp_warehouse_dirs["warehouse_2"]), - } - Context.local_warehouse_paths = warehouse_paths + + # Book should not be modified (status should remain default "pending_processing") + assert book.status == "pending_processing" + # No events 
should be added (the code just logs and returns early) + assert len(book.events) == 0 + + +def test_move_book_files_no_current_location( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files errors if there's no current location""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() + + # Create a target location but no current location + book_location = BookLocation( + book_id=book.id, + warehouse_id=warehouse.id, + path=Path("target_path"), + filename="test.zim", + status="target", + ) + dbsession.add(book_location) + dbsession.flush() + + with patch("cms_backend.shuttle.move_files.ShuttleContext") as mock_context: + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} + move_book_files(dbsession, book) - dbsession.flush() - - for i, subfolder in enumerate(["target_a", "target_b", "target_c"], 1): - target_file = ( - temp_warehouse_dirs["warehouse_2"] / subfolder / f"target{i}.zim" - ) - assert target_file.exists() - assert target_file.read_text() == "book content" - - assert not source_file.exists() - current_locs = [loc for loc in book.locations if loc.status == "current"] - assert len(current_locs) == 3 - - -class TestMoveBookFilesErrors: - """Tests for error handling in move_book_files.""" - - def test_move_book_files_no_current_locations( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that book with no current locations is marked as errored.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="zims" - ) - dbsession.flush() - - book = create_book() - target_loc = create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="target.zim", - status="target", - ) - dbsession.flush() - warehouse_paths = {warehouse_1.id: 
str(temp_warehouse_dirs["warehouse_1"])} - Context.local_warehouse_paths = warehouse_paths + assert book.status == "errored" + assert any("no current location" in event for event in book.events) + + +def test_move_book_files_no_target_location( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files marks book as published if there's + no target location""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() + + create_book_location(book=book, warehouse_id=warehouse.id, status="current") + dbsession.flush() + + with patch("cms_backend.shuttle.move_files.ShuttleContext") as mock_context: + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} + move_book_files(dbsession, book) - dbsession.flush() - - assert book.status == "errored" - assert any("no current location" in event for event in book.events) - assert target_loc.status == "target" - - def test_move_book_files_no_target_locations( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that book with no target locations is marked as published.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="zims" - ) - dbsession.flush() - - source_dir = temp_warehouse_dirs["warehouse_1"] / "zims" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "book.zim" - source_file.write_text("content") - - book = create_book() - # current_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="book.zim", - status="current", - ) - dbsession.flush() - warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} - Context.local_warehouse_paths = warehouse_paths + assert book.status == "published" + assert any("no target 
location set" in event for event in book.events) + + +def test_move_book_files_copy_operation( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files performs copy when more targets than current""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() + + # One current location + create_book_location( + book=book, warehouse_id=warehouse.id, path="current", status="current" + ) + + # Two target locations + create_book_location( + book=book, warehouse_id=warehouse.id, path="target1", status="target" + ) + create_book_location( + book=book, warehouse_id=warehouse.id, path="target2", status="target" + ) + dbsession.flush() + + with ExitStack() as stack: + mock_context = stack.enter_context( + patch("cms_backend.shuttle.move_files.ShuttleContext") + ) + mock_copy = stack.enter_context(patch("shutil.copy")) + stack.enter_context(patch("shutil.move")) + stack.enter_context(patch("pathlib.Path.mkdir")) + + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} move_book_files(dbsession, book) - dbsession.flush() - - assert book.status == "published" - assert any("no target location" in event for event in book.events) - assert source_file.exists() - - def test_move_book_files_inaccessible_warehouse( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that book with inaccessible warehouse is skipped.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="zims" - ) - _, warehouse_path_2 = create_warehouse_with_path( - name="warehouse_2", folder_name="zims" - ) - dbsession.flush() - - book = create_book() - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="book.zim", - status="current", - ) - 
create_book_location( - book=book, - warehouse_path=warehouse_path_2, - filename="book_target.zim", - status="target", - ) - dbsession.flush() - # Only warehouse_1 accessible (warehouse_2 is inaccessible) - warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} - Context.local_warehouse_paths = warehouse_paths + # Should have copied once (target_loc2 > current_loc) + assert mock_copy.call_count == 1 + + assert book.status == "published" + assert any("copied book from" in event for event in book.events) + # One target should now be current + assert sum(1 for loc in book.locations if loc.status == "current") >= 1 + + +def test_move_book_files_move_operation( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files performs move when targets equal current""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() + + # One current location + create_book_location( + book=book, warehouse_id=warehouse.id, path="current", status="current" + ) + + # One target location + create_book_location( + book=book, warehouse_id=warehouse.id, path="target", status="target" + ) + dbsession.flush() + + with ExitStack() as stack: + mock_context = stack.enter_context( + patch("cms_backend.shuttle.move_files.ShuttleContext") + ) + mock_move = stack.enter_context(patch("shutil.move")) + stack.enter_context(patch("pathlib.Path.mkdir")) + + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} move_book_files(dbsession, book) - dbsession.flush() - - # Book should not be processed (status unchanged) - assert book.status == "pending_processing" - - -class TestMoveBookFilesEdgeCases: - """Tests for edge cases in move_book_files.""" - - def test_move_book_files_same_location( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - 
temp_warehouse_dirs: dict[str, Path], - ): - """Test when current location matches target.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="zims" - ) - dbsession.flush() - - source_dir = temp_warehouse_dirs["warehouse_1"] / "zims" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "book.zim" - source_file.write_text("content") - - book = create_book() - # current_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="book.zim", - status="current", - ) - # target_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="book.zim", - status="target", - ) - dbsession.flush() - warehouse_paths = {warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"])} - Context.local_warehouse_paths = warehouse_paths + # Should have moved once + assert mock_move.call_count == 1 + + assert book.status == "published" + assert any("moved book from" in event for event in book.events) + # Current location should be removed + assert len([loc for loc in book.locations if loc.status == "current"]) == 1 + + +def test_move_book_files_delete_operation( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files deletes extra current locations""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() + + # Two current locations + create_book_location( + book=book, warehouse_id=warehouse.id, path="current1", status="current" + ) + create_book_location( + book=book, warehouse_id=warehouse.id, path="current2", status="current" + ) + + # One target location + create_book_location( + book=book, warehouse_id=warehouse.id, path="target", status="target" + ) + dbsession.flush() + + with ExitStack() as stack: + mock_context = stack.enter_context( + patch("cms_backend.shuttle.move_files.ShuttleContext") + ) + 
stack.enter_context(patch("shutil.move")) + stack.enter_context(patch("pathlib.Path.mkdir")) + mock_unlink = stack.enter_context(patch("pathlib.Path.unlink")) + + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} move_book_files(dbsession, book) - dbsession.flush() - - assert book.status == "published" - assert source_file.exists() - assert source_file.read_text() == "content" - - def test_move_book_files_events_contain_warehouse_info( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that events contain warehouse and folder info.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="primary_warehouse", folder_name="main_zims" - ) - warehouse_2, warehouse_path_2 = create_warehouse_with_path( - name="backup_warehouse", folder_name="backup_zims" - ) - dbsession.flush() - - source_dir = temp_warehouse_dirs["warehouse_1"] / "main_zims" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "test_book.zim" - source_file.write_text("content") - - book = create_book() - # current_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="test_book.zim", - status="current", - ) - # target_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_2, - filename="test_book_v2.zim", - status="target", - ) - dbsession.flush() - (temp_warehouse_dirs["warehouse_2"] / "backup_zims").mkdir( - parents=True, exist_ok=True - ) + # Should have deleted one extra location + assert mock_unlink.call_count == 1 - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), - } - Context.local_warehouse_paths = warehouse_paths - move_book_files(dbsession, book) - dbsession.flush() - - move_events = [e for e in book.events if "moved book from" in e] - assert len(move_events) == 1 - event = 
move_events[0] - assert "primary_warehouse" in event - assert "main_zims" in event - assert "backup_warehouse" in event - assert "backup_zims" in event - - def test_move_book_files_file_content_preserved( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that file content is preserved during copy and move.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="source" - ) - warehouse_2_a, warehouse_path_2_a = create_warehouse_with_path( - name="warehouse_2", folder_name="target_a" - ) - warehouse_2_b, warehouse_path_2_b = create_warehouse_with_path( - name="warehouse_2", folder_name="target_b" - ) - dbsession.flush() - - test_content = "This is a test ZIM file content with special chars: éàù\n" * 100 - source_dir = temp_warehouse_dirs["warehouse_1"] / "source" - source_dir.mkdir(parents=True, exist_ok=True) - source_file = source_dir / "source.zim" - source_file.write_text(test_content) - - book = create_book() - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="source.zim", - status="current", - ) - create_book_location( - book=book, - warehouse_path=warehouse_path_2_a, - filename="target1.zim", - status="target", - ) - create_book_location( - book=book, - warehouse_path=warehouse_path_2_b, - filename="target2.zim", - status="target", - ) - dbsession.flush() - - for subfolder in ["target_a", "target_b"]: - (temp_warehouse_dirs["warehouse_2"] / subfolder).mkdir( - parents=True, exist_ok=True - ) - - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2_a.id: str(temp_warehouse_dirs["warehouse_2"]), - warehouse_2_b.id: str(temp_warehouse_dirs["warehouse_2"]), - } - Context.local_warehouse_paths = warehouse_paths - move_book_files(dbsession, book) - dbsession.flush() - - target_file_1 = temp_warehouse_dirs["warehouse_2"] / "target_a" 
/ "target1.zim" - target_file_2 = temp_warehouse_dirs["warehouse_2"] / "target_b" / "target2.zim" - - assert target_file_1.read_text() == test_content - assert target_file_2.read_text() == test_content - - def test_move_book_files_three_current_one_target( - self, - dbsession: OrmSession, - create_book: Any, - create_book_location: Any, - create_warehouse_with_path: Any, - temp_warehouse_dirs: dict[str, Path], - ): - """Test that when there are 3 current and 1 target, one moves and two get - deleted.""" - warehouse_1, warehouse_path_1 = create_warehouse_with_path( - name="warehouse_1", folder_name="zims" - ) - warehouse_2, warehouse_path_2 = create_warehouse_with_path( - name="warehouse_2", folder_name="zims" - ) - warehouse_3, warehouse_path_3 = create_warehouse_with_path( - name="warehouse_3", folder_name="zims" - ) - warehouse_4, warehouse_path_4 = create_warehouse_with_path( - name="warehouse_4", folder_name="zims" - ) - dbsession.flush() - - # Create three current locations with actual files - source_dir_1 = temp_warehouse_dirs["warehouse_1"] / "zims" - source_dir_1.mkdir(parents=True, exist_ok=True) - source_file_1 = source_dir_1 / "book1.zim" - source_file_1.write_text("content 1") - - source_dir_2 = temp_warehouse_dirs["warehouse_2"] / "zims" - source_dir_2.mkdir(parents=True, exist_ok=True) - source_file_2 = source_dir_2 / "book2.zim" - source_file_2.write_text("content 2") - - source_dir_3 = temp_warehouse_dirs["warehouse_1"] / "zims" - source_file_3 = source_dir_3 / "book3.zim" - source_file_3.write_text("content 3") - - book = create_book() - # current_loc_1 - create_book_location( - book=book, - warehouse_path=warehouse_path_1, - filename="book1.zim", - status="current", - ) - # current_loc_2 - create_book_location( - book=book, - warehouse_path=warehouse_path_2, - filename="book2.zim", - status="current", - ) - # current_loc_3 - create_book_location( - book=book, - warehouse_path=warehouse_path_3, - filename="book3.zim", - status="current", - ) - # 
target_loc - create_book_location( - book=book, - warehouse_path=warehouse_path_4, - filename="book_final.zim", - status="target", - ) - dbsession.flush() + assert book.status == "published" + assert any("deleted old location" in event for event in book.events) + + +def test_move_book_files_updates_book_locations( + dbsession: OrmSession, + create_book: Callable[..., Book], + create_book_location: Callable[..., BookLocation], + create_warehouse: Callable[..., Warehouse], +): + """Test that move_book_files updates target locations to current status""" + warehouse = create_warehouse() + book = create_book() + dbsession.flush() - (temp_warehouse_dirs["warehouse_2"] / "zims").mkdir(parents=True, exist_ok=True) + # One current location + _current_loc = create_book_location( + book=book, warehouse_id=warehouse.id, path="current", status="current" + ) - warehouse_paths = { - warehouse_1.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_2.id: str(temp_warehouse_dirs["warehouse_2"]), - warehouse_3.id: str(temp_warehouse_dirs["warehouse_1"]), - warehouse_4.id: str(temp_warehouse_dirs["warehouse_2"]), - } - Context.local_warehouse_paths = warehouse_paths + # One target location + _target_loc = create_book_location( + book=book, warehouse_id=warehouse.id, path="target", status="target" + ) + dbsession.flush() + + with ExitStack() as stack: + mock_context = stack.enter_context( + patch("cms_backend.shuttle.move_files.ShuttleContext") + ) + stack.enter_context(patch("shutil.move")) + stack.enter_context(patch("pathlib.Path.mkdir")) + + mock_context.local_warehouse_paths = {warehouse.id: Path("/warehouse")} move_book_files(dbsession, book) - dbsession.flush() - - # Verify one file moved to target - target_file = temp_warehouse_dirs["warehouse_2"] / "zims" / "book_final.zim" - assert target_file.exists() - assert target_file.read_text() == "content 1" # First current moved - - # Verify other files deleted - assert not source_file_1.exists() # Moved - assert not 
source_file_2.exists() # Deleted - assert not source_file_3.exists() # Deleted - - # Verify events - move_events = [e for e in book.events if "moved book from" in e] - delete_events = [e for e in book.events if "deleted old location" in e] - assert len(move_events) == 1 - assert len(delete_events) == 2 - - # Verify only one current location remains - current_locations = [loc for loc in book.locations if loc.status == "current"] - assert len(current_locations) == 1 - assert current_locations[0].warehouse_path_id == warehouse_path_4.id - - assert book.status == "published" + + # After move, target should become current + assert _target_loc.status == "current" + # Old current should be removed from book.locations + assert _current_loc not in book.locations diff --git a/dev/README.md b/dev/README.md index 6a12dc1..11d03c8 100644 --- a/dev/README.md +++ b/dev/README.md @@ -60,12 +60,13 @@ docker exec cms_shuttle python /scripts/setup_warehouses.py This script will: - Create warehouse directories in `dev/warehouses/` -- Create corresponding database records (Warehouse and WarehousePath) +- Create corresponding database records (Warehouse) - Print the LOCAL_WAREHOUSE_PATHS configuration (already configured in docker-compose.yml) Current warehouse configuration: - **hidden**: 2 paths (`jail`, `dev`) -- **prod**: 1 path (`other`) +- **prod**: 1 path (`other`, `wikipedia`) +- **client1**: 1 path (`all`) To modify warehouse configuration, edit the `WAREHOUSES_CONFIG` dict in [scripts/setup_warehouses.py](scripts/setup_warehouses.py) and re-run the script. 
diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml index 714e941..c6e5bb1 100644 --- a/dev/docker-compose.yml +++ b/dev/docker-compose.yml @@ -63,6 +63,10 @@ services: environment: DEBUG: 1 DATABASE_URL: postgresql+psycopg://cms:cmspass@postgresdb:5432/cms + JAIL_WAREHOUSE_ID: 11111111-1111-1111-1111-111111111111 + JAIL_BASE_PATH: jail + STAGING_WAREHOUSE_ID: 11111111-1111-1111-1111-111111111111 + STAGING_BASE_PATH: staging depends_on: postgresdb: condition: service_healthy diff --git a/dev/scripts/setup_collections.py b/dev/scripts/setup_collections.py new file mode 100644 index 0000000..73d1d28 --- /dev/null +++ b/dev/scripts/setup_collections.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +""" +Development collection setup script. + +Creates collection database records. +""" + +import sys +from uuid import UUID + +# Add backend source to path for imports +sys.path.insert(0, "/usr/local/lib/python3.13/site-packages") + +from cms_backend.db import Session +from cms_backend.db.models import Collection + + +# Configuration: Define collections +COLLECTIONS_CONFIG = { + "prod": { + "id": UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"), + "warehouse_id": UUID("22222222-2222-2222-2222-222222222222"), + }, + "client1": { + "id": UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"), + "warehouse_id": UUID("33333333-3333-3333-3333-333333333333"), + }, +} + + +def create_collections(): + """Create collection database records.""" + session = Session() + + try: + for collection_name, config in COLLECTIONS_CONFIG.items(): + print(f"\nProcessing library: {collection_name}") + collection_id = config["id"] + + # Check if library already exists + existing = ( + session.query(Collection).filter(Collection.id == collection_id).first() + ) + + if existing: + print(f" ⊘ Collection '{collection_name}' already exists (skipping)") + continue + + # Create library DB record with predefined ID + collection = Collection( + name=collection_name, warehouse_id=config["warehouse_id"] + ) + 
collection.id = collection_id + session.add(collection) + session.flush() + print(f" ✓ Created collection '{collection_name}' with ID {collection.id}") + + # Commit all changes + session.commit() + print("\n✓ All database changes committed") + + # Print summary + print("\n" + "=" * 70) + print("Collections created:") + print("=" * 70) + for collection_name, config in COLLECTIONS_CONFIG.items(): + collection_id = config["id"] + print(f" {collection_name}: {collection_id}") + print(f" Warehouse ID: {config['warehouse_id']}") + print(f" Catalog URL: /v1/collections/{collection_name}/catalog.xml") + print(f" or: /v1/collections/{collection_id}/catalog.xml") + print("=" * 70) + + except Exception as e: + session.rollback() + print(f"\n✗ Error: {e}", file=sys.stderr) + raise + finally: + session.close() + + +if __name__ == "__main__": + create_collections() diff --git a/dev/scripts/setup_libraries.py b/dev/scripts/setup_libraries.py deleted file mode 100644 index 3c238eb..0000000 --- a/dev/scripts/setup_libraries.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 -""" -Development library setup script. - -Creates library database records with associated warehouse paths. 
-""" - -import sys -from uuid import UUID - -# Add backend source to path for imports -sys.path.insert(0, "/usr/local/lib/python3.13/site-packages") - -from cms_backend.db import Session -from cms_backend.db.models import Library, LibraryWarehousePath, WarehousePath - - -# Configuration: Define libraries and their warehouse paths -# Libraries group warehouse paths together for catalog generation -LIBRARIES_CONFIG = { - "dev": { - "id": UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"), - # Include dev path from hidden warehouse - "warehouse_path_names": [("hidden", "dev")], - }, - "prod": { - "id": UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"), - # Include all paths from prod warehouse - "warehouse_path_names": [("prod", "other"), ("prod", "wikipedia")], - }, - "client1": { - "id": UUID("cccccccc-cccc-cccc-cccc-cccccccccccc"), - # Include all paths from client1 warehouse - "warehouse_path_names": [("client1", "all")], - }, -} - - -def create_library_structure(): - """Create library database records.""" - session = Session() - - try: - for library_name, config in LIBRARIES_CONFIG.items(): - print(f"\nProcessing library: {library_name}") - library_id = config["id"] - - # Check if library already exists - existing = session.query(Library).filter(Library.id == library_id).first() - - if existing: - print(f" ⊘ Library '{library_name}' already exists (skipping)") - continue - - # Create library DB record with predefined ID - library = Library(name=library_name) - library.id = library_id - session.add(library) - session.flush() - print(f" ✓ Created library '{library_name}' with ID {library.id}") - - # Associate warehouse paths - for warehouse_name, path_name in config["warehouse_path_names"]: - # Find the warehouse path by warehouse name and folder name - warehouse_path = ( - session.query(WarehousePath) - .join(WarehousePath.warehouse) - .filter( - WarehousePath.folder_name == path_name, - WarehousePath.warehouse.has(name=warehouse_name), - ) - .first() - ) - - if not 
warehouse_path: - print( - f" ⚠ Warehouse path '{warehouse_name}/{path_name}' not found " - f"(run setup_warehouses.py first)" - ) - continue - - # Create library-warehouse path association - lwp = LibraryWarehousePath() - lwp.warehouse_path_id = warehouse_path.id - library.warehouse_paths.append(lwp) - session.flush() - print( - f" ✓ Added path '{warehouse_name}/{path_name}' " - f"(ID: {warehouse_path.id})" - ) - - # Commit all changes - session.commit() - print("\n✓ All database changes committed") - - # Print summary - print("\n" + "=" * 70) - print("Libraries created:") - print("=" * 70) - for library_name, config in LIBRARIES_CONFIG.items(): - paths = ", ".join(f"{w}/{p}" for w, p in config["warehouse_path_names"]) - print(f" {library_name}: {paths}") - print(f" Catalog URL: /v1/libraries/{library_name}/catalog.xml") - print("=" * 70) - - except Exception as e: - session.rollback() - print(f"\n✗ Error: {e}", file=sys.stderr) - raise - finally: - session.close() - - -if __name__ == "__main__": - create_library_structure() diff --git a/dev/scripts/setup_notifications.py b/dev/scripts/setup_notifications.py index 4cf683f..230a650 100644 --- a/dev/scripts/setup_notifications.py +++ b/dev/scripts/setup_notifications.py @@ -53,14 +53,8 @@ "Illustration_48x48@1": FAVICON_BLUE, }, "zimcheck": {"status": "pass"}, - "warehouse_name": "hidden", - "folder_name": "jail", + "folder_name": "wikipedia", "filename": "wikipedia_en_all_maxi_2025-01.zim", - "producer": { - "displayName": "farm.openzim.org: wikipedia_en_all_maxi", - "displayUrl": "https://farm.openzim.org/recipes/wikipedia_en_all_maxi", - "uniqueId": "farm.openzim.org:wikipedia_en_all_maxi", - }, }, { "article_count": 500, @@ -78,14 +72,8 @@ "Illustration_48x48@1": FAVICON_GREEN, }, "zimcheck": {"status": "pass"}, - "warehouse_name": "hidden", - "folder_name": "jail", + "folder_name": "wiktionary", "filename": "wiktionary_fr_all_maxi_2025-01.zim", - "producer": { - "displayName": "wiktionary_fr", - "displayUrl": 
"https://farm.openzim.org/recipes/wiktionary_fr", - "uniqueId": "farm.openzim.org:wiktionary_fr", - }, }, { "article_count": 1500, @@ -103,14 +91,8 @@ "Illustration_48x48@1": FAVICON_RED, }, "zimcheck": {"status": "pass"}, - "warehouse_name": "hidden", - "folder_name": "jail", + "folder_name": "", "filename": "wiktionary_en_all_maxi_2025-01.zim", - "producer": { - "displayName": "wiktionary_en", - "displayUrl": "https://farm.openzim.org/recipes/wiktionary_en", - "uniqueId": "farm.openzim.org:wiktionary_en", - }, }, ] @@ -124,13 +106,12 @@ def create_notifications(): try: for content in NOTIFICATIONS_CONFIG: filename = content.get("filename", "unknown") - warehouse_name = content["warehouse_name"] folder_name = content["folder_name"] print(f"\nProcessing notification: {filename}") # Check if file already exists in warehouse - file_path = WAREHOUSE_BASE_PATH / warehouse_name / folder_name / filename + file_path = WAREHOUSE_BASE_PATH / "hidden/jail" / folder_name / filename if file_path.exists(): print(f" - File already exists at {file_path} (skipping)") continue @@ -154,9 +135,7 @@ def create_notifications(): file_path.write_text(str(notification_id)) print(f" + Created file: {file_path}") - created_notifications.append( - (filename, warehouse_name, folder_name, notification_id) - ) + created_notifications.append((filename, folder_name, notification_id)) # Commit all changes session.commit() @@ -169,12 +148,11 @@ def create_notifications(): print("=" * 70) for ( filename, - warehouse_name, folder_name, notification_id, ) in created_notifications: print(f" {filename}") - print(f" warehouse: {warehouse_name}/{folder_name}") + print(f" folder: {folder_name}") print(f" id: {notification_id}") print("=" * 70) print("\nThe mill will now process these notifications into books.") diff --git a/dev/scripts/setup_titles.py b/dev/scripts/setup_titles.py index b94566f..5b3ebaa 100644 --- a/dev/scripts/setup_titles.py +++ b/dev/scripts/setup_titles.py @@ -2,52 +2,32 @@ """ 
Development titles setup script. -Creates Title records and associates them with warehouse paths. +Creates Title records and associates them with collection paths. """ from cms_backend.db import Session -from cms_backend.db.models import Title, TitleWarehousePath, Warehouse, WarehousePath +from cms_backend.db.models import Title, CollectionTitle -# Configuration: Define titles and their warehouse path associations -# Format for paths: (warehouse_name, folder_name) +# Configuration: Define titles and their collection path associations TITLES_CONFIG = [ { "name": "wikipedia_en_all", - "producer_unique_id": "farm.openzim.org:wikipedia_en_all_maxi", - "dev_paths": [("hidden", "dev")], - "prod_paths": [("prod", "wikipedia")], + "maturity": "dev", + "collections": [ + {"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "path": "wikipedia"} + ], }, { "name": "wiktionary_fr_all", - "producer_unique_id": "farm.openzim.org:wiktionary_fr", - "producer_display_name": "wiktionary_fr", - "producer_display_url": "https://farm.openzim.org/recipes/wiktionary_fr", - "in_prod": True, - "dev_paths": [("hidden", "dev")], - "prod_paths": [("prod", "other"), ("client1", "all")], + "maturity": "robust", + "collections": [ + {"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "path": "other"}, + {"id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "path": "all"}, + ], }, ] -def get_warehouse_path(session, warehouse_name: str, folder_name: str) -> WarehousePath: - """Look up a WarehousePath by warehouse name and folder name.""" - result = ( - session.query(WarehousePath) - .join(Warehouse) - .filter( - Warehouse.name == warehouse_name, - WarehousePath.folder_name == folder_name, - ) - .first() - ) - if not result: - raise ValueError( - f"WarehousePath not found: {warehouse_name}/{folder_name}. " - "Run setup_warehouses.py first." 
- ) - return result - - def create_titles(): """Create title records and associate them with warehouse paths.""" session = Session() @@ -67,41 +47,24 @@ def create_titles(): # Create title record title = Title( name=title_name, - producer_unique_id=title_config["producer_unique_id"], ) # Set optional fields - if "in_prod" in title_config: - title.in_prod = title_config["in_prod"] - if "producer_display_name" in title_config: - title.producer_display_name = title_config["producer_display_name"] - if "producer_display_url" in title_config: - title.producer_display_url = title_config["producer_display_url"] + if "maturity" in title_config: + title.maturity = title_config["maturity"] session.add(title) session.flush() # Get the generated UUID print(f" + Created title '{title_name}' with ID {title.id}") - # Associate dev warehouse paths - for warehouse_name, folder_name in title_config.get("dev_paths", []): - warehouse_path = get_warehouse_path( - session, warehouse_name, folder_name - ) - twp = TitleWarehousePath(path_type="dev") - twp.title = title - twp.warehouse_path = warehouse_path - session.add(twp) - print(f" + Added dev path: {warehouse_name}/{folder_name}") - - # Associate prod warehouse paths - for warehouse_name, folder_name in title_config.get("prod_paths", []): - warehouse_path = get_warehouse_path( - session, warehouse_name, folder_name + # Associate collections + for collection in title_config.get("collections", []): + ct = CollectionTitle(path=collection["path"]) + ct.title = title + ct.collection_id = collection["id"] + session.add(ct) + print( + f" + Added collection: {collection['id']}/{collection['path']}" ) - twp = TitleWarehousePath(path_type="prod") - twp.title = title - twp.warehouse_path = warehouse_path - session.add(twp) - print(f" + Added prod path: {warehouse_name}/{folder_name}") # Commit all changes session.commit() @@ -112,17 +75,11 @@ def create_titles(): print("Titles configured:") print("=" * 70) for title_config in TITLES_CONFIG: - 
dev_paths = ", ".join( - f"{w}/{f}" for w, f in title_config.get("dev_paths", []) - ) - prod_paths = ", ".join( - f"{w}/{f}" for w, f in title_config.get("prod_paths", []) - ) print(f" {title_config['name']}") - if (in_prod := title_config.get("in_prod")) is not None: - print(f" in_prod: {in_prod}") - print(f" dev: {dev_paths or '(none)'}") - print(f" prod: {prod_paths or '(none)'}") + if (maturity := title_config.get("maturity")) is not None: + print(f" maturity: {maturity}") + for collection in title_config.get("collections", []): + print(f" - {collection['id']}/{collection['path']}") print("=" * 70) except Exception as e: diff --git a/dev/scripts/setup_warehouses.py b/dev/scripts/setup_warehouses.py index cd6e394..33e15b1 100644 --- a/dev/scripts/setup_warehouses.py +++ b/dev/scripts/setup_warehouses.py @@ -13,7 +13,7 @@ sys.path.insert(0, "/usr/local/lib/python3.13/site-packages") from cms_backend.db import Session -from cms_backend.db.models import Warehouse, WarehousePath +from cms_backend.db.models import Warehouse # Configuration: Define warehouses and their paths @@ -21,18 +21,15 @@ WAREHOUSES_CONFIG = { "hidden": { "id": UUID("11111111-1111-1111-1111-111111111111"), - "paths": ["jail", "dev"], - "configuration": {}, + "paths": ["jail", "staging"], }, "prod": { "id": UUID("22222222-2222-2222-2222-222222222222"), "paths": ["other", "wikipedia"], - "configuration": {}, }, "client1": { "id": UUID("33333333-3333-3333-3333-333333333333"), "paths": ["all"], - "configuration": {}, }, } @@ -61,7 +58,6 @@ def create_warehouse_structure(): # Create warehouse DB record with predefined ID warehouse = Warehouse( name=warehouse_name, - configuration=config.get("configuration", {}), ) warehouse.id = warehouse_id session.add(warehouse) @@ -72,31 +68,6 @@ def create_warehouse_structure(): # Create paths for this warehouse for path_name in config["paths"]: - # Check if path already exists - existing_path = ( - session.query(WarehousePath) - .filter( - 
WarehousePath.warehouse_id == warehouse.id, - WarehousePath.folder_name == path_name, - ) - .first() - ) - - if existing_path: - print(f" ⊘ Path '{path_name}' already exists (skipping)") - warehouse_path = existing_path - else: - # Create warehouse path DB record - warehouse_path = WarehousePath( - folder_name=path_name, - ) - warehouse_path.warehouse = warehouse - session.add(warehouse_path) - session.flush() - print( - f" ✓ Created path '{path_name}' with ID {warehouse_path.id}" - ) - # Create physical directory physical_path = WAREHOUSE_BASE_PATH / warehouse_name / path_name if physical_path.exists(): diff --git a/dev/scripts/wipe.py b/dev/scripts/wipe.py index 4b7d699..ec77327 100644 --- a/dev/scripts/wipe.py +++ b/dev/scripts/wipe.py @@ -13,12 +13,10 @@ from cms_backend.db.models import ( Book, BookLocation, - Library, - LibraryWarehousePath, + Collection, + CollectionTitle, Title, - TitleWarehousePath, Warehouse, - WarehousePath, ZimfarmNotification, ) @@ -46,25 +44,17 @@ def wipe_database(session): count = session.query(Book).delete() print(f" - Deleted {count} Book records") - # 4. TitleWarehousePath (depends on Title and WarehousePath) - count = session.query(TitleWarehousePath).delete() - print(f" - Deleted {count} TitleWarehousePath records") + # 4. CollectionTitle (depends on Title and Collection) + count = session.query(CollectionTitle).delete() + print(f" - Deleted {count} CollectionTitle records") # 5. Title count = session.query(Title).delete() print(f" - Deleted {count} Title records") - # 6. LibraryWarehousePath (depends on Library and WarehousePath) - count = session.query(LibraryWarehousePath).delete() - print(f" - Deleted {count} LibraryWarehousePath records") - - # 7. Library - count = session.query(Library).delete() - print(f" - Deleted {count} Library records") - - # 8. WarehousePath (depends on Warehouse) - count = session.query(WarehousePath).delete() - print(f" - Deleted {count} WarehousePath records") + # 7. 
Collection (depends on Warehouse) + count = session.query(Collection).delete() + print(f" - Deleted {count} Collection records") # 9. Warehouse count = session.query(Warehouse).delete() diff --git a/frontend/src/components/CreateTitleDialog.vue b/frontend/src/components/CreateTitleDialog.vue index e772c80..a0332a0 100644 --- a/frontend/src/components/CreateTitleDialog.vue +++ b/frontend/src/components/CreateTitleDialog.vue @@ -17,57 +17,12 @@ /> - - - - - - @@ -95,9 +50,8 @@ diff --git a/frontend/src/components/TitlesTable.vue b/frontend/src/components/TitlesTable.vue index 8fcbcda..b390925 100644 --- a/frontend/src/components/TitlesTable.vue +++ b/frontend/src/components/TitlesTable.vue @@ -60,17 +60,6 @@ - -