diff --git a/.claude/skills/prepare-presentation/SKILL.md b/.claude/skills/prepare-presentation/SKILL.md new file mode 100644 index 0000000..f02e2b9 --- /dev/null +++ b/.claude/skills/prepare-presentation/SKILL.md @@ -0,0 +1,99 @@ +--- +name: prepare-presentation +description: Convert an existing presentation (Google Slides or PPTX) into a prepared template by copying it, naming elements by layout position, and replacing all content with placeholders. +--- + +# Prepare Presentation Skill + +Convert an existing presentation (Google Slides or PPTX) into a prepared template by copying it, +naming elements by layout position, and replacing all content with placeholders. + +## Workflow + +### Step 1: Copy the source presentation + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py copy --source <url_or_path> [--title <title>] +``` + +Creates a working copy of the presentation so the original is not modified. +Prints the new presentation's ID or file path. + +### Step 2: Inspect slides and decide on names + +For each slide, run: + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py inspect --source <copied_id_or_path> [--slide <index>] +``` + +This prints: +- The path to a saved thumbnail image (read it to see the visual layout) +- The slide's `markdown()` output with all element metadata in HTML comments + +Examine each slide's thumbnail and markdown to determine position-based names for elements and slides. + +### Step 3: Apply names + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py name --source <copied_id_or_path> --mapping '<json>' +``` + +The `--mapping` JSON has this structure: + +```json +{ + "slides": [ + { + "index": 0, + "slide_name": "title_slide", + "elements": {"Title": "title", "Text_1": "subtitle"} + } + ] +} +``` + +The element keys are the **display names from the inspect output** (alt_text title or objectId), and the values are the new names to assign. Display names are unique per slide. 
+ +### Step 4: Replace content with placeholders + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py templatize --source <copied_id_or_path> +``` + +This replaces: +- **Named images** (min dimension >= 4 cm): replaced with a gray placeholder image +- **Named text shapes**: replaced with "Example Text" (or "# Example Header Style\n\nExample body style" for multi-style elements) +- **Named tables**: all cells replaced with "Text" + +## Naming Guidelines + +Names must describe **layout geometry and relative position only** — never content, topic, or semantic meaning. Imagine the slide with all text/images blanked out; names should still make sense from position alone. + +### Element names (purely positional) +- Use position on the slide: "top_left", "top_right", "center", "bottom_left", etc. +- For multiple text elements in the same region, number them by reading order: "left_text_1", "left_text_2" +- All images/charts regardless of type -> "chart" (or "chart_1", "chart_2" if multiple) +- Tables -> "table" (or "table_1", "table_2" if multiple) +- The topmost/largest text element is typically "title" +- A text element directly below the title -> "subtitle" +- Small decorative elements (< 1cm) can be skipped + +### Slide names +- Name by layout structure, not content: "title_fullwidth", "two_column", "chart_right", "grid_2x3", etc. 
+- First slide -> "title_slide" +- Last slide (if simple/closing) -> "closing" +- For repeated layouts, number them: "chart_right_1", "chart_right_2" + +### What NOT to do +- Do NOT use content-derived names like "customer_name", "insights", "performance", "weekly" +- Do NOT name elements after what they currently say (e.g., "sidebar" because it says "Key Achievements") +- DO describe where the element sits: "left_text", "right_chart", "top_bar", "bottom_row_1" + +## Notes + +- Minimum image size for replacement: 4 cm in the smallest dimension +- Tables get "Text" in every cell, preserving the table shape (rows x cols) +- `write_text` handles markdown-to-style mapping automatically +- For Google Slides, credentials must be initialized before running +- For PPTX files, just pass the file path as `--source` diff --git a/.claude/skills/prepare-presentation/placeholder.png b/.claude/skills/prepare-presentation/placeholder.png new file mode 100644 index 0000000..61f5ebd Binary files /dev/null and b/.claude/skills/prepare-presentation/placeholder.png differ diff --git a/.claude/skills/prepare-presentation/prepare.py b/.claude/skills/prepare-presentation/prepare.py new file mode 100644 index 0000000..ffd1f36 --- /dev/null +++ b/.claude/skills/prepare-presentation/prepare.py @@ -0,0 +1,273 @@ +"""Prepare-presentation skill: copy, inspect, name, and templatize a presentation. 
def _resolve_source(source: str) -> tuple[object, str]:
    """Pick the adapter for ``source`` and return ``(api_client, presentation_id)``.

    Routing rules, in order:

    - A string ending in ``.pptx`` (case-insensitive), or any path that exists
      as a regular file, is handled by ``PowerPointAPIClient``; the identifier
      is the (stripped) path itself.
    - Anything else is treated as a Google Slides URL or ID. The identifier is
      whatever ``normalize_presentation_id`` returns, or the raw string when
      that result is falsy.

    For the Google Slides route, credentials are initialized from the
    ``GSLIDES_CREDENTIALS_PATH`` environment variable when it is set.
    """
    source = source.strip()

    # PPTX file path
    if source.lower().endswith(".pptx") or os.path.isfile(source):
        from gslides_api.adapters.pptx_adapter import PowerPointAPIClient

        return PowerPointAPIClient(), source

    # Google Slides URL or ID. Imported lazily so the PPTX route does not
    # need the Google adapter to be importable.
    from gslides_api.adapters.gslides_adapter import GSlidesAPIClient

    # Fall back to the raw string when normalization yields nothing; both
    # cases share the client setup below (previously duplicated verbatim).
    presentation_id = normalize_presentation_id(source) or source

    api_client = GSlidesAPIClient.get_default_api_client()
    credential_location = os.getenv("GSLIDES_CREDENTIALS_PATH")
    if credential_location:
        api_client.initialize_credentials(credential_location)
    return api_client, presentation_id
def _load_presentation(source: str):
    """Load the presentation behind ``source``.

    Returns:
        ``(api_client, presentation)`` where ``api_client`` is the adapter
        chosen by ``_resolve_source`` and ``presentation`` was loaded with it.
    """
    api_client, presentation_id = _resolve_source(source)
    presentation = AbstractPresentation.from_id(
        api_client=api_client, presentation_id=presentation_id
    )
    return api_client, presentation


def cmd_copy(args):
    """Copy a presentation and print the new ID/path, plus its URL when available.

    Uses ``args.source`` and the optional ``args.title``; without a title the
    copy is called ``"Template - <original title or 'Untitled'>"``.
    """
    api_client, presentation = _load_presentation(args.source)
    title = args.title or f"Template - {presentation.title or 'Untitled'}"

    copied = presentation.copy_via_drive(api_client=api_client, copy_title=title)
    presentation_id = copied.presentationId or ""

    # For PPTX, the presentationId is the file path
    print(f"Copied presentation: {presentation_id}")

    # `url` is a property, so merely probing it with hasattr() runs the getter
    # and only swallows AttributeError. Read it inside the try so any failure
    # of the getter is treated as "no URL" instead of aborting the command.
    try:
        url = copied.url
    except Exception:
        logger.debug("Copied presentation exposes no usable URL", exc_info=True)
    else:
        print(f"URL: {url}")


def cmd_inspect(args):
    """Print a thumbnail location and the markdown layout for each inspected slide.

    Inspects only ``args.slide`` when given, otherwise every slide. Elements
    without an alt-text title are listed separately by objectId, because the
    base ``AbstractSlide.markdown()`` leaves them out while ``cmd_name`` accepts
    an objectId as a mapping key.

    Raises:
        SystemExit: if ``args.slide`` is not a valid index into the slides.
    """
    api_client, presentation = _load_presentation(args.source)
    slides = presentation.slides

    if args.slide is not None:
        try:
            slides_to_inspect = [(args.slide, slides[args.slide])]
        except IndexError:
            raise SystemExit(
                f"Slide index {args.slide} is out of range "
                f"(presentation has {len(slides)} slides)"
            ) from None
    else:
        slides_to_inspect = list(enumerate(slides))

    for i, slide in slides_to_inspect:
        print(f"\n{'='*60}")
        print(f"SLIDE {i} (objectId: {slide.objectId})")
        print(f"{'='*60}")

        # Get and save thumbnail; a failure here must not stop the inspection
        try:
            thumb = slide.thumbnail(
                api_client=api_client,
                size=AbstractThumbnailSize.MEDIUM,
                include_data=True,
            )
            if thumb.content:
                ext = ".png" if "png" in thumb.mime_type else ".jpg"
                # delete=False: the file must outlive this process so the
                # caller can open the printed path; `with` closes the handle
                # even if the write fails.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=ext, prefix=f"slide_{i}_"
                ) as tmp:
                    tmp.write(thumb.content)
                print(f"Thumbnail: {tmp.name}")
            elif thumb.contentUrl and thumb.contentUrl.startswith("file://"):
                # Strip only the scheme prefix, not later occurrences in the path
                print(f"Thumbnail: {thumb.contentUrl.removeprefix('file://')}")
            else:
                print(f"Thumbnail URL: {thumb.contentUrl}")
        except Exception as e:
            print(f"Thumbnail error: {e}")

        # Print markdown representation
        print(f"\nMarkdown:\n")
        print(slide.markdown())

        unnamed = [el for el in slide.page_elements_flat if not el.alt_text.title]
        if unnamed:
            print("\nUnnamed elements (use the objectId as the mapping key):")
            for element in unnamed:
                print(f" - {element.type or 'element'}: {element.objectId}")
        print()


def cmd_name(args):
    """Apply slide and element names from the ``--mapping`` JSON, then save.

    Slide names are written to the speaker notes; element names are written to
    the alt-text title. Element keys are matched against the display names the
    slide had *before* any renaming in this call (alt-text title, or objectId
    when the title is empty); the first element with a given display name wins.
    """
    api_client, presentation = _load_presentation(args.source)
    mapping = json.loads(args.mapping)

    for slide_mapping in mapping["slides"]:
        idx = slide_mapping["index"]
        slide = presentation.slides[idx]

        # Name the slide via speaker notes
        slide_name = slide_mapping.get("slide_name")
        if slide_name:
            if slide.speaker_notes:
                slide.speaker_notes.write_text(api_client=api_client, content=slide_name)
                print(f"Slide {idx}: named '{slide_name}'")
            else:
                print(
                    f" WARNING: Slide {idx} has no speaker notes; "
                    f"name '{slide_name}' not applied"
                )

        # Snapshot display names up front so a name assigned in this pass can
        # never be picked up by a later key of the same mapping.
        by_display_name = {}
        for element in slide.page_elements_flat:
            display_name = element.alt_text.title or element.objectId
            by_display_name.setdefault(display_name, element)

        # Name elements via alt text (keys are display names from inspect output)
        for old_name, new_name in slide_mapping.get("elements", {}).items():
            element = by_display_name.get(old_name)
            if element is None:
                print(f" WARNING: Element '{old_name}' not found on slide {idx}")
                continue
            element.set_alt_text(api_client=api_client, title=new_name)
            print(f" Element '{old_name}' -> '{new_name}'")

    presentation.save(api_client=api_client)
    print("\nNames applied and saved.")
def _templatize_image(element, api_client, name):
    """Swap a named image for the placeholder unless its smaller side is too small."""
    min_dim = min(element.absolute_size(units=OutputUnit.CM))
    if not (min_dim >= MIN_IMAGE_SIZE_CM):
        print(f" Skipped small image '{name}' ({min_dim:.1f} cm)")
        return
    element.replace_image(api_client=api_client, file=PLACEHOLDER_PATH)
    print(f" Replaced image '{name}' with placeholder")


def _templatize_text(element, api_client, name):
    """Overwrite a named text shape with one- or two-style example text."""
    try:
        styles = element.styles(skip_whitespace=True)
    except Exception:
        styles = None

    # Two or more distinct styles -> show a header line and a body line
    if styles and len(styles) >= 2:
        content = "# Example Header Style\n\nExample body style"
    else:
        content = "Example Text"
    element.write_text(api_client=api_client, content=content)
    print(f" Replaced text '{name}' with placeholder")


def _templatize_table(element, api_client, name):
    """Fill every cell of a named, non-empty table with "Text", keeping its shape."""
    md_elem = element.to_markdown_element(name=name)
    if not (md_elem and md_elem.content):
        return

    rows, cols = md_elem.shape
    if not (rows > 0 and cols > 0):
        return

    # The header row counts towards `rows`, hence rows - 1 body rows
    body = [["Text"] * cols for _ in range(max(0, rows - 1))]
    placeholder = MarkdownTableElement(
        name=name, content=TableData(headers=["Text"] * cols, rows=body)
    )
    element.update_content(
        api_client=api_client,
        markdown_content=placeholder,
        check_shape=False,
    )
    print(f" Replaced table '{name}' ({rows}x{cols}) with 'Text' placeholders")


def cmd_templatize(args):
    """Replace the content of every named element with placeholder content and save.

    Elements whose alt-text title is empty or whitespace are left untouched.
    Images, text-bearing shapes and tables are handled; other kinds are ignored.
    """
    api_client, presentation = _load_presentation(args.source)

    for slide_no, slide in enumerate(presentation.slides):
        print(f"Processing slide {slide_no}...")

        for element in slide.page_elements_flat:
            name = element.alt_text.title
            if not name or not name.strip():
                continue  # Skip unnamed elements

            if isinstance(element, AbstractImageElement):
                _templatize_image(element, api_client, name)
            elif isinstance(element, AbstractShapeElement) and element.has_text:
                _templatize_text(element, api_client, name)
            elif isinstance(element, AbstractTableElement):
                _templatize_table(element, api_client, name)

    presentation.save(api_client=api_client)
    print("\nTemplatization complete.")
def main():
    """Parse the command line and run the selected sub-command.

    Sub-commands: ``copy``, ``inspect``, ``name`` and ``templatize``; each takes
    a required ``--source`` plus its own extra options.
    """
    parser = argparse.ArgumentParser(
        description="Prepare a presentation as a template"
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    default_source_help = "Presentation URL, ID, or file path"
    # (sub-command, help text, --source help, extra (flag, kwargs) arguments)
    command_specs = [
        (
            "copy",
            "Copy a presentation",
            "Source presentation URL, ID, or file path",
            [("--title", {"help": "Title for the copy"})],
        ),
        (
            "inspect",
            "Inspect slides",
            default_source_help,
            [("--slide", {"type": int, "help": "Specific slide index to inspect"})],
        ),
        (
            "name",
            "Apply names to slides and elements",
            default_source_help,
            [("--mapping", {"required": True, "help": "JSON mapping of names"})],
        ),
        (
            "templatize",
            "Replace content with placeholders",
            default_source_help,
            [],
        ),
    ]
    for command, help_text, source_help, extra_args in command_specs:
        sub = subparsers.add_parser(command, help=help_text)
        sub.add_argument("--source", required=True, help=source_help)
        for flag, kwargs in extra_args:
            sub.add_argument(flag, **kwargs)

    args = parser.parse_args()

    handlers = {
        "copy": cmd_copy,
        "inspect": cmd_inspect,
        "name": cmd_name,
        "templatize": cmd_templatize,
    }
    handlers[args.command](args)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()
shape element's text styles. + + Handles both GSlides (RichStyle with font_size_pt) and PPTX + (StyleInfo dict with font_size that has a .pt attribute). + + Returns: + Font size in points, or 12.0 as fallback. + """ + if not styles: + return 12.0 + + font_sizes = [] + for style in styles: + if isinstance(style, dict): + # PPTX StyleInfo dict + fs = style.get("font_size") + if fs is not None and hasattr(fs, "pt"): + font_sizes.append(fs.pt) + else: + # GSlides RichStyle object + if hasattr(style, "font_size_pt") and style.font_size_pt is not None: + font_sizes.append(style.font_size_pt) + + return max(font_sizes) if font_sizes else 12.0 + + +def _extract_font_size_from_table(element: "AbstractTableElement") -> float: + """Extract the dominant font size (in points) from a table element's first cell. + + Handles both GSlides (TableElement with tableRows) and PPTX + (GraphicFrame with .table accessor). + + Returns: + Font size in points, or 10.0 as fallback. + """ + try: + if hasattr(element, "pptx_element") and element.pptx_element is not None: + # PPTX path + table = element.pptx_element.table + cell = table.cell(0, 0) + for para in cell.text_frame.paragraphs: + for run in para.runs: + if run.font.size is not None and hasattr(run.font.size, "pt"): + return run.font.size.pt + elif hasattr(element, "gslides_element") and element.gslides_element is not None: + # GSlides path — access table rows from the underlying gslides-api element + gslides_table = element.gslides_element + if hasattr(gslides_table, "table") and gslides_table.table is not None: + table_data = gslides_table.table + if table_data.tableRows: + first_row = table_data.tableRows[0] + if first_row.tableCells: + cell = first_row.tableCells[0] + # Cell text content has styles + if hasattr(cell, "text") and hasattr(cell.text, "textElements"): + for te in cell.text.textElements: + if hasattr(te, "textRun") and te.textRun is not None: + ts = te.textRun.style + if ts and hasattr(ts, "fontSize") and ts.fontSize: + 
# Supporting data classes
class AbstractThumbnail(BaseModel):
    # Rendered image of one slide: where it lives, its pixel size and MIME type.
    contentUrl: str
    width: int
    height: int
    mime_type: str
    # Raw image bytes; optional (may be None)
    content: bytes | None = None
    # Optional byte length of `content`
    file_size: int | None = None


class AbstractSlideProperties(BaseModel):
    # Per-slide flags; slides are not skipped by default
    isSkipped: bool = False


class AbstractAltText(BaseModel):
    # Alt-text of an element; the title doubles as the element's name
    title: str | None = None
    description: str | None = None


class AbstractSpeakerNotes(BaseModel, ABC):
    # Interface for reading and writing a slide's speaker notes

    @abstractmethod
    def read_text(self, as_markdown: bool = True) -> str:
        """Return the notes text (as markdown when ``as_markdown`` is true)."""

    @abstractmethod
    def write_text(self, api_client: "AbstractSlidesAPIClient", content: str):
        """Replace the notes text with ``content`` using ``api_client``."""


class AbstractCredentials(BaseModel):
    # Credential fields; every one is optional
    token: str | None = None
    refresh_token: str | None = None
    client_id: str | None = None
    client_secret: str | None = None
    token_uri: str | None = None


class AbstractSize(BaseModel):
    # Width/height pair, both defaulting to 0.0 (units are not fixed here)
    width: float = 0.0
    height: float = 0.0


class AbstractPreprocessedSlide(BaseModel):
    # A slide bundled with its metadata and a name -> element lookup
    gslide: "AbstractSlide"
    raw_metadata: str = ""
    metadata: list[dict] | None = None
    named_elements: dict[str, "AbstractElement"] = Field(default_factory=dict)


# Enums
class AbstractThumbnailSize:
    """String constants naming the available thumbnail sizes."""

    MEDIUM = "MEDIUM"


class AbstractElementKind:
    """String constants used as the ``type`` discriminator of elements."""

    IMAGE = "IMAGE"
    SHAPE = "SHAPE"
    TABLE = "TABLE"
class AbstractSlidesAPIClient(ABC):
    """Backend-neutral interface every slides API client adapter implements.

    The exact semantics of each abstract method are defined by the concrete
    adapter; only ``trash_file`` and ``get_default_api_client`` have behavior here.
    """

    auto_flush: bool = True

    # TODO: remembering to call this is a chore, should we make this into a context manager?
    @abstractmethod
    def flush_batch_update(self):
        """Flush the client's batch update (adapter-specific)."""

    @abstractmethod
    def copy_presentation(
        self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None
    ) -> dict:
        """Copy ``presentation_id`` under ``copy_title``, optionally into ``folder_id``."""

    @abstractmethod
    def create_folder(
        self, name: str, ignore_existing: bool = True, parent_folder_id: Optional[str] = None
    ) -> dict:
        """Create a folder called ``name``, optionally under ``parent_folder_id``."""

    @abstractmethod
    def delete_file(self, file_id: str):
        """Delete the file identified by ``file_id``."""

    def trash_file(self, file_id: str):
        """Move a file to trash. Defaults to delete_file for non-GSlides adapters."""
        self.delete_file(file_id)

    @abstractmethod
    def set_credentials(self, credentials: AbstractCredentials):
        """Install ``credentials`` on this client."""

    @abstractmethod
    def get_credentials(self) -> Optional[AbstractCredentials]:
        """Return the client's credentials, or None."""

    @abstractmethod
    def replace_text(
        self, slide_ids: list[str], match_text: str, replace_text: str, presentation_id: str
    ):
        """Replace ``match_text`` with ``replace_text`` on the given slides."""

    @classmethod
    def get_default_api_client(cls) -> "AbstractSlidesAPIClient":
        """Get the default API client wrapped in concrete implementation."""
        # TODO: This is a horrible, non-generalizable hack, will need to fix later
        # Imported here rather than at module level (presumably to avoid an
        # import cycle with the adapter module - confirm).
        from gslides_api.adapters.gslides_adapter import GSlidesAPIClient

        return GSlidesAPIClient.get_default_api_client()

    @abstractmethod
    def get_presentation_as_pdf(self, presentation_id: str) -> bytes:
        """Return the presentation rendered as PDF bytes."""
class AbstractElement(BaseModel, ABC):
    # Common base of all page elements: identity, alt-text and a `type` tag.
    objectId: str = ""
    presentation_id: str = ""
    slide_id: str = ""
    alt_text: AbstractAltText = Field(default_factory=AbstractAltText)
    type: str = ""

    # Parent references - not serialized, populated by parent validators
    _parent_slide: Optional["AbstractSlide"] = PrivateAttr(default=None)
    _parent_presentation: Optional["AbstractPresentation"] = PrivateAttr(default=None)

    def __eq__(self, other: object) -> bool:
        """Compare by dumped public fields only, so parent links cannot recurse."""
        return isinstance(other, AbstractElement) and self.model_dump() == other.model_dump()

    def __hash__(self) -> int:
        """Hash on ``objectId`` so elements can live in sets and dict keys."""
        return hash(self.objectId)

    @abstractmethod
    def absolute_size(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]:
        """Return the element's size as a ``(width, height)`` pair in ``units``."""

    @abstractmethod
    def absolute_position(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]:
        """Return the element's position as an ``(x, y)`` pair in ``units``."""

    @abstractmethod
    def create_image_element_like(
        self, api_client: AbstractSlidesAPIClient
    ) -> "AbstractImageElement":
        """Create an image element modelled on this element (adapter-specific)."""

    @abstractmethod
    def set_alt_text(
        self,
        api_client: AbstractSlidesAPIClient,
        title: str | None = None,
        description: str | None = None,
    ):
        """Set the element's alt-text title and/or description."""


class AbstractShapeElement(AbstractElement):
    # Shape element; may or may not carry text (see `has_text`).
    type: Literal[AbstractElementKind.SHAPE] = AbstractElementKind.SHAPE

    @property
    @abstractmethod
    def has_text(self) -> bool:
        """Whether the shape carries text."""

    @abstractmethod
    def write_text(
        self, api_client: AbstractSlidesAPIClient, content: str, autoscale: bool = False
    ):
        """Write ``content`` into the shape."""

    @abstractmethod
    def read_text(self, as_markdown: bool = True) -> str:
        """Return the shape's text (as markdown when ``as_markdown`` is true)."""

    @abstractmethod
    def styles(self, skip_whitespace: bool = True) -> list[Any] | None:
        """Return the text styles used in the shape, or None."""


class AbstractImageElement(AbstractElement):
    # Image element.
    type: Literal[AbstractElementKind.IMAGE] = AbstractElementKind.IMAGE

    @abstractmethod
    def replace_image(
        self,
        api_client: AbstractSlidesAPIClient,
        file: str | None = None,
        url: str | None = None,
    ):
        """Replace the image content from a local ``file`` or a ``url``."""
class AbstractTableElement(AbstractElement):
    # Table element, plus height helpers shared by all adapters.
    type: Literal[AbstractElementKind.TABLE] = AbstractElementKind.TABLE

    @abstractmethod
    def resize(
        self,
        api_client: AbstractSlidesAPIClient,
        rows: int,
        cols: int,
        fix_width: bool = True,
        fix_height: bool = True,
        target_height_in: float | None = None,
    ) -> float:
        """Resize the table.

        Returns:
            Font scale factor (1.0 if no scaling, < 1.0 if rows were added with fix_height)
        """

    @abstractmethod
    def update_content(
        self,
        api_client: AbstractSlidesAPIClient,
        markdown_content: MarkdownTableElement,
        check_shape: bool = True,
        font_scale_factor: float = 1.0,
    ):
        """Write ``markdown_content`` into the table's cells."""

    @abstractmethod
    def to_markdown_element(self, name: str | None = None) -> MarkdownTableElement:
        """Return the table as a ``MarkdownTableElement``."""

    @abstractmethod
    def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Get weight of a single horizontal border in specified units."""

    @abstractmethod
    def get_row_count(self) -> int:
        """Get current number of rows."""

    @abstractmethod
    def get_column_count(self) -> int:
        """Get current number of columns."""

    def get_total_height_including_borders(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Get total table height including borders.

        Returns:
            The height from ``absolute_size`` plus one horizontal border per
            row boundary (``row count + 1`` borders).
        """
        height = self.absolute_size(units=units)[1]
        border_count = self.get_row_count() + 1
        return height + (self.get_horizontal_border_weight(units=units) * border_count)

    def get_max_height(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Calculate max allowed height based on elements below this table.

        An element counts as "below" when its top edge is at or below the
        table's bottom edge; horizontal position is not considered. With no
        such element, the slide height is the limit.

        Returns:
            Distance from the table's top to the nearest limit, in ``units``.

        Raises:
            RuntimeError: If parent references are not set (programming error).
        """
        slide = self._parent_slide
        presentation = self._parent_presentation
        if slide is None or presentation is None:
            raise RuntimeError(
                f"Element {self.objectId} missing parent references. "
                f"_parent_slide={self._parent_slide}, _parent_presentation={self._parent_presentation}. "
                "This is a programming error - parent references should be set during slide creation."
            )

        # Only the vertical extent of the table matters here
        _, table_top_y = self.absolute_position(units=units)
        _, table_height = self.absolute_size(units=units)
        table_bottom_y = table_top_y + table_height

        # The slide's bottom edge is the limit unless an element sits closer
        limits = [presentation.slide_height(units=units)]
        for other in slide.page_elements_flat:
            if other.objectId == self.objectId:
                continue
            other_top_y = other.absolute_position(units=units)[1]
            if other_top_y >= table_bottom_y:
                limits.append(other_top_y)

        return min(limits) - table_top_y
class AbstractSlide(BaseModel, ABC):
    # One slide: its elements, speaker notes (which hold the slide's name)
    # and a back-reference to the owning presentation.
    elements: list[AbstractElement] = Field(
        description="The elements of the slide", default_factory=list
    )
    objectId: str = ""
    slideProperties: AbstractSlideProperties = Field(default_factory=AbstractSlideProperties)
    speaker_notes: Optional[AbstractSpeakerNotes] = None

    # Parent reference for this slide
    _parent_presentation: Optional["AbstractPresentation"] = PrivateAttr(default=None)

    def __eq__(self, other: object) -> bool:
        """Custom equality that excludes parent references to avoid circular comparison."""
        if not isinstance(other, AbstractSlide):
            return False
        # Compare only the public fields, not parent references
        return self.model_dump() == other.model_dump()

    def __hash__(self) -> int:
        """Hash based on objectId for use in sets/dicts."""
        return hash(self.objectId)

    @model_validator(mode="after")
    def _populate_element_parent_refs(self) -> "AbstractSlide":
        """Populate parent references on elements after creation/deserialization."""
        for element in self.elements:
            element._parent_slide = self
            # _parent_presentation is set by presentation validator
        return self

    @property
    def name(self) -> str:
        """Get the slide name from the speaker notes.

        Returns an empty string when there are no notes or reading them fails.
        """
        if not self.speaker_notes:
            return ""
        try:
            return self.speaker_notes.read_text()
        except Exception:
            return ""

    @property
    def page_elements_flat(self) -> list[AbstractElement]:
        """Flatten the elements tree into a list."""
        return self.elements

    def markdown(self) -> str:
        """Return a markdown representation of this slide's layout and content.

        Metadata (element type, position, size, char capacity) is embedded in
        HTML comments following the gslides-api MarkdownSlideElement convention.
        Text and table content appears as regular markdown below each comment.

        Elements without an alt-text title are omitted. For tables, the
        per-column capacity estimate is left out when no positive font size
        is available (instead of raising on the division).
        """
        parts = []
        for element in self.page_elements_flat:
            name = element.alt_text.title
            if not name:
                continue

            x, y = element.absolute_position()
            w, h = element.absolute_size()
            geometry = f"pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f})"

            desc_str = ""
            if element.alt_text.description:
                desc_str = f' | desc="{element.alt_text.description}"'

            if isinstance(element, AbstractShapeElement) and element.has_text:
                text = element.read_text(as_markdown=True)
                try:
                    font_pt = _extract_font_size_pt(element.styles(skip_whitespace=True))
                except Exception:
                    font_pt = 12.0
                meta = ElementSizeMeta(
                    box_width_inches=w, box_height_inches=h, font_size_pt=font_pt,
                )
                parts.append(
                    f"<!-- text: {name} | {geometry}"
                    f"{desc_str} | ~{meta.approx_char_capacity} chars -->\n{text}"
                )
            elif isinstance(element, AbstractTableElement):
                md_elem = element.to_markdown_element(name=name)
                table_md = md_elem.content.to_markdown() if md_elem and md_elem.content else ""
                _rows, cols = md_elem.shape if md_elem else (0, 0)
                # Estimate per-column char capacity (equal-width approximation)
                col_chars_str = ""
                if cols > 0:
                    font_pt = _extract_font_size_from_table(element)
                    # A zero/None size would make the division below raise;
                    # skip the estimate rather than fail the whole slide.
                    if font_pt and font_pt > 0:
                        col_width = w / cols
                        # Assumes an average glyph is ~0.5 em wide; 72 pt per inch
                        chars_per_col = int(col_width / (font_pt * 0.5 / 72))
                        col_chars_str = f" | ~{chars_per_col} chars/col"
                parts.append(
                    f"<!-- table: {name} | {geometry}"
                    f"{desc_str}{col_chars_str} -->\n{table_md}"
                )
            elif isinstance(element, AbstractImageElement):
                parts.append(f"<!-- image: {name} | {geometry}{desc_str} -->")
            else:
                parts.append(f"<!-- {element.type}: {name} | {geometry}{desc_str} -->")

        return "\n\n".join(parts)

    @abstractmethod
    def thumbnail(
        self, api_client: AbstractSlidesAPIClient, size: str, include_data: bool = False
    ) -> AbstractThumbnail:
        """Return a thumbnail of this slide at ``size``."""

    def get_elements_by_alt_title(self, title: str) -> list[AbstractElement]:
        """Return every element whose alt-text title equals ``title``."""
        return [e for e in self.page_elements_flat if e.alt_text.title == title]

    def __getitem__(self, item: str):
        """Get element by alt title.

        Raises:
            KeyError: if no element, or more than one element, has that title.
        """
        elements = self.get_elements_by_alt_title(item)
        if not elements:
            raise KeyError(f"Element with alt title {item} not found")
        if len(elements) > 1:
            raise KeyError(f"Multiple elements with alt title {item} found")
        return elements[0]
class AbstractPresentation(BaseModel, ABC):
    # A presentation: an ordered list of slides plus identifying metadata.
    slides: list[AbstractSlide] = Field(default_factory=list)
    presentationId: str | None = None
    revisionId: str | None = None
    title: str | None = None

    @model_validator(mode="after")
    def _populate_slide_parent_refs(self) -> "AbstractPresentation":
        """Point every slide, and every element on it, back at this presentation."""
        for slide in self.slides:
            slide._parent_presentation = self
            for element in slide.elements:
                element._parent_presentation = self
        return self

    @property
    @abstractmethod
    def url(self) -> str:
        """URL of the presentation (adapter-specific)."""

    @abstractmethod
    def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Return slide height in specified units."""

    @abstractmethod
    def save(self, api_client: "AbstractSlidesAPIClient") -> None:
        """Save/persist all changes made to this presentation."""

    def __getitem__(self, item: str):
        """Get slide by name (first match wins).

        Raises:
            KeyError: if no slide has that name.
        """
        match = next((slide for slide in self.slides if slide.name == item), None)
        if match is None:
            raise KeyError(f"Slide with name {item} not found")
        return match

    @classmethod
    @abstractmethod
    def from_id(
        cls, api_client: AbstractSlidesAPIClient, presentation_id: str
    ) -> "AbstractPresentation":
        """Load a presentation with the concrete class matching ``api_client``.

        Raises:
            NotImplementedError: if the client is not a gslides, pptx or html client.
        """
        from gslides_api.adapters.gslides_adapter import GSlidesAPIClient, GSlidesPresentation
        from gslides_api.adapters.html_adapter import HTMLAPIClient, HTMLPresentation
        from gslides_api.adapters.pptx_adapter import PowerPointAPIClient, PowerPointPresentation

        # Checked in this order; the first matching client type decides
        dispatch = (
            (GSlidesAPIClient, GSlidesPresentation),
            (PowerPointAPIClient, PowerPointPresentation),
            (HTMLAPIClient, HTMLPresentation),
        )
        for client_type, presentation_type in dispatch:
            if isinstance(api_client, client_type):
                return presentation_type.from_id(api_client, presentation_id)
        raise NotImplementedError("Only gslides, pptx, and html clients are supported")

    @abstractmethod
    def copy_via_drive(
        self,
        api_client: AbstractSlidesAPIClient,
        copy_title: str,
        folder_id: Optional[str] = None,
    ) -> "AbstractPresentation":
        """Copy this presentation under ``copy_title`` and return the copy."""

    @abstractmethod
    def sync_from_cloud(self, api_client: AbstractSlidesAPIClient):
        """Sync this object from the backend (adapter-specific)."""

    @abstractmethod
    def insert_copy(
        self,
        source_slide: "AbstractSlide",
        api_client: AbstractSlidesAPIClient,
        insertion_index: int | None = None,
    ) -> "AbstractSlide":
        """Insert a copy of ``source_slide`` and return the new slide."""

    @abstractmethod
    def delete_slide(self, slide: Union["AbstractSlide", int], api_client: AbstractSlidesAPIClient):
        """Delete a slide from the presentation by reference or index."""

    @abstractmethod
    def delete_slides(
        self, slides: list[Union["AbstractSlide", int]], api_client: AbstractSlidesAPIClient
    ):
        """Delete multiple slides from the presentation by reference or index."""

    @abstractmethod
    def move_slide(
        self,
        slide: Union["AbstractSlide", int],
        insertion_index: int,
        api_client: AbstractSlidesAPIClient,
    ):
        """Move a slide to a new position within the presentation."""

    @abstractmethod
    def duplicate_slide(
        self, slide: Union["AbstractSlide", int], api_client: AbstractSlidesAPIClient
    ) -> "AbstractSlide":
        """Duplicate a slide within the presentation."""

    async def get_slide_thumbnails(
        self,
        api_client: "AbstractSlidesAPIClient",
        slides: Optional[list["AbstractSlide"]] = None,
    ) -> list["AbstractThumbnail"]:
        """Get thumbnails for slides in this presentation.

        Default implementation calls each slide's ``thumbnail()`` in turn.
        Subclasses can override for more efficient batch implementations
        (e.g., HTML using single Playwright session, PPTX using single PDF conversion).

        Args:
            api_client: The API client for slide operations
            slides: Optional list of slides to get thumbnails for. If None, uses all slides.

        Returns:
            List of AbstractThumbnail objects with image data
        """

        def _with_file_size(thumb: "AbstractThumbnail") -> "AbstractThumbnail":
            # Fill in file_size from the content when the adapter left it unset
            if not thumb.content or thumb.file_size is not None:
                return thumb
            return AbstractThumbnail(
                contentUrl=thumb.contentUrl,
                width=thumb.width,
                height=thumb.height,
                mime_type=thumb.mime_type,
                content=thumb.content,
                file_size=len(thumb.content),
            )

        target_slides = self.slides if slides is None else slides
        return [
            _with_file_size(
                slide.thumbnail(
                    api_client=api_client,
                    size=AbstractThumbnailSize.MEDIUM,
                    include_data=True,
                )
            )
            for slide in target_slides
        ]
"""Assign names to slides (via speaker notes) and to slide elements (via alt-text titles)."""

import logging
from dataclasses import dataclass
from typing import Optional

from langchain_core.language_models import BaseLanguageModel

from gslides_api.adapters.abstract_slides import (
    AbstractElement,
    AbstractImageElement,
    AbstractPresentation,
    AbstractShapeElement,
    AbstractSlide,
    AbstractSlidesAPIClient,
    AbstractTableElement,
    _extract_font_size_from_table,
    _extract_font_size_pt,
)
from gslides_api.agnostic.element_size import ElementSizeMeta
from gslides_api.agnostic.units import OutputUnit
from motleycrew.utils.image_utils import is_this_a_chart

logger = logging.getLogger(__name__)

# Font size (in points) reported when a shape's text styles cannot be read.
_FALLBACK_FONT_SIZE_PT = 12.0


@dataclass
class SlideElementNames:
    """Names of different types of elements in a slide."""

    image_names: list[str]
    text_names: list[str]
    chart_names: list[str]
    table_names: list[str]

    @classmethod
    def empty(cls) -> "SlideElementNames":
        """Create an empty SlideElementNames instance."""
        return cls(image_names=[], text_names=[], chart_names=[], table_names=[])


def _fallback_slide_name(position: int, taken) -> str:
    """Return ``Slide_<position>``, suffixed with ``_2``, ``_3``, ... until it is not in ``taken``."""
    candidate = f"Slide_{position}"
    suffix = 2
    while candidate in taken:
        candidate = f"Slide_{position}_{suffix}"
        suffix += 1
    return candidate


def name_slides(
    presentation_id: str,
    name_elements: bool = True,
    api_client: AbstractSlidesAPIClient | None = None,
    skip_empty_text_boxes: bool = False,
    llm: Optional[BaseLanguageModel] = None,
    check_success: bool = False,
    min_image_size_cm: float = 4.0,
) -> dict[str, SlideElementNames]:
    """Name slides in a presentation based on their speaker notes, enforcing unique names.

    The first line of a slide's speaker notes becomes its name. Slides with empty
    notes, or whose requested name is already taken, get ``Slide_<position>``
    instead. The chosen name is written back to the speaker notes. Slides marked
    as skipped are left untouched and do not appear in the result.

    Args:
        presentation_id: Identifier passed to ``AbstractPresentation.from_id``.
        name_elements: If True, also name the elements of every slide; otherwise
            only collect the alt-text titles the elements already have.
        api_client: Client used for all reads and writes. Required.
        skip_empty_text_boxes: Forwarded to ``name_slide_elements``.
        llm: Optional model used to tell charts from plain images.
        check_success: If True, re-sync the presentation after saving and verify
            that every named slide's speaker notes equal its assigned name.
        min_image_size_cm: Forwarded to ``name_slide_elements``.

    Returns:
        Mapping from slide name to the element names found or assigned on that slide.

    Raises:
        ValueError: If ``api_client`` is None.
        AssertionError: If ``check_success`` is set and a slide's notes do not
            match its assigned name after re-syncing.
    """
    if api_client is None:
        raise ValueError("API client is required")
    # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client()
    if llm is None:
        logger.warning("No LLM provided, will not attempt to distinguish charts from images")

    presentation = AbstractPresentation.from_id(
        api_client=api_client, presentation_id=presentation_id
    )
    slide_names: dict[str, SlideElementNames] = {}
    # (slide index, assigned name) for every slide actually named. Skipped slides
    # are absent, so verification must not pair names with slides positionally.
    named_slides: list[tuple[int, str]] = []
    for i, slide in enumerate(presentation.slides):
        if slide.slideProperties.isSkipped:
            logger.info(
                f"Skipping slide {i+1}: {slide.objectId} as it is marked as skipped in Google Slides"
            )
            continue

        speaker_notes = slide.speaker_notes.read_text().strip()
        requested_name = speaker_notes.split("\n")[0] if speaker_notes else ""
        if requested_name and requested_name not in slide_names:
            slide_name = requested_name
        else:
            # The fallback itself must not clash with a name another slide already
            # took, otherwise that slide's entry in the result would be overwritten.
            slide_name = _fallback_slide_name(i + 1, slide_names)

        logger.info(f"Naming slide {i+1}: {slide.objectId} as {slide_name}")

        slide.speaker_notes.write_text(api_client=api_client, content=slide_name)
        named_slides.append((i, slide_name))

        if name_elements:
            slide_names[slide_name] = name_slide_elements(
                slide,
                skip_empty_text_boxes=skip_empty_text_boxes,
                slide_name=slide_name,
                llm=llm,
                api_client=api_client,
                min_image_size_cm=min_image_size_cm,
            )
        else:
            # Just get the existing names
            text_names = [
                e.alt_text.title
                for e in slide.page_elements_flat
                if isinstance(e, AbstractShapeElement)
            ]
            image_names = [
                e.alt_text.title
                for e in slide.page_elements_flat
                if isinstance(e, AbstractImageElement)
            ]
            table_names = [
                e.alt_text.title
                for e in slide.page_elements_flat
                if isinstance(e, AbstractTableElement)
            ]

            slide_names[slide_name] = SlideElementNames(
                image_names=image_names,
                text_names=text_names,
                chart_names=[],
                table_names=table_names,
            )

    presentation.save(api_client=api_client)
    if check_success:
        presentation.sync_from_cloud(api_client=api_client)
        for index, expected_name in named_slides:
            if index >= len(presentation.slides):
                raise AssertionError(
                    f"Slide {index + 1} is missing after re-sync; "
                    f"expected speaker notes {expected_name!r}"
                )
            actual_notes = presentation.slides[index].speaker_notes.read_text()
            # Raised explicitly so the check survives `python -O`.
            if actual_notes != expected_name:
                raise AssertionError(
                    f"Slide {index + 1} speaker notes are {actual_notes!r}, "
                    f"expected {expected_name!r}"
                )
        # TODO: check element names

    return slide_names


def name_if_empty(
    element: AbstractElement,
    value: str,
    api_client: AbstractSlidesAPIClient,
    names_so_far: list[str] | None = None,
) -> str:
    """Give ``element`` the alt-text title ``value`` unless it already has a usable one.

    An existing title is kept when it is non-blank and not already present in
    ``names_so_far``.

    Args:
        element: Element to name.
        value: Title to assign when the existing one is missing, blank or a duplicate.
        api_client: Client used to write the alt text.
        names_so_far: Titles already used on the same slide.

    Returns:
        The title the element ends up with (stripped when the existing one is kept).
    """
    if names_so_far is None:
        names_so_far = []

    if element.alt_text.title is not None:
        # Google API doesn't support setting alt text to empty string,
        # so we use space instead to indicate "empty"
        # And whitespaces aren't valid variable names anyway
        # Also names in a single slide must be unique
        current_name = element.alt_text.title.strip()
        if current_name and current_name not in names_so_far:
            return current_name

    element.set_alt_text(api_client=api_client, title=value)
    return value


def delete_slide_names(
    presentation_id: str, api_client: AbstractSlidesAPIClient
):  # | None = None):
    """Blank out the speaker notes (the slide names) of every slide.

    A single space is written rather than an empty string, mirroring how
    ``delete_alt_titles`` marks a title as empty.
    """
    # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client()
    presentation = AbstractPresentation.from_id(
        api_client=api_client, presentation_id=presentation_id
    )
    for slide in presentation.slides:
        # Keyword arguments on purpose: write_text takes api_client before content,
        # so a positional " " would collide with the api_client keyword.
        slide.speaker_notes.write_text(api_client=api_client, content=" ")

    # Persist like the sibling functions do.
    presentation.save(api_client=api_client)


def delete_alt_titles(presentation_id: str, api_client: AbstractSlidesAPIClient):  # | None = None):
    """Blank out the alt-text title of every shape and image that has one.

    NOTE(review): tables are named by ``name_slide_elements`` but are not cleared
    here - confirm whether that is intentional.
    """
    # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client()
    presentation = AbstractPresentation.from_id(
        api_client=api_client, presentation_id=presentation_id
    )
    for slide in presentation.slides:
        for element in slide.page_elements_flat:
            if (
                isinstance(element, (AbstractShapeElement, AbstractImageElement))
                and element.alt_text.title
            ):
                logger.info(f"Deleting alt title {element.alt_text.title}")
                # Unfortunately, Google API doesn't support setting alt text to empty string, so use space instead
                element.set_alt_text(api_client=api_client, title=" ")

    presentation.save(api_client=api_client)


def name_elements(
    elements: list[AbstractElement],
    root_name: str,
    api_client: AbstractSlidesAPIClient,
    names_so_far: list[str] | None = None,
) -> list[str]:
    """Name a group of same-kind elements after ``root_name``.

    A single element is offered ``root_name`` itself; several elements are
    offered ``root_name_1``, ``root_name_2``, ... in list order. Elements that
    already carry a usable, unused title keep it (see ``name_if_empty``).

    Returns:
        The resulting titles, in the same order as ``elements``.
    """
    if names_so_far is None:
        names_so_far = []
    names = []
    if len(elements) == 1:
        names.append(
            name_if_empty(
                element=elements[0],
                value=root_name,
                api_client=api_client,
                names_so_far=names_so_far + names,
            )
        )

    elif len(elements) > 1:
        for i, e in enumerate(elements):
            names.append(
                name_if_empty(
                    element=e,
                    value=f"{root_name}_{i+1}",
                    api_client=api_client,
                    names_so_far=names_so_far + names,
                )
            )

    logger.info(f"Named {len(names)} {root_name.lower()}s: {names}")
    return names


def _is_pptx_chart_element(element: AbstractElement) -> bool:
    """Check if an element is a PowerPoint chart (GraphicFrame with embedded chart data)."""
    if not hasattr(element, "pptx_element") or element.pptx_element is None:
        return False
    return getattr(element.pptx_element, "has_chart", False)


def name_slide_elements(
    slide: AbstractSlide,
    slide_name: str = "",
    skip_empty_text_boxes: bool = False,
    min_image_size_cm: float = 4.0,
    api_client: AbstractSlidesAPIClient | None = None,
    llm: Optional[BaseLanguageModel] = None,
) -> SlideElementNames:
    """Name the images, charts, tables and text boxes of one slide.

    Unnamed images at least ``min_image_size_cm`` in their smaller dimension are
    classified as chart or image by ``llm`` (all are treated as charts without
    one). PowerPoint chart shapes always count as charts. The top-most text box
    is offered the name "Title", the remaining ones "Text" / "Text_<n>".

    :param slide: Slide whose elements are named.
    :param slide_name: Only used for clearer logging, for the case when the caller has changed the name
    :param skip_empty_text_boxes: If True, text boxes with no non-whitespace text are ignored.
    :param min_image_size_cm: Images whose smaller dimension is below this are not named.
    :param api_client: Client used to write alt text. Required.
    :param llm: Optional model used to tell charts from plain images.
    :return: The names per element kind.
    :raises ValueError: If ``api_client`` is None.
    """
    if api_client is None:
        raise ValueError("API client is required")
    # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client()
    all_images = [e for e in slide.page_elements_flat if isinstance(e, AbstractImageElement)]

    # Also find PowerPoint chart shapes (GraphicFrames with embedded charts)
    # These are separate from images in PowerPoint (unlike Google Slides where charts are images)
    pptx_chart_shapes = [
        e
        for e in slide.page_elements_flat
        if _is_pptx_chart_element(e) and not isinstance(e, AbstractImageElement)
    ]
    if pptx_chart_shapes:
        logger.info(
            f"Found {len(pptx_chart_shapes)} PowerPoint chart shape(s) in slide {slide_name}"
        )

    # Sort first by y, then by x
    all_images.sort(key=lambda x: x.absolute_position()[::-1])
    pptx_chart_shapes.sort(key=lambda x: x.absolute_position()[::-1])

    images = []
    charts = []
    for i, image in enumerate(all_images):
        if image.alt_text.title and image.alt_text.title.strip():  # already named
            continue

        if min(image.absolute_size(units=OutputUnit.CM)) < min_image_size_cm:
            logger.info(f"Skipping image number {i+1} in slide {slide_name} as it is too small")
            continue

        if llm is not None:
            image_data = image.get_image_data()
            if is_this_a_chart(
                image_bytes=image_data.content, mime_type=image_data.mime_type, llm=llm
            ):
                logger.info(f"Identified image number {i+1} in slide {slide_name} as a chart")
                charts.append(image)
            else:
                logger.info(f"Identified image number {i+1} in slide {slide_name} as an image")
                images.append(image)
        else:
            logger.info(
                f"Assuming image number {i+1} in slide {slide_name} is a chart as no LLM provided"
            )
            charts.append(image)

    # Add PowerPoint chart shapes to the charts list
    # Always add them - name_if_empty will handle deduplication if they already have names
    for chart_shape in pptx_chart_shapes:
        charts.append(chart_shape)
        logger.info(f"Adding PowerPoint chart shape to charts list in slide {slide_name}")

    image_names = name_elements(images, "Image", api_client)
    chart_names = name_elements(charts, "Chart", api_client)

    table_names = name_elements(
        [e for e in slide.page_elements_flat if isinstance(e, AbstractTableElement)],
        "Table",
        api_client,
    )

    text_names = []

    text_boxes = [e for e in slide.page_elements_flat if isinstance(e, AbstractShapeElement)]

    if skip_empty_text_boxes:
        text_boxes = [e for e in text_boxes if e.read_text().strip()]

    if not text_boxes:
        return SlideElementNames(
            image_names=image_names,
            text_names=text_names,
            chart_names=chart_names,
            table_names=table_names,
        )

    # Sort first by y, then by x
    text_boxes.sort(key=lambda x: x.absolute_position()[::-1])
    top_box = text_boxes[0]
    text_names.append(name_if_empty(top_box, "Title", api_client, names_so_far=text_names))

    other_boxes = text_boxes[1:]
    text_names.extend(name_elements(other_boxes, "Text", api_client, names_so_far=text_names))

    return SlideElementNames(
        image_names=image_names,
        text_names=text_names,
        chart_names=chart_names,
        table_names=table_names,
    )


def _extract_font_size_from_element(element: AbstractShapeElement) -> float:
    """Extract the dominant font size (in points) from a shape element's text styles.

    Thin wrapper around _extract_font_size_pt from abstract_slides. Any failure
    while reading the styles yields ``_FALLBACK_FONT_SIZE_PT``.
    """
    try:
        return _extract_font_size_pt(element.styles(skip_whitespace=True))
    except Exception:
        # Best effort by design: callers get the fallback size, never an exception.
        logger.debug("Could not read font size from element, using fallback", exc_info=True)
        return _FALLBACK_FONT_SIZE_PT


def _extract_font_size_from_table_element(element: AbstractTableElement) -> float:
    """Extract the dominant font size (in points) from a table element's first cell.

    Delegates to _extract_font_size_from_table from abstract_slides.
    """
    return _extract_font_size_from_table(element)


def _build_element_size_meta(
    element: AbstractElement,
    font_size_pt: float,
) -> ElementSizeMeta | None:
    """Build ElementSizeMeta from an element's absolute size and a font size.

    Returns None if the element has zero or negative dimensions, or if its size
    cannot be determined.
    """
    try:
        element_size = element.absolute_size(units=OutputUnit.IN)
        width, height = element_size[0], element_size[1]
        if width > 0 and height > 0:
            return ElementSizeMeta(
                box_width_inches=width,
                box_height_inches=height,
                font_size_pt=font_size_pt,
            )
    except Exception:
        # Best effort by design, but leave a trace instead of failing silently.
        logger.debug("Could not build size metadata for element", exc_info=True)
    return None
+""" + +import io +import logging +from typing import Annotated, Any, Optional, Union + +import httpx +from pydantic import BaseModel, Discriminator, Field, Tag, TypeAdapter, model_validator + +import gslides_api +from gslides_api import GoogleAPIClient, Presentation +from gslides_api import Slide as GSlide +from gslides_api.agnostic.element import MarkdownTableElement +from gslides_api.agnostic.units import EMU_PER_CM, EMU_PER_INCH, OutputUnit, from_emu +from gslides_api.domain.domain import ThumbnailSize +from gslides_api.domain.request import SubstringMatchCriteria +from gslides_api.domain.text import TextStyle +from gslides_api.element.base import ElementKind, PageElementBase +from gslides_api.element.element import ImageElement, PageElement, TableElement +from gslides_api.element.shape import ShapeElement +from gslides_api.page.notes import Notes +from gslides_api.request.parent import GSlidesAPIRequest +from gslides_api.request.request import ReplaceAllTextRequest + +from gslides_api.common.log_time import log_time +from gslides_api.common.retry import retry +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractElement, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSlide, + AbstractSlideProperties, + AbstractSlidesAPIClient, + AbstractSpeakerNotes, + AbstractTableElement, + AbstractThumbnail, +) + +logger = logging.getLogger(__name__) + + +def concrete_element_discriminator(v: Any) -> str: + """Discriminator to determine which ConcreteElement subclass based on type field.""" + if hasattr(v, "type"): + # Handle ElementKind enum + element_type = v.type + if hasattr(element_type, "value"): + element_type = element_type.value + + if element_type in ["SHAPE", "shape"]: + return "shape" + elif element_type in ["IMAGE", "image"]: + return "image" + elif element_type in ["TABLE", "table"]: + return "table" + else: + # Fallback for other element types (LINE, VIDEO, WORD_ART, etc.) 
+ return "generic" + + raise ValueError(f"Cannot determine element type from: {v}") + + +class GSlidesAPIClient(AbstractSlidesAPIClient): + def __init__(self, gslides_client: GoogleAPIClient | None = None): + if gslides_client is None: + gslides_client = gslides_api.client.api_client + self.gslides_client = gslides_client + + @property + def auto_flush(self): + return self.gslides_client.auto_flush + + @auto_flush.setter + def auto_flush(self, value: bool): + self.gslides_client.auto_flush = value + + def flush_batch_update(self): + pending_count = len(self.gslides_client.pending_batch_requests) + pending_presentation = self.gslides_client.pending_presentation_id + logger.info( + f"FLUSH_BATCH_UPDATE: {pending_count} pending requests " + f"for presentation {pending_presentation}" + ) + result = self.gslides_client.flush_batch_update() + replies = result.get("replies", []) if result else [] + logger.info( + f"FLUSH_BATCH_UPDATE: completed, {len(replies)} replies, " + f"result keys: {result.keys() if result else 'None'}" + ) + # Log any non-empty replies (errors or meaningful responses) + for i, reply in enumerate(replies): + if reply: # Non-empty reply + logger.debug(f"FLUSH_BATCH_UPDATE reply[{i}]: {reply}") + return result + + def copy_presentation( + self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None + ) -> dict: + return self.gslides_client.copy_presentation( + presentation_id=presentation_id, copy_title=copy_title, folder_id=folder_id + ) + + def create_folder( + self, name: str, ignore_existing: bool = True, parent_folder_id: Optional[str] = None + ) -> dict: + return self.gslides_client.create_folder( + name, ignore_existing=ignore_existing, parent_folder_id=parent_folder_id + ) + + def delete_file(self, file_id: str): + self.gslides_client.delete_file(file_id) + + def trash_file(self, file_id: str): + self.gslides_client.trash_file(file_id) + + def upload_image_to_drive(self, image_path: str) -> str: + return 
class GSlidesSpeakerNotes(AbstractSpeakerNotes):
    """Speaker notes of a Google Slides slide, backed by a gslides-api ``Notes`` object."""

    def __init__(self, gslides_speaker_notes: Notes):
        super().__init__()
        self._gslides_speaker_notes = gslides_speaker_notes

    def read_text(self, as_markdown: bool = True) -> str:
        """Return the notes text as read by the wrapped ``Notes`` object."""
        notes = self._gslides_speaker_notes
        return notes.read_text(as_markdown=as_markdown)

    def write_text(self, api_client: GSlidesAPIClient, content: str):
        """Write ``content`` to the notes through the client's underlying gslides client."""
        underlying_client = api_client.gslides_client
        self._gslides_speaker_notes.write_text(content, api_client=underlying_client)
class GSlidesShapeElement(AbstractShapeElement, GSlidesElementParent):
    """Shape (text box) element backed by a gslides-api ``ShapeElement``."""

    gslides_element: ShapeElement = Field(exclude=True, default=None)

    @model_validator(mode="before")
    @classmethod
    def convert_from_page_element(cls, data: Any) -> dict:
        """Turn a ShapeElement (or any object exposing ``shape``) into the model's field dict.

        Raises:
            ValueError: If ``data`` is neither of those.
        """
        is_shape_like = isinstance(data, ShapeElement) or hasattr(data, "shape")
        if not is_shape_like:
            raise ValueError(f"Expected ShapeElement or PageElement with shape, got {type(data)}")

        return {
            "objectId": data.objectId,
            "presentation_id": data.presentation_id,
            "slide_id": data.slide_id,
            "alt_text": AbstractAltText(
                title=data.alt_text.title,
                description=data.alt_text.description,
            ),
            "gslides_element": data,
        }

    @property
    def has_text(self) -> bool:
        """Whether the wrapped shape reports having text."""
        return self.gslides_element.has_text

    def write_text(self, api_client: GSlidesAPIClient, content: str, autoscale: bool = False):
        """Replace the shape's text with ``content``, re-applying the styles it had before."""
        # The styles have to be captured BEFORE writing so the original look is kept.
        # skip_whitespace=True keeps invisible spacer styles (e.g. white theme colors) out;
        # per the original note, TextContent.styles() falls back to whitespace styles
        # when nothing else exists.
        original_styles = self.gslides_element.styles(skip_whitespace=True)
        styles = original_styles
        logger.debug(
            f"GSlidesShapeElement.write_text: objectId={self.objectId}, "
            f"content={repr(content[:100] if content else None)}, autoscale={autoscale}, "
            f"styles_count={len(styles) if styles else 0}"
        )
        result = self.gslides_element.write_text(
            content,
            autoscale=autoscale,
            styles=original_styles,
            api_client=api_client.gslides_client,
        )
        logger.debug(f"GSlidesShapeElement.write_text: result={result}")

    def read_text(self, as_markdown: bool = True) -> str:
        """Return the shape's text as read by the wrapped element."""
        shape = self.gslides_element
        return shape.read_text(as_markdown=as_markdown)

    def styles(self, skip_whitespace: bool = True) -> list[TextStyle] | None:
        """Return the wrapped element's text styles."""
        shape = self.gslides_element
        return shape.styles(skip_whitespace=skip_whitespace)
class GSlidesTableElement(AbstractTableElement, GSlidesElementParent):
    """Table element backed by a gslides-api ``TableElement``."""

    gslides_element: TableElement = Field(exclude=True, default=None)

    @model_validator(mode="before")
    @classmethod
    def convert_from_page_element(cls, data: Any) -> dict:
        """Turn a TableElement (or any object exposing ``table``) into the model's field dict.

        Raises:
            ValueError: If ``data`` is neither of those.
        """
        is_table_like = isinstance(data, TableElement) or hasattr(data, "table")
        if not is_table_like:
            raise ValueError(f"Expected TableElement or PageElement with table, got {type(data)}")

        return {
            "objectId": data.objectId,
            "presentation_id": data.presentation_id,
            "slide_id": data.slide_id,
            "alt_text": AbstractAltText(
                title=data.alt_text.title,
                description=data.alt_text.description,
            ),
            "gslides_element": data,
        }

    def resize(
        self,
        api_client: GSlidesAPIClient,
        rows: int,
        cols: int,
        fix_width: bool = True,
        fix_height: bool = True,
        target_height_in: float | None = None,
    ) -> float:
        """Resize the table to ``rows`` x ``cols`` and send the change to the API.

        Args:
            target_height_in: If provided, constrain total table height (rows + borders)
                              to this value in inches. Scales both row heights and border
                              weights proportionally.

        Returns:
            Font scale factor (1.0 if no scaling, < 1.0 if rows were added with fix_height)
        """
        # The underlying element works in EMU; convert the optional inch limit.
        height_limit_emu = (
            None if target_height_in is None else target_height_in * EMU_PER_INCH
        )

        resize_requests, scale = self.gslides_element.resize_requests(
            rows,
            cols,
            fix_width=fix_width,
            fix_height=fix_height,
            target_height_emu=height_limit_emu,
        )
        api_client.gslides_client.batch_update(resize_requests, self.presentation_id)
        return scale

    def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Get weight of horizontal borders in specified units."""
        table_element = self.gslides_element
        return table_element.get_horizontal_border_weight(units=units)

    def get_row_count(self) -> int:
        """Get current number of rows."""
        table = self.gslides_element.table
        return table.rows

    def get_column_count(self) -> int:
        """Get current number of columns."""
        table = self.gslides_element.table
        return table.columns

    def update_content(
        self,
        api_client: GSlidesAPIClient,
        markdown_content: MarkdownTableElement,
        check_shape: bool = True,
        font_scale_factor: float = 1.0,
    ):
        """Build content-update requests from ``markdown_content`` and send them in one batch."""
        update_requests = self.gslides_element.content_update_requests(
            markdown_content, check_shape=check_shape, font_scale_factor=font_scale_factor
        )
        api_client.gslides_client.batch_update(update_requests, self.presentation_id)

    def to_markdown_element(self, name: str | None = None) -> Any:
        """Return the wrapped table's markdown element, always forwarding ``name``."""
        table_element = self.gslides_element
        return table_element.to_markdown_element(name=name)
class GSlidesSlide(AbstractSlide):
    """Slide backed by a gslides-api ``Slide``."""

    def __init__(self, gslides_slide: GSlide):
        # Convert gslides elements to abstract elements, skipping group containers.
        # unroll_group_elements includes both the group wrapper and its children;
        # children are real elements while the group container has no renderable
        # content and may lack size/transform, causing downstream crashes.
        elements = [
            validate_concrete_element(element)
            for element in gslides_slide.page_elements_flat
            if element.type != ElementKind.GROUP
        ]

        super().__init__(
            elements=elements,
            objectId=gslides_slide.objectId,
            slideProperties=AbstractSlideProperties(
                isSkipped=gslides_slide.slideProperties.isSkipped or False
            ),
            speaker_notes=GSlidesSpeakerNotes(gslides_slide.speaker_notes),
        )
        self._gslides_slide = gslides_slide

    def thumbnail(
        self, api_client: GSlidesAPIClient, size: str, include_data: bool = False
    ) -> AbstractThumbnail:
        """Get thumbnail for a Google Slides slide.

        The thumbnail request is retried on timeouts and connection-level errors.

        Args:
            api_client: The Google Slides API client
            size: Name of a ``ThumbnailSize`` member (e.g., "MEDIUM"); unknown names
                fall back to ``ThumbnailSize.MEDIUM``
            include_data: If True, the returned thumbnail carries the image payload
                and its size; otherwise both are None

        Returns:
            AbstractThumbnail with metadata and optionally the image content
        """
        # `import http` alone does not load the `http.client` submodule, so the
        # exception classes below would only resolve if something else had
        # already imported it. Import the submodule explicitly.
        import http.client
        import ssl

        from gslides_api.common.google_errors import detect_file_access_denied_error

        # Map size string to ThumbnailSize enum
        thumbnail_size = getattr(ThumbnailSize, size, ThumbnailSize.MEDIUM)

        # Fetch thumbnail metadata with retry
        @retry(
            max_attempts=6,
            initial_delay=1.0,
            max_delay=15.0,
            exceptions=(
                TimeoutError,
                httpx.TimeoutException,
                httpx.RequestError,
                ConnectionError,
                ssl.SSLError,
                http.client.ResponseNotReady,
                http.client.IncompleteRead,
            ),
        )
        def fetch_thumbnail():
            return self._gslides_slide.thumbnail(
                size=thumbnail_size, api_client=api_client.gslides_client
            )

        try:
            gslides_thumbnail = fetch_thumbnail()
        except Exception as e:
            # Check if this is a file access denied error (drive.file scope)
            detect_file_access_denied_error(error=e, file_id=self.presentation_id)
            raise

        # Handle mime_type format: Google Slides returns "png", we need "image/png"
        mime_type = (
            gslides_thumbnail.mime_type
            if gslides_thumbnail.mime_type.startswith("image/")
            else f"image/{gslides_thumbnail.mime_type}"
        )

        content = gslides_thumbnail.payload if include_data else None
        file_size = len(content) if content else None

        return AbstractThumbnail(
            contentUrl=gslides_thumbnail.contentUrl,
            width=gslides_thumbnail.width,
            height=gslides_thumbnail.height,
            mime_type=mime_type,
            content=content,
            file_size=file_size,
        )
def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float:
    """Return the slide height converted to ``units``.

    The page height stored on the wrapped presentation is either an object
    exposing ``magnitude`` or a bare number; whichever form is found is
    handed to ``from_emu`` for conversion.

    Args:
        units: Unit to express the result in.

    Returns:
        The slide height in the requested units.
    """
    page_height = self._gslides_presentation.pageSize.height
    if hasattr(page_height, "magnitude"):
        emu_value = page_height.magnitude
    else:
        # Plain numeric page sizes are coerced to float before conversion.
        emu_value = float(page_height)
    return from_emu(emu_value, units)
def delete_slides(self, slides: list[Union[GSlidesSlide, int]], api_client: GSlidesAPIClient):
    """Delete several slides, each given by reference or by index.

    Integer entries are resolved against ``self.slides`` *before* anything is
    deleted. Resolving them one at a time while the list shrinks would shift
    the remaining indices, so ``[0, 1]`` would remove the first and third
    slides instead of the first two.

    Args:
        slides: Slide objects and/or indices into ``self.slides`` as it is
            when this method is called.
        api_client: Client forwarded to ``delete_slide`` for each deletion.

    Raises:
        IndexError: If an integer entry is out of range for ``self.slides``.
    """
    targets = []
    seen_ids = set()
    for entry in slides:
        target = self.slides[entry] if isinstance(entry, int) else entry
        # The same slide may be referenced twice (e.g. by index and by
        # reference); deleting it a second time would fail, so keep one.
        if id(target) in seen_ids:
            continue
        seen_ids.add(id(target))
        targets.append(target)

    for target in targets:
        self.delete_slide(target, api_client)
def duplicate_slide(
    self, slide: Union[GSlidesSlide, int], api_client: GSlidesAPIClient
) -> GSlidesSlide:
    """Duplicate a slide and register the copy at the end of ``self.slides``.

    Args:
        slide: The slide to copy, or its index in ``self.slides``.
        api_client: Client whose ``gslides_client`` performs the duplication.

    Returns:
        The wrapper around the newly created slide.

    Raises:
        ValueError: If ``slide`` is neither a ``GSlidesSlide`` nor an ``int``.
    """
    source = self.slides[slide] if isinstance(slide, int) else slide
    if not isinstance(source, GSlidesSlide):
        raise ValueError("slide must be a GSlidesSlide or int")

    # Delegate the actual copy to the wrapped slide object.
    copied = source._gslides_slide.duplicate(api_client=api_client.gslides_client)
    wrapper = GSlidesSlide(copied)
    self.slides.append(wrapper)

    # Parent references are normally set by a validator at construction
    # time, so they have to be wired by hand for a slide added afterwards.
    wrapper._parent_presentation = self
    for child in wrapper.elements:
        child._parent_presentation = self
    return wrapper
+""" + +import copy +import io +import logging +import os +import shutil +import tempfile +import uuid +from typing import Annotated, Any, List, Optional, Union + +from bs4 import BeautifulSoup +from bs4.element import Tag +from pydantic import BaseModel, ConfigDict, Discriminator, Field +from pydantic import Tag as PydanticTag +from pydantic import TypeAdapter, model_validator + +from gslides_api.agnostic.domain import ImageData +from gslides_api.agnostic.element import MarkdownTableElement, TableData +from gslides_api.agnostic.units import OutputUnit, from_emu, to_emu + +from gslides_api.common.download import download_binary_file + +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractCredentials, + AbstractElement, + AbstractElementKind, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSlide, + AbstractSlideProperties, + AbstractSlidesAPIClient, + AbstractSpeakerNotes, + AbstractTableElement, + AbstractThumbnail, +) + +logger = logging.getLogger(__name__) + +# Type alias for HTML element inputs +HTMLElementInput = Union[Tag, dict, "HTMLElementParent"] + +# Inline formatting tags to EXCLUDE from element parsing +# These are text styling elements that should not be separate blocks +INLINE_FORMATTING_TAGS = { + "strong", + "b", + "em", + "i", + "u", + "s", + "strike", + "del", + "span", + "br", + "a", + "sub", + "sup", + "code", + "small", + "mark", +} + +# Tags that are internal to other structures (skip these) +INTERNAL_STRUCTURE_TAGS = { + "li", + "thead", + "tbody", + "tr", + "td", + "th", + "figcaption", +} + +# Selectors for common consent/overlay widgets that should not appear in thumbnails +THUMBNAIL_OVERLAY_BLOCKLIST_CSS = """ +[id*="cookie" i], +[class*="cookie" i], +[id*="consent" i], +[class*="consent" i], +[id*="gdpr" i], +[class*="gdpr" i], +[id*="onetrust" i], +[class*="onetrust" i], +[id*="usercentrics" i], +[class*="usercentrics" i], +[id*="didomi" i], +[class*="didomi" i], 
+[id*="qc-cmp2" i], +[class*="qc-cmp2" i] { + display: none !important; + visibility: hidden !important; + opacity: 0 !important; + pointer-events: none !important; +} +""" + +THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT = """ +(selector) => { + const slide = document.querySelector(selector); + if (!slide) { + return { hidden: 0, reason: "slide_not_found" }; + } + + const keywords = [ + "cookie", + "consent", + "gdpr", + "onetrust", + "usercentrics", + "didomi", + "privacy", + "trustarc", + "qc-cmp2", + ]; + let hiddenCount = 0; + + for (const element of document.querySelectorAll("body *")) { + if (!(element instanceof HTMLElement)) { + continue; + } + if (slide.contains(element)) { + continue; + } + + const className = typeof element.className === "string" + ? element.className + : (element.getAttribute("class") || ""); + const signature = `${element.id || ""} ${className}`.toLowerCase(); + const hasKeyword = keywords.some((word) => signature.includes(word)); + + const computed = window.getComputedStyle(element); + const position = computed.position; + const zIndex = Number.parseInt(computed.zIndex || "0", 10); + const rect = element.getBoundingClientRect(); + const role = (element.getAttribute("role") || "").toLowerCase(); + const isDialogLike = element.getAttribute("aria-modal") === "true" || role === "dialog"; + const isFixedLike = position === "fixed" || position === "sticky"; + const isLargeBar = + rect.width >= window.innerWidth * 0.6 && + rect.height <= window.innerHeight * 0.35 && + (rect.top <= 120 || rect.bottom >= window.innerHeight - 120); + const isModalSized = + rect.width >= window.innerWidth * 0.3 && + rect.height >= window.innerHeight * 0.2; + const isOverlayCandidate = + hasKeyword || + (isFixedLike && (zIndex >= 10 || isLargeBar || isDialogLike || isModalSized)) || + (isDialogLike && zIndex >= 10); + + if (!isOverlayCandidate) { + continue; + } + + element.setAttribute("data-storyline-thumbnail-hidden", "true"); + element.style.setProperty("display", 
"none", "important"); + element.style.setProperty("visibility", "hidden", "important"); + element.style.setProperty("pointer-events", "none", "important"); + hiddenCount += 1; + } + + return { hidden: hiddenCount }; +} +""" + + +def _has_includable_child(element: Tag) -> bool: + """Check if element has child elements that would be included as blocks. + + Used to determine if this element is the "innermost" text-containing element. + If it has children that would be included, skip this element (not innermost). + """ + for child in element.children: + if not isinstance(child, Tag): + continue + child_name = child.name.lower() if child.name else "" + # Skip inline formatting - these don't count + if child_name in INLINE_FORMATTING_TAGS: + continue + # Skip internal structures - these don't count + if child_name in INTERNAL_STRUCTURE_TAGS: + continue + # If child is an image, parent has includable child + if child_name == "img": + return True + # If child has text content, parent has includable child + if child.get_text(strip=True): + return True + return False + + +def _hide_overlay_elements_for_thumbnail_sync(page: Any, slide_selector: str) -> None: + """Hide fixed overlays (cookie bars, consent dialogs, sticky headers) before screenshot.""" + try: + page.add_style_tag(content=THUMBNAIL_OVERLAY_BLOCKLIST_CSS) + result = page.evaluate(THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT, slide_selector) + hidden_count = result.get("hidden", 0) if isinstance(result, dict) else 0 + logger.debug("Suppressed %d overlay element(s) before thumbnail capture", hidden_count) + except Exception as e: + logger.debug("Overlay suppression failed during thumbnail capture: %s", e) + + +async def _hide_overlay_elements_for_thumbnail_async(page: Any, slide_selector: str) -> None: + """Async variant of overlay suppression for batch thumbnail capture.""" + try: + await page.add_style_tag(content=THUMBNAIL_OVERLAY_BLOCKLIST_CSS) + result = await page.evaluate(THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT, 
# Tag names that are wrapped as text-bearing "shape" elements.
_SHAPE_TAG_NAMES = frozenset(
    {
        "div",
        "p",
        "span",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "section",
        "article",
        "ul",
        "ol",
    }
)


def _element_kind_for_tag(tag: Tag) -> str:
    """Map a BeautifulSoup tag to its union tag: image, table, shape or generic."""
    tag_name = tag.name.lower() if tag.name else ""
    if tag_name == "img":
        return "image"
    if tag_name == "table":
        return "table"
    if tag_name in _SHAPE_TAG_NAMES:
        return "shape"
    return "generic"


def html_element_discriminator(v: HTMLElementInput) -> str:
    """Choose the ``HTMLElement`` union member for a raw input value.

    Resolution order:

    1. A BeautifulSoup ``Tag`` is classified by its tag name.
    2. An object exposing ``html_element`` is classified by that tag's name,
       or as ``"generic"`` when the attribute does not hold a ``Tag``.
    3. Anything else is classified by its ``type`` field. For a ``dict``
       the field is read as a key: ``getattr`` on a dict never sees its
       keys, so dict inputs used to be classified as ``"generic"`` whatever
       their ``type`` entry said.

    Returns:
        One of ``"shape"``, ``"image"``, ``"table"`` or ``"generic"``.
    """
    if isinstance(v, Tag):
        return _element_kind_for_tag(v)

    if hasattr(v, "html_element"):
        wrapped = v.html_element
        if isinstance(wrapped, Tag):
            return _element_kind_for_tag(wrapped)
        return "generic"

    element_type = v.get("type") if isinstance(v, dict) else getattr(v, "type", None)
    if element_type in [AbstractElementKind.SHAPE, "SHAPE", "shape"]:
        return "shape"
    if element_type in [AbstractElementKind.IMAGE, "IMAGE", "image"]:
        return "image"
    if element_type in [AbstractElementKind.TABLE, "TABLE", "table"]:
        return "table"
    return "generic"
def copy_presentation(
    self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None
) -> dict:
    """Copy a presentation directory to another location.

    Args:
        presentation_id: Path of the presentation directory to copy.
        copy_title: Directory name of the copy.
        folder_id: Existing directory to place the copy in. When ``None``
            the copy is created next to the source directory.

    Returns:
        A dict with ``id`` (path of the copy), ``name`` (``copy_title``) and
        ``parents`` (a one-element list holding the destination folder).

    Raises:
        FileNotFoundError: If the source directory, or an explicitly given
            destination folder, does not exist.
    """
    if not os.path.isdir(presentation_id):
        raise FileNotFoundError(f"Presentation directory not found: {presentation_id}")

    if folder_id is None:
        # Strip trailing separators first: os.path.dirname("deck/") is
        # "deck", which would put the copy *inside* the source directory.
        separators = os.sep + (os.altsep or "")
        source_path = presentation_id.rstrip(separators) or presentation_id
        dest_folder = os.path.dirname(source_path)
    else:
        if not os.path.isdir(folder_id):
            raise FileNotFoundError(f"Destination folder not found: {folder_id}")
        dest_folder = folder_id

    dest_path = os.path.join(dest_folder, copy_title)

    # An existing destination directory is merged into / overwritten.
    shutil.copytree(presentation_id, dest_path, dirs_exist_ok=True)

    return {
        "id": dest_path,
        "name": copy_title,
        "parents": [dest_folder],
    }
class HTMLSpeakerNotes(AbstractSpeakerNotes):
    """Speaker notes kept in the ``data-notes`` attribute of a section tag."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Tag carrying the ``data-notes`` attribute; declared with exclude=True.
    html_section: Any = Field(exclude=True, default=None)

    def __init__(self, html_section: Tag, **kwargs):
        super().__init__(**kwargs)
        self.html_section = html_section

    def read_text(self, as_markdown: bool = True) -> str:
        """Return the stored notes, or an empty string when there are none.

        ``as_markdown`` is accepted but not used: the attribute value is
        returned as stored.
        """
        section = self.html_section
        if not section:
            return ""
        return section.get("data-notes", "") or ""

    def write_text(self, api_client: "HTMLAPIClient", content: str):
        """Store ``content`` as the notes; a no-op without a section tag."""
        section = self.html_section
        if section:
            section["data-notes"] = content
ConfigDict(arbitrary_types_allowed=True) + + html_element: Any = Field(exclude=True, default=None) + html_section: Any = Field(exclude=True, default=None) + directory_path: Optional[str] = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_html_element(cls, data: HTMLElementInput) -> dict: + """Convert from BeautifulSoup element to our abstract representation.""" + if isinstance(data, dict): + # Already converted + return data + elif isinstance(data, Tag): + html_element = data + elif hasattr(data, "html_element"): + # Already wrapped element + return data.__dict__ + else: + raise ValueError(f"Expected BeautifulSoup Tag, got {type(data)}") + + # Extract basic properties + object_id = html_element.get("id", "") or html_element.get("data-element-id", "") + + # Get alt text from data attributes + alt_text_title = html_element.get("data-alt-title", None) + alt_text_descr = html_element.get("data-alt-description", None) + + return { + "objectId": object_id, + "presentation_id": "", # Will be set by parent + "slide_id": "", + "alt_text": AbstractAltText(title=alt_text_title, description=alt_text_descr), + "type": "generic", + "html_element": html_element, + } + + def absolute_size(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + """Get the absolute size of the element by parsing CSS width/height from style attribute.""" + if not self.html_element: + return (0.0, 0.0) + + # Parse CSS style attribute + style = self.html_element.get("style", "") + width_px = self._parse_css_dimension(style, "width") + height_px = self._parse_css_dimension(style, "height") + + # Convert pixels to requested units (assuming 96 DPI for px to inches) + if units == OutputUnit.IN or units == "in": + return (width_px / 96.0, height_px / 96.0) + elif units == OutputUnit.PX or units == "px": + return (width_px, height_px) + else: + return (width_px / 96.0, height_px / 96.0) + + def _parse_css_dimension(self, style: str, 
property_name: str) -> float: + """Parse a CSS dimension value from a style string, returning value in pixels.""" + import re + + # Look for the property in the style string + pattern = rf"{property_name}\s*:\s*([^;]+)" + match = re.search(pattern, style, re.IGNORECASE) + if not match: + return 0.0 + + value_str = match.group(1).strip() + + # Parse numeric value and unit + num_match = re.match(r"([\d.]+)\s*(px|in|pt|em|rem|%|)", value_str, re.IGNORECASE) + if not num_match: + return 0.0 + + value = float(num_match.group(1)) + unit = num_match.group(2).lower() if num_match.group(2) else "px" + + # Convert to pixels + if unit == "px" or unit == "": + return value + elif unit == "in": + return value * 96 # 96 DPI + elif unit == "pt": + return value * 96 / 72 # 72 points per inch + elif unit in ("em", "rem"): + return value * 16 # Assume 16px base font + elif unit == "%": + # Percentage values need parent context, return 0 for now + return 0.0 + else: + return value + + def absolute_position(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + """Get the absolute position of the element (simplified - returns 0,0 for HTML).""" + if not self.html_element: + return (0.0, 0.0) + + # HTML uses CSS layout, not absolute positioning + # For now, return simplified values + # Future: parse CSS left/top from style attribute + return (0.0, 0.0) + + def create_image_element_like(self, api_client: HTMLAPIClient) -> "HTMLImageElement": + """Create an image element with the same properties as this element.""" + if not self.html_element: + logger.warning("Cannot create image element: missing html_element reference") + raise ValueError("Cannot create image element without html_element reference") + + # Create a new <img> tag - need to find the document root for new_tag + # Navigate up to find the root BeautifulSoup object which has new_tag method + parent = self.html_element + while parent is not None: + if hasattr(parent, "new_tag") and callable(getattr(parent, "new_tag", 
def set_alt_text(
    self,
    api_client: HTMLAPIClient,
    title: str | None = None,
    description: str | None = None,
):
    """Update the element's alt text on both the tag and the model.

    Each value that is not ``None`` is written to its data attribute
    (``data-alt-title`` / ``data-alt-description``) and mirrored onto
    ``self.alt_text``. ``None`` leaves that field alone. Nothing happens
    when the element has no underlying tag.
    """
    tag = self.html_element
    if not tag:
        return

    if title is not None:
        tag["data-alt-title"] = title
        self.alt_text.title = title

    if description is not None:
        tag["data-alt-description"] = description
        self.alt_text.description = description
def styles(self, skip_whitespace: bool = True) -> Optional[List[dict]]:
    """Return simplified style information for the element's text.

    Only the text itself is reported; inline CSS is not parsed.

    Args:
        skip_whitespace: When true, an element whose text is empty or
            whitespace-only yields ``None``.

    Returns:
        A one-item list ``[{"text": <element text>}]``, or ``None`` when
        there is no underlying tag or the text is skipped.
    """
    tag = self.html_element
    if not tag:
        return None

    content = tag.get_text()
    if skip_whitespace and not content.strip():
        return None

    return [{"text": content}]
def replace_image(
    self,
    api_client: HTMLAPIClient,
    file: str | None = None,
    url: str | None = None,
):
    """Replace the image shown by this ``<img>`` element.

    A local ``file`` that exists is copied into ``<directory_path>/images``
    and ``src`` is pointed at the copy (relative path). Otherwise, when
    ``url`` is given, the image is downloaded to a temporary file and then
    handled like a local file.

    Args:
        api_client: Unused; kept for interface compatibility.
        file: Path of a local image file.
        url: URL to download the image from. Used only when ``file`` is
            missing or does not exist.

    Every failure mode visible here (not an ``<img>`` tag, no
    ``directory_path``, no usable source) logs a warning and returns without
    changing the element.
    """
    if (
        not self.html_element
        or not isinstance(self.html_element.name, str)
        or self.html_element.name.lower() != "img"
    ):
        logger.warning("Cannot replace image: element is not an <img> tag")
        return

    if file and os.path.exists(file):
        # Copy file to images/ subdirectory
        if not self.directory_path:
            logger.warning("Cannot replace image: no directory_path set")
            return

        images_dir = os.path.join(self.directory_path, "images")
        os.makedirs(images_dir, exist_ok=True)

        filename = os.path.basename(file)
        dest_path = os.path.join(images_dir, filename)
        shutil.copy2(file, dest_path)

        # Update src attribute (relative path)
        self.html_element["src"] = f"images/{filename}"
        logger.info(f"Replaced image with {file}, src set to images/{filename}")

    elif url:
        from urllib.parse import urlparse

        # Without a target directory the copy step cannot succeed, so skip
        # the download entirely.
        if not self.directory_path:
            logger.warning("Cannot replace image: no directory_path set")
            return

        # Keep the remote file's extension when it is a known image type, so
        # the stored name (and any extension-based mime detection) matches
        # the content instead of always claiming PNG.
        suffix = os.path.splitext(urlparse(url).path)[1].lower()
        if suffix not in {".png", ".jpg", ".jpeg", ".gif", ".svg"}:
            suffix = ".png"

        # For URL images, download first using utility with retries
        content, _ = download_binary_file(url)

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        temp_file_path = temp_file.name
        try:
            with temp_file:
                temp_file.write(content)
            self.replace_image(api_client, file=temp_file_path)
        finally:
            # Runs even if the write fails, so the temp file never leaks.
            os.unlink(temp_file_path)

    else:
        logger.warning("Cannot replace image: no existing file or url provided")
def resize(
    self,
    api_client: HTMLAPIClient,
    rows: int,
    cols: int,
    fix_width: bool = True,
    fix_height: bool = True,
    target_height_in: float | None = None,
) -> float:
    """Grow or shrink the table body to ``rows`` x ``cols`` cells.

    Only rows that are direct children of ``<tbody>`` are touched; a
    ``<tbody>`` is appended when the table has none. Surplus rows and cells
    are removed from the end, and missing ones are appended as empty
    ``<td>`` cells.

    Args:
        api_client: Unused.
        rows: Desired number of body rows.
        cols: Desired number of cells per body row.
        fix_width: Unused.
        fix_height: Unused.
        target_height_in: Ignored for HTML tables (they auto-size).

    Returns:
        Always ``1.0``: no font scaling is applied. Errors raised while
        editing the table are logged rather than propagated.
    """
    table = self.html_element
    if not table or not isinstance(table.name, str) or table.name.lower() != "table":
        return 1.0

    try:
        factory = self._get_soup()

        body = table.find("tbody")
        if not body:
            body = factory.new_tag("tbody")
            table.append(body)

        existing_rows = body.find_all("tr", recursive=False)
        missing_rows = rows - len(existing_rows)

        if missing_rows > 0:
            for _ in range(missing_rows):
                fresh_row = factory.new_tag("tr")
                for _ in range(cols):
                    fresh_row.append(factory.new_tag("td"))
                body.append(fresh_row)
        elif missing_rows < 0:
            for surplus_row in existing_rows[rows:]:
                surplus_row.decompose()

        # Bring every remaining row to exactly `cols` cells.
        for tr in body.find_all("tr", recursive=False):
            row_cells = tr.find_all(["td", "th"], recursive=False)
            missing_cells = cols - len(row_cells)

            if missing_cells > 0:
                for _ in range(missing_cells):
                    tr.append(factory.new_tag("td"))
            elif missing_cells < 0:
                for surplus_cell in row_cells[cols:]:
                    surplus_cell.decompose()

    except Exception as e:
        logger.error(f"Error resizing table: {e}")

    return 1.0
+ """ + if ( + not self.html_element + or not isinstance(self.html_element.name, str) + or self.html_element.name.lower() != "table" + ): + return + + try: + soup = self._get_soup() + + # Get table data from markdown content + if hasattr(markdown_content, "content") and hasattr( + markdown_content.content, "headers" + ): + headers = markdown_content.content.headers + data_rows = markdown_content.content.rows + else: + # Fallback for old interface + headers = ( + markdown_content.rows[0] + if hasattr(markdown_content, "rows") and markdown_content.rows + else [] + ) + data_rows = ( + markdown_content.rows[1:] + if hasattr(markdown_content, "rows") and len(markdown_content.rows) > 1 + else [] + ) + + if not headers: + return + + # Ensure table has thead and tbody + thead = self.html_element.find("thead") + if not thead: + thead = soup.new_tag("thead") + self.html_element.insert(0, thead) + + tbody = self.html_element.find("tbody") + if not tbody: + tbody = soup.new_tag("tbody") + self.html_element.append(tbody) + + # Clear existing content + thead.clear() + tbody.clear() + + # Add header row + header_row = soup.new_tag("tr") + for header in headers: + th = soup.new_tag("th") + th.string = str(header) if header is not None else "" + header_row.append(th) + thead.append(header_row) + + # Add data rows + for row_data in data_rows: + tr = soup.new_tag("tr") + for cell_data in row_data: + td = soup.new_tag("td") + td.string = str(cell_data) if cell_data is not None else "" + tr.append(td) + tbody.append(tr) + + except Exception as e: + logger.error(f"Error updating table content: {e}") + + def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float: + """Get weight of horizontal borders in specified units. + + For HTML tables, borders are handled via CSS and don't have a fixed weight + that contributes to layout height, so we return 0. 
def get_column_count(self) -> int:
    """Return the number of cells in the table's first available row.

    The first <tr> of <thead> is preferred; if there is no <thead> or it
    has no row, the first <tr> of <tbody> is used. Only direct <th>/<td>
    children of that row are counted. Returns 0 when there is no element
    or no row in either section.
    """
    table = self.html_element
    if not table:
        return 0

    # Header section is consulted first, then the body section.
    for section_name, cell_tags in (("thead", ["th", "td"]), ("tbody", ["td", "th"])):
        section = table.find(section_name)
        first_row = section.find("tr") if section else None
        if first_row:
            return len(first_row.find_all(cell_tags, recursive=False))

    return 0
def validate_html_element(html_element: Tag) -> HTMLElement:
    """Build the concrete element wrapper that matches a BeautifulSoup Tag.

    The tag is classified with ``html_element_discriminator``; "shape",
    "image" and "table" map to their dedicated classes, and any other
    result falls back to ``HTMLElementParent``. The chosen class converts
    the tag into constructor keyword arguments and is then instantiated.
    """
    concrete_by_kind = {
        "shape": HTMLShapeElement,
        "image": HTMLImageElement,
        "table": HTMLTableElement,
    }
    element_cls = concrete_by_kind.get(
        html_element_discriminator(html_element), HTMLElementParent
    )
    return element_cls(**element_cls.convert_from_html_element(html_element))
def thumbnail(
    self, api_client: HTMLAPIClient, size: str, include_data: bool = False
) -> AbstractThumbnail:
    """Render this slide's <section> to a PNG thumbnail with Playwright.

    Loads ``index.html`` from ``directory_path`` in headless Chromium,
    screenshots the section selected by ``objectId`` (or the first
    ``section`` when the slide has no id) and reports the image size.

    Args:
        api_client: Not used by this implementation.
        size: Not used by this implementation.
        include_data: When True the PNG bytes are attached to the result;
            otherwise ``content`` is None.

    Returns:
        The captured thumbnail, or a 320x240 placeholder descriptor when
        the directory or index.html is missing, the section cannot be
        found, or rendering raises.
    """

    def _placeholder() -> AbstractThumbnail:
        # Shared result for every failure path.
        return AbstractThumbnail(
            contentUrl="placeholder_thumbnail.png",
            width=320,
            height=240,
            mime_type="image/png",
            content=None,
        )

    if not self.directory_path:
        logger.warning("Cannot generate thumbnail: no directory_path set")
        return _placeholder()

    index_path = os.path.join(self.directory_path, "index.html")
    if not os.path.exists(index_path):
        logger.warning(f"Cannot generate thumbnail: index.html not found at {index_path}")
        return _placeholder()

    file_url = f"file://{os.path.abspath(index_path)}"

    # Imported lazily, outside the try block, so an import failure propagates.
    from playwright.sync_api import sync_playwright

    try:
        with sync_playwright() as playwright:
            browser = playwright.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    device_scale_factor=2,
                    viewport={"width": 1280, "height": 720},
                )
                page = context.new_page()
                page.goto(file_url, wait_until="networkidle")
                page.wait_for_timeout(1000)

                # Select by id when available (colons escaped for CSS),
                # otherwise fall back to the bare tag selector.
                slide_id = self.objectId
                if slide_id:
                    selector = "section#" + slide_id.replace(":", "\\:")
                else:
                    selector = "section"

                _hide_overlay_elements_for_thumbnail_sync(page, selector)
                target = page.query_selector(selector)
                if not target:
                    logger.warning(f"Could not find slide element with selector: {selector}")
                else:
                    png_bytes = target.screenshot(type="png")

                    # Decode the screenshot only to read its pixel size.
                    from PIL import Image
                    import io as _io

                    img = Image.open(_io.BytesIO(png_bytes))
                    img_width, img_height = img.size
                    img.close()

                    return AbstractThumbnail(
                        contentUrl=file_url,
                        width=img_width,
                        height=img_height,
                        mime_type="image/png",
                        content=png_bytes if include_data else None,
                    )
            finally:
                browser.close()
    except Exception as e:
        logger.error(f"Error generating HTML thumbnail: {e}")

    # Reached when the section was not found or rendering failed.
    return _placeholder()
def __init__(
    self,
    html_soup: BeautifulSoup,
    directory_path: str,
):
    """Build the presentation from a parsed document and its directory.

    Every <section> that is not nested inside another <section> becomes an
    ``HTMLSlide``; sections that fail to convert are logged and skipped.
    The directory path doubles as the presentation id, and the title comes
    from the <title> tag, or the directory's base name when there is none.
    """
    slides = []
    for section in html_soup.find_all("section"):
        # Nested sections belong to their enclosing slide.
        if section.find_parent("section") is not None:
            continue
        try:
            slides.append(HTMLSlide(section, html_soup, directory_path))
        except Exception as e:
            logger.warning(f"Could not convert section to slide: {e}")

    title_tag = html_soup.find("title")
    if title_tag:
        title = title_tag.get_text(strip=True)
    else:
        title = os.path.basename(directory_path)

    super().__init__(
        slides=slides,
        presentationId=directory_path,
        revisionId=None,  # HTML doesn't have revision IDs
        title=title,
    )

    # Assigned after the base initialiser has run.
    self.html_soup = html_soup
    self.directory_path = directory_path
@classmethod
def from_id(
    cls,
    api_client: HTMLAPIClient,
    presentation_id: str,
) -> "HTMLPresentation":
    """Load a presentation from a directory containing ``index.html``.

    Args:
        api_client: Not used by this implementation.
        presentation_id: Path of the presentation directory.

    Returns:
        A presentation built from the parsed ``index.html``.

    Raises:
        FileNotFoundError: If the directory or its ``index.html`` is missing.
        ValueError: If reading, parsing or constructing the presentation
            fails; the original exception is attached as ``__cause__``.
    """
    # isdir() is already False for paths that do not exist.
    if not os.path.isdir(presentation_id):
        raise FileNotFoundError(f"Presentation directory not found: {presentation_id}")

    html_file = os.path.join(presentation_id, "index.html")
    if not os.path.exists(html_file):
        raise FileNotFoundError(f"index.html not found in {presentation_id}")

    try:
        with open(html_file, "r", encoding="utf-8") as f:
            html_content = f.read()

        html_soup = BeautifulSoup(html_content, "lxml")
        return cls(html_soup, presentation_id)
    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the
        # direct cause rather than as an error raised during handling.
        raise ValueError(f"Could not load presentation from {presentation_id}: {e}") from e
def save(self, api_client: HTMLAPIClient) -> None:
    """Write the current document to ``<directory_path>/index.html``.

    The directory is created if it does not exist yet.

    Args:
        api_client: Not used by this implementation.

    Raises:
        ValueError: If no directory path is set.
    """
    if not self.directory_path:
        raise ValueError("No directory path specified for saving")

    html_file = os.path.join(self.directory_path, "index.html")

    # Ensure directory exists
    os.makedirs(self.directory_path, exist_ok=True)

    # Serialize with str() rather than prettify(): prettify() puts every
    # tag and string on its own indented line, which inserts whitespace
    # into text nodes and between inline elements and so changes both how
    # the slide renders and what is read back from the file.
    with open(html_file, "w", encoding="utf-8") as f:
        f.write(str(self.html_soup))
def duplicate_slide(self, slide: Union[HTMLSlide, int], api_client: HTMLAPIClient) -> HTMLSlide:
    """Duplicate a slide and append the copy at the end of the presentation.

    Args:
        slide: The slide to copy, or its index in ``self.slides``.
        api_client: Not used by this implementation.

    Returns:
        The new slide wrapping the copied <section>.

    Raises:
        ValueError: If ``slide`` is neither an ``HTMLSlide`` nor an int.
    """
    if isinstance(slide, int):
        slide = self.slides[slide]

    if not isinstance(slide, HTMLSlide):
        raise ValueError("slide must be an HTMLSlide or int")

    # Deep copy the section
    new_section = copy.deepcopy(slide.html_section)

    # Give the copy its own id, as insert_copy() does. Keeping the source
    # id would leave two sections with the same id in the document, so
    # id-based selectors (e.g. for thumbnails) would hit the original.
    new_section["id"] = f"slide-{uuid.uuid4().hex[:8]}"

    # Append to DOM
    self.html_soup.body.append(new_section)

    # Create new slide wrapper and register it last, mirroring the DOM.
    new_slide = HTMLSlide(new_section, self.html_soup, self.directory_path)
    self.slides.append(new_slide)

    return new_slide
+ + Returns: + List of AbstractThumbnail objects with image data + """ + import io as _io + + from PIL import Image + from playwright.async_api import async_playwright + + target_slides = slides if slides is not None else self.slides + thumbnails = [] + + if not target_slides: + return thumbnails + + if not self.directory_path: + logger.warning("Cannot generate thumbnails: no directory_path set") + return [self._create_placeholder_thumbnail() for _ in target_slides] + + html_file = os.path.join(self.directory_path, "index.html") + if not os.path.exists(html_file): + logger.warning(f"Cannot generate thumbnails: index.html not found at {html_file}") + return [self._create_placeholder_thumbnail() for _ in target_slides] + + html_file_url = f"file://{os.path.abspath(html_file)}" + + logger.info( + "Generating HTML thumbnails for %d slides from %s", len(target_slides), html_file_url + ) + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + try: + context = await browser.new_context( + device_scale_factor=2, # 2x for good quality thumbnails + viewport={"width": 1280, "height": 720}, + ) + page = await context.new_page() + + # Navigate to the HTML file + await page.goto(html_file_url, wait_until="networkidle") + await page.wait_for_timeout(1000) # Wait for any JS rendering + + # Generate thumbnail for each slide + for i, slide in enumerate(target_slides): + slide_id = slide.objectId + + # Build selector - prefer ID-based, fallback to index + if slide_id: + # CSS ID selectors need escaping for special characters + safe_id = slide_id.replace(":", "\\:") + selector = f"section#{safe_id}" + else: + # Fallback: use nth-of-type selector + selector = f"section:nth-of-type({i + 1})" + + try: + await _hide_overlay_elements_for_thumbnail_async(page, selector) + slide_element = await page.query_selector(selector) + if slide_element: + # Take screenshot of the slide section + png_bytes = await slide_element.screenshot(type="png") + + # Get 
def _create_placeholder_thumbnail(self) -> AbstractThumbnail:
    """Return a 320x240 solid light-gray PNG thumbnail for failed captures."""
    import io as _io

    from PIL import Image

    width, height = 320, 240

    # Render a flat gray image and serialise it to PNG in memory.
    placeholder = Image.new("RGB", (width, height), color=(200, 200, 200))
    sink = _io.BytesIO()
    placeholder.save(sink, format="PNG")
    data = sink.getvalue()
    placeholder.close()

    return AbstractThumbnail(
        contentUrl="",
        width=width,
        height=height,
        mime_type="image/png",
        content=data,
        file_size=len(data),
    )
+""" + +import logging +from typing import Optional + +from bs4 import BeautifulSoup, Tag + +from gslides_api.agnostic.ir import ( + FormattedDocument, + FormattedList, + FormattedParagraph, + FormattedTextRun, +) +from gslides_api.agnostic.markdown_parser import parse_markdown_to_ir +from gslides_api.agnostic.text import FullTextStyle + +logger = logging.getLogger(__name__) + + +def apply_markdown_to_html_element( + markdown_text: str, + html_element: Tag, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Apply markdown formatting to an HTML element. + + Args: + markdown_text: The markdown text to convert + html_element: The BeautifulSoup Tag to write to (e.g., <div>, <p>) + base_style: Optional base text style (from gslides-api TextStyle). + NOTE: Only RichStyle properties (font_family, font_size, color, underline) + are applied from base_style. Markdown-renderable properties (bold, italic) + should come from the markdown content itself (e.g., **bold**, *italic*). + + Note: + This function clears the existing content of the element before writing. 
def _add_run_to_html(
    run: FormattedTextRun,
    parent: Tag,
    soup: BeautifulSoup,
    base_style: Optional[FullTextStyle] = None,
) -> None:
    """Append one text run to ``parent``, wrapped in its formatting tags.

    Each active style contributes one wrapper element, nested in the fixed
    order ``<strong>`` > ``<em>`` > ``<u>`` > ``<s>`` (outermost first), so
    combined styles such as bold+underline are all kept. A run with no
    active style is appended as plain text.

    Args:
        run: The run to render; ``run.content`` is the text and
            ``run.style.markdown`` / ``run.style.rich`` carry the flags.
        parent: Element that receives the new node.
        soup: Factory used to create the wrapper tags.
        base_style: Not used by this implementation.
    """
    text = run.content

    # FullTextStyle has markdown (bold, italic, strikethrough) and rich
    # (underline, color, etc.); either part may be absent.
    markdown_style = run.style.markdown
    rich_style = run.style.rich
    flags = (
        ("strong", markdown_style.bold if markdown_style else False),
        ("em", markdown_style.italic if markdown_style else False),
        ("u", rich_style.underline if rich_style else False),
        ("s", markdown_style.strikethrough if markdown_style else False),
    )
    active_tags = [tag_name for tag_name, enabled in flags if enabled]

    if not active_tags:
        # Plain text - always append (don't use .string which replaces)
        parent.append(text)
        return

    # Build the wrappers outermost-first, put the text in the innermost
    # one, then chain each wrapper into the one before it.
    wrappers = [soup.new_tag(tag_name) for tag_name in active_tags]
    wrappers[-1].string = text
    for outer, inner in zip(wrappers, wrappers[1:]):
        outer.append(inner)
    parent.append(wrappers[0])
def convert_html_to_markdown(html_element: Tag) -> str:
    """Convert HTML element content to markdown format.

    Inline wrappers map to markdown markers (``**`` for strong/b, ``*`` for
    em/i, ``__`` for u, ``~~`` for s/strike/del). Wrappers that contain
    further elements are rendered recursively, so nested formatting such
    as ``<strong><em>x</em></strong>`` becomes ``***x***`` instead of
    losing the inner style. Wrappers with no text produce nothing.
    ``<ul>`` / ``<ol>`` become ``- `` / ``N. `` lines; any other element
    is replaced by the concatenation of its converted children.

    Args:
        html_element: BeautifulSoup Tag to read from

    Returns:
        Markdown-formatted string
    """
    inline_markers = {
        "strong": "**",
        "b": "**",
        "em": "*",
        "i": "*",
        # HTML underline doesn't have direct markdown equivalent
        "u": "__",
        "s": "~~",
        "strike": "~~",
        "del": "~~",
    }

    def render_children(elem, depth):
        """Concatenate the converted, non-empty children of ``elem``."""
        pieces = (process_element(child, depth) for child in elem.children)
        return "".join(piece for piece in pieces if piece)

    def process_element(elem, depth=0):
        """Recursively process HTML elements."""
        if isinstance(elem, str):
            return elem

        marker = inline_markers.get(elem.name)
        if marker is not None:
            # Recurse only when the wrapper holds other elements; a
            # text-only wrapper keeps using get_text() as before.
            if any(not isinstance(child, str) for child in elem.children):
                inner = render_children(elem, depth)
            else:
                inner = elem.get_text()
            # Strip whitespace from inside the tag for proper markdown,
            # and emit nothing rather than bare markers like "****".
            inner = inner.strip()
            return f"{marker}{inner}{marker}" if inner else ""

        if elem.name == "ul":
            # Bulleted list - recursively process children to preserve formatting
            return "\n".join(
                f"{' ' * depth}- {_process_li_children(li, depth=depth)}"
                for li in elem.find_all("li", recursive=False)
            )

        if elem.name == "ol":
            # Numbered list - recursively process children to preserve formatting
            return "\n".join(
                f"{' ' * depth}{i}. {_process_li_children(li, depth=depth)}"
                for i, li in enumerate(elem.find_all("li", recursive=False), 1)
            )

        # For other elements, recursively process children
        return render_children(elem, depth)

    return process_element(html_element)
gslides_api.agnostic.domain import ImageData +from gslides_api.agnostic.element import MarkdownTableElement, TableData +from gslides_api.agnostic.ir import FormattedDocument, FormattedParagraph, FormattedTextRun +from gslides_api.agnostic.ir_to_markdown import ir_to_markdown +from gslides_api.agnostic.text import ( + AbstractColor, + FullTextStyle, + MarkdownRenderableStyle, + RichStyle, +) +from gslides_api.agnostic.units import EMU_PER_INCH, OutputUnit, from_emu, to_emu + +from gslides_api.common.download import download_binary_file +from gslides_api.pptx.converters import _paragraph_has_bullet, pptx_table_to_markdown +from gslides_api.pptx.slide_copier import _remove_layout_placeholders +from gslides_api.pptx.chart_renderer import render_slide_to_image + +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractCredentials, + AbstractElement, + AbstractElementKind, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSize, + AbstractSlide, + AbstractSlideProperties, + AbstractSlidesAPIClient, + AbstractSpeakerNotes, + AbstractTableElement, + AbstractThumbnail, +) + +logger = logging.getLogger(__name__) + +# Type alias for python-pptx Presentation to avoid name collision with our class +PptxPresentation = Presentation + +# Union type for discriminator and model_validator inputs +PptxElementInput = Union[BaseShape, dict, "PowerPointElementParent"] + + +# TypedDict for style information returned by styles() method +class StyleInfo(TypedDict, total=False): + """Style information for a text run.""" + + text: str + bold: Optional[bool] + italic: Optional[bool] + font_name: Optional[str] + font_size: Optional[int] + color_rgb: Optional[tuple] + + +# Map MSO_THEME_COLOR index to XML element names in theme color scheme +_THEME_COLOR_MAP = { + 1: "dk1", # Dark 1 (typically text/background dark) + 2: "lt1", # Light 1 (typically text/background light) + 3: "dk2", # Dark 2 + 4: "lt2", # Light 2 + 5: "accent1", # Accent 1 + 
6: "accent2", # Accent 2 + 7: "accent3", # Accent 3 + 8: "accent4", # Accent 4 + 9: "accent5", # Accent 5 + 10: "accent6", # Accent 6 + 11: "hlink", # Hyperlink + 12: "folHlink", # Followed Hyperlink +} + + +def _textframe_to_ir(text_frame: TextFrame) -> FormattedDocument: + """Convert a PowerPoint TextFrame to platform-agnostic IR. + + This enables using the shared ir_to_markdown function which handles + run consolidation and proper space placement outside markdown markers. + + Args: + text_frame: The python-pptx TextFrame to convert + + Returns: + FormattedDocument with paragraphs and text runs + """ + from gslides_api.pptx.converters import _paragraph_has_bullet + + if not text_frame: + return FormattedDocument() + + elements = [] + + for paragraph in text_frame.paragraphs: + runs = [] + has_bullet = _paragraph_has_bullet(paragraph) + level = getattr(paragraph, "level", 0) or 0 + + for run in paragraph.runs: + # Build the style from run properties + md_style = MarkdownRenderableStyle( + bold=run.font.bold or False, + italic=run.font.italic or False, + hyperlink=( + run.hyperlink.address if run.hyperlink and run.hyperlink.address else None + ), + ) + style = FullTextStyle(markdown=md_style) + + runs.append(FormattedTextRun(content=run.text, style=style)) + + # Create paragraph + para = FormattedParagraph(runs=runs) + + # Handle bullet points by prepending indent and bullet marker + if has_bullet and runs: + indent = " " * level + # Prepend bullet marker to first run's content + if para.runs: + para.runs[0].content = f"{indent}- {para.runs[0].content}" + + elements.append(para) + + return FormattedDocument(elements=elements) + + +def _resolve_font_color_rgb(run, slide: Optional[Slide] = None) -> Optional[tuple]: + """Resolve font color to RGB tuple, handling both direct RGB and theme colors. + + Args: + run: A python-pptx text run object + slide: Optional slide for resolving theme colors. If not provided, + theme colors cannot be resolved and will return None. 
def _extract_base_style_from_textframe(
    text_frame: TextFrame,
    preserve_bold_italic: bool = False,
    slide: Optional[Slide] = None,
) -> Optional[FullTextStyle]:
    """Build a FullTextStyle from the first run that carries visible text.

    The style is used as the base formatting when template text is replaced,
    so that font size, font family, colour and underline survive the rewrite.

    Args:
        text_frame: The PowerPoint text frame to inspect.
        preserve_bold_italic: When True, bold/italic are taken from the
            template run (for content without markdown markers). When False
            (default) both are reported as False so that markdown syntax in
            the new content decides the emphasis.
        slide: Optional slide forwarded to ``_resolve_font_color_rgb`` so
            theme colours can be resolved to RGB.

    Returns:
        FullTextStyle for the first run with non-whitespace text, or None
        when the frame has no such run.
    """
    rpr_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}rPr"

    def _flag_from_run_xml(pptx_run, attribute: str) -> Optional[bool]:
        # Fallback used when python-pptx reports None for bold/italic:
        # read the attribute on the run's own <a:rPr>. Returns None when the
        # run has no rPr element and False when the lookup itself fails.
        try:
            run_props = pptx_run._r.find(rpr_tag)
            if run_props is not None:
                return run_props.get(attribute) == "1"
        except Exception:
            return False
        return None

    first_styled = next(
        (
            candidate
            for para in text_frame.paragraphs
            for candidate in para.runs
            if candidate.text.strip()
        ),
        None,
    )
    if first_styled is None:
        return None

    font = first_styled.font

    size = font.size
    font_size_pt = None if size is None else size.pt
    font_family = font.name

    foreground_color = None
    try:
        # Handles both direct RGB and theme colours
        rgb = _resolve_font_color_rgb(first_styled, slide=slide)
        if rgb:
            foreground_color = AbstractColor.from_rgb_tuple(rgb)
    except Exception:
        pass

    rich = RichStyle(
        font_family=font_family,
        font_size_pt=font_size_pt,
        foreground_color=foreground_color,
        underline=font.underline or False,
    )

    if not preserve_bold_italic:
        # Markdown in the new content controls emphasis, which allows mixed
        # formatting such as "Text **bold** more text".
        neutral = MarkdownRenderableStyle(
            bold=False,
            italic=False,
        )
        return FullTextStyle(rich=rich, markdown=neutral)

    # Keep the template's emphasis: python-pptx API first, raw XML second.
    bold = font.bold
    italic = font.italic
    if bold is None:
        bold = _flag_from_run_xml(first_styled, "b")
    if italic is None:
        italic = _flag_from_run_xml(first_styled, "i")

    preserved = MarkdownRenderableStyle(
        bold=bold or False,
        italic=italic or False,
    )
    return FullTextStyle(rich=rich, markdown=preserved)
def copy_presentation(
    self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None
) -> dict:
    """Copy a presentation file to ``<folder>/<copy_title>.pptx``.

    Args:
        presentation_id: Filesystem path of the source presentation.
        copy_title: Name of the copy; a trailing ".pptx" is stripped so the
            extension is never doubled.
        folder_id: Destination directory. When None the copy is placed next
            to the source file.

    Returns:
        Dict with "id" (path of the copy), "name" (title without extension)
        and "parents" (single-item list holding the destination directory).

    Raises:
        FileNotFoundError: If the source file does not exist, or if
            ``folder_id`` is given but is not an existing directory.
    """
    if not os.path.exists(presentation_id):
        raise FileNotFoundError(f"Presentation file not found: {presentation_id}")

    if folder_id is None:
        # Copy to same folder as source
        dest_folder = os.path.dirname(presentation_id)
    elif os.path.isdir(folder_id):
        # isdir() is False for missing paths, so no separate exists() check
        dest_folder = folder_id
    else:
        raise FileNotFoundError(f"Destination folder not found: {folder_id}")

    copy_title = copy_title.removesuffix(".pptx")
    dest_path = os.path.join(dest_folder, f"{copy_title}.pptx")

    # copy2 also carries over file metadata (timestamps, permissions)
    shutil.copy2(presentation_id, dest_path)

    return {
        "id": dest_path,
        "name": copy_title,
        "parents": [dest_folder],
    }
def delete_file(self, file_id: str):
    """Delete a file, directory tree or symbolic link from the filesystem.

    A path that does not exist is ignored. Symbolic links are removed
    themselves and never followed: ``shutil.rmtree`` refuses to operate on a
    link to a directory, and ``os.path.exists`` reports False for a dangling
    link, so both cases need the explicit ``islink`` check.

    Args:
        file_id: Filesystem path of the file, directory or link to delete.
    """
    if os.path.islink(file_id):
        os.remove(file_id)
    elif os.path.isdir(file_id):
        shutil.rmtree(file_id)
    elif os.path.exists(file_id):
        os.remove(file_id)
class PowerPointSpeakerNotes(AbstractSpeakerNotes):
    """PowerPoint speaker notes (belonging to a particular slide) implementation."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Underlying python-pptx notes slide; excluded from model serialization.
    notes_slide: Any = Field(exclude=True, default=None)

    def __init__(self, notes_slide: NotesSlide, **kwargs):
        super().__init__(**kwargs)
        self.notes_slide = notes_slide

    def read_text(self, as_markdown: bool = True) -> str:
        """Read text from speaker notes.

        Args:
            as_markdown: When True, bold/italic runs are rendered with
                markdown markers; otherwise the plain text is returned.

        Returns:
            The notes text, or "" when there is no notes slide or text frame.
        """
        if not self.notes_slide or not self.notes_slide.notes_text_frame:
            return ""

        text_frame = self.notes_slide.notes_text_frame
        if as_markdown:
            return self._convert_to_markdown(text_frame)
        return text_frame.text

    def write_text(self, api_client: "PowerPointAPIClient", content: str):
        """Replace the speaker notes with ``content``, one paragraph per line.

        Does nothing when there is no notes slide or no notes text frame.
        The content is written as plain text; markdown is not interpreted.
        """
        if not self.notes_slide:
            return

        text_frame = self.notes_slide.notes_text_frame
        if not text_frame:
            return

        # Clear existing content
        text_frame.clear()

        lines = content.split("\n")
        for i, line in enumerate(lines):
            if i == 0:
                text_frame.text = line
            else:
                p = text_frame.add_paragraph()
                p.text = line

    def _convert_to_markdown(self, text_frame: TextFrame) -> str:
        """Convert text frame content to markdown format.

        Adjacent runs with the same bold/italic flags are merged before the
        markers are applied, so two consecutive bold runs give ``**ab**``
        rather than ``**a****b**``. Paragraphs that render to whitespace only
        are dropped.

        NOTE(review): PowerPointShapeElement goes through the shared
        ``_textframe_to_ir`` / ``ir_to_markdown`` path for the same job;
        consider delegating here once its blank-line and bullet handling is
        confirmed to suit speaker notes.
        """
        markdown_lines = []

        for paragraph in text_frame.paragraphs:
            # (bold, italic, text) groups of consecutive same-styled runs
            merged: list[tuple[bool, bool, str]] = []
            for run in paragraph.runs:
                bold = bool(run.font.bold)
                italic = bool(run.font.italic)
                if merged and merged[-1][:2] == (bold, italic):
                    merged[-1] = (bold, italic, merged[-1][2] + run.text)
                else:
                    merged.append((bold, italic, run.text))

            line = "".join(
                self._wrap_with_markers(text, bold, italic) for bold, italic, text in merged
            )
            if line.strip():  # Only add non-empty lines
                markdown_lines.append(line)

        return "\n".join(markdown_lines)

    @staticmethod
    def _wrap_with_markers(text: str, bold: bool, italic: bool) -> str:
        """Wrap ``text`` in markdown emphasis markers, keeping outer whitespace outside.

        Markers next to whitespace (``** bold **``) are not recognised as
        emphasis, and whitespace-only runs must not produce bare markers.
        """
        core = text.strip()
        if not core or not (bold or italic):
            return text

        leading = text[: len(text) - len(text.lstrip())]
        trailing = text[len(text.rstrip()) :]
        marker = ("**" if bold else "") + ("*" if italic else "")
        return f"{leading}{marker}{core}{marker}{trailing}"
+ + +class PowerPointElementParent(AbstractElement): + """Generic concrete element for PowerPoint slide elements.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + pptx_element: Any = Field(exclude=True, default=None) + pptx_slide: Any = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_pptx_element(cls, data: PptxElementInput) -> dict: + """Convert from python-pptx element to our abstract representation.""" + if isinstance(data, dict): + # Already converted + return data + elif isinstance(data, BaseShape): + pptx_element = data + elif hasattr(data, "pptx_element"): + # Already wrapped element + return data.__dict__ + else: + raise ValueError(f"Expected BaseShape, got {type(data)}") + + # Extract basic properties + object_id = str(getattr(pptx_element, "shape_id", "")) + if not object_id and hasattr(pptx_element, "element"): + element_attr = getattr(pptx_element, "element", {}) + if hasattr(element_attr, "attrib"): + object_id = element_attr.attrib.get("id", "") + elif isinstance(element_attr, dict): + object_id = element_attr.get("id", "") + + # Get slide ID if available (from parent slide) + slide_id = "" + try: + if hasattr(pptx_element, "_element") and hasattr(pptx_element._element, "getparent"): + slide_elem = pptx_element._element.getparent() + while slide_elem is not None and hasattr(slide_elem, "tag"): + if "cSld" in slide_elem.tag: + slide_id = slide_elem.attrib.get("name", "") + break + if hasattr(slide_elem, "getparent"): + slide_elem = slide_elem.getparent() + else: + break + except Exception: + # Ignore XML traversal errors + pass + + # Get alt text if available + # While waiting for https://github.com/scanny/python-pptx/pull/512 to get merged, + # hack raw XML to get at the alt text, code inspired by + # https://stackoverflow.com/questions/63802783/check-if-image-is-decorative-in-powerpoint-using-python-pptx + cnvpr_elements = pptx_element._element.xpath(".//p:cNvPr") + 
def create_image_element_like(
    self, api_client: PowerPointAPIClient
) -> "PowerPointImageElement":
    """Replace this element with a placeholder picture of the same geometry.

    This is used when replacing non-image elements (like charts) with images.
    A small transparent PNG is added at this element's position and size,
    this element is removed from the slide, and a wrapper for the new
    picture is returned (sharing this element's alt text model).

    Args:
        api_client: Unused; present for interface conformance.

    Returns:
        PowerPointImageElement wrapping the newly added picture.

    Raises:
        ValueError: If the slide or the underlying element reference is missing.
    """
    if not self.pptx_slide or not self.pptx_element:
        logger.warning("Cannot create image element: missing slide or element reference")
        raise ValueError("Cannot create image element without slide reference")

    # Capture geometry before the element is detached from the slide
    left = self.pptx_element.left
    top = self.pptx_element.top
    width = self.pptx_element.width
    height = self.pptx_element.height

    # Small transparent placeholder image
    # (will be replaced immediately by the actual image via replace_image)
    placeholder_img = Image.new("RGBA", (100, 100), (255, 255, 255, 0))

    # Reserve a path and close the handle right away: the image is written
    # by path, and an open handle would leak if saving fails (and blocks
    # re-opening the file on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        temp_path = temp_file.name

    try:
        placeholder_img.save(temp_path, "PNG")

        # For chart elements, clean up the chart relationship before removal
        # to prevent orphaned chart files from causing corruption
        self._cleanup_chart_relationship()

        # Remove current element (chart) from the slide
        logger.debug(
            f"create_image_element_like: removing element {self.alt_text.title}, type={type(self.pptx_element)}"
        )
        xml_element = getattr(self.pptx_element, "_element", None)
        if xml_element is not None and hasattr(xml_element, "getparent"):
            parent = xml_element.getparent()
            if parent is not None:
                parent.remove(xml_element)
                logger.debug("create_image_element_like: removed element from parent")
            else:
                logger.warning("create_image_element_like: element has no parent")
        else:
            logger.warning("create_image_element_like: element has no _element or getparent")

        # Add new picture at the same position/size
        new_picture = self.pptx_slide.shapes.add_picture(
            image_file=temp_path, left=left, top=top, width=width, height=height
        )

        return PowerPointImageElement(
            objectId=str(new_picture.shape_id),
            alt_text=self.alt_text,
            pptx_element=new_picture,
            pptx_slide=self.pptx_slide,
        )
    finally:
        # Clean up temp file
        if os.path.exists(temp_path):
            os.unlink(temp_path)
def write_text(
    self,
    api_client: PowerPointAPIClient,
    content: str,
    autoscale: bool = False,
):
    """Write text to the shape, parsing markdown and applying formatting.

    The formatting of the existing text (font size, family, colour,
    underline) is captured first and used as the base style for the new
    content. Does nothing when the shape has no text frame.

    Args:
        api_client: Unused; present for interface conformance.
        content: Markdown text to write.
        autoscale: Forwarded to ``apply_markdown_to_textframe``.
    """
    if not self.has_text_frame:
        return

    from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe

    text_frame = self.pptx_element.text_frame

    # "*" signals bold/italic markdown; without it the template's own
    # bold/italic is preserved instead.
    has_markdown_formatting = "*" in content

    # Pass the slide so theme colours (accent1, dk1, ...) resolve to RGB;
    # without it a theme-coloured template run yields no colour at all.
    base_style = _extract_base_style_from_textframe(
        text_frame,
        preserve_bold_italic=not has_markdown_formatting,
        slide=self.pptx_slide,
    )

    # Use shared markdown parser to convert markdown to formatted PowerPoint text
    apply_markdown_to_textframe(
        markdown_text=content,
        text_frame=text_frame,
        base_style=base_style,
        autoscale=autoscale,
    )
def replace_image(
    self,
    api_client: PowerPointAPIClient,
    file: str | None = None,
    url: str | None = None,
):
    """Replace the picture behind this element, keeping position and size.

    The old picture is removed from the slide and a new one is added with
    the same geometry. The element's alt text is written onto the new
    picture, because alt text is what elements are matched by and a freshly
    added picture carries none.

    Args:
        api_client: Forwarded to ``set_alt_text`` and to the recursive call.
        file: Path of a local image file. Used when it exists.
        url: Image URL; downloaded to a temporary file when ``file`` is not
            usable.

    NOTE(review): the new picture is appended to the shape tree, so it may
    end up above shapes that previously overlapped the old one.
    """
    logger.debug(f"replace_image called: pptx_element={type(self.pptx_element)}, file={file}")
    if not self.pptx_element:
        logger.warning("replace_image: pptx_element is None")
        return
    if not isinstance(self.pptx_element, Picture):
        logger.warning(
            f"replace_image: pptx_element is not Picture, it's {type(self.pptx_element)}"
        )
        return

    if file and os.path.exists(file):
        # Get current position and size
        left = self.pptx_element.left
        top = self.pptx_element.top
        width = self.pptx_element.width
        height = self.pptx_element.height
        logger.debug(f"replace_image: position=({left}, {top}), size=({width}, {height})")

        # Get the slide containing this shape
        slide = self._get_parent_slide()
        if not slide:
            logger.warning(
                f"Cannot replace image: no parent slide reference for element {self.alt_text.title}"
            )
            return

        logger.debug(f"replace_image: removing old picture from slide {slide}")
        self._remove_from_slide()

        # Add new picture with same position and size
        logger.debug(f"replace_image: adding new picture from {file}")
        new_picture = slide.shapes.add_picture(file, left, top, width, height)
        self.pptx_element = new_picture

        # Carry the alt text over so the element can still be found by name
        # after the presentation is saved and reloaded.
        alt_text = getattr(self, "alt_text", None)
        if alt_text is not None:
            self.set_alt_text(
                api_client, title=alt_text.title, description=alt_text.description
            )

        logger.info(f"replace_image: successfully replaced image with {file}")

    elif url:
        # For URL images, download first using utility with retries
        content, _ = download_binary_file(url)
        # Save to temp file and use file path method
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            temp_file.write(content)
            temp_file_path = temp_file.name

        try:
            self.replace_image(api_client, file=temp_file_path)
        finally:
            os.unlink(temp_file_path)

    else:
        # Previously a silent no-op; make the skipped replacement visible.
        logger.warning(f"replace_image: no usable image source (file={file}, url={url})")
parent is not None: + parent.remove(self.pptx_element._element) + + +class PowerPointTableElement(AbstractTableElement, PowerPointElementParent): + """PowerPoint table element implementation.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + pptx_element: Any = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_pptx_element(cls, data: PptxElementInput) -> dict: + """Convert from python-pptx table element.""" + base_data = PowerPointElementParent.convert_from_pptx_element(data) + base_data["type"] = AbstractElementKind.TABLE + return base_data + + def resize( + self, + api_client: PowerPointAPIClient, + rows: int, + cols: int, + fix_width: bool = True, + fix_height: bool = True, + target_height_in: float | None = None, + ) -> float: + """Resize the table to the specified dimensions. + + Args: + target_height_in: If provided, constrain total table height to this value in inches. + + Returns: + Font scale factor (1.0 since PPTX doesn't support font scaling during resize). 
+ """ + if not self.pptx_element or not isinstance(self.pptx_element, GraphicFrame): + return 1.0 + + try: + table = self.pptx_element.table + current_rows = len(table.rows) + current_cols = len(table.columns) + + # Adjust rows + if rows > current_rows: + for _ in range(rows - current_rows): + self._add_table_row(table) + elif rows < current_rows: + # Remove excess rows (not directly supported, need to work with XML) + for i in range(current_rows - 1, rows - 1, -1): + self._remove_table_row(table, i) + + # Adjust columns + if cols > current_cols: + for _ in range(cols - current_cols): + self._add_table_column(table) + + # Copy header row (row 0) styling from rightmost existing cell to new columns + if len(table.rows) > 0 and current_cols > 0: + # Get the rightmost existing header cell (before columns were added) + rightmost_header_cell = table.cell(0, current_cols - 1) + + # Copy fill/background to new header cells + for new_col_idx in range(current_cols, cols): + new_header_cell = table.cell(0, new_col_idx) + + # Copy cell fill (background color) + try: + src_fill = rightmost_header_cell.fill + dst_fill = new_header_cell.fill + if src_fill.type is not None: + # Copy solid fill + if ( + hasattr(src_fill, "fore_color") + and src_fill.fore_color + and src_fill.fore_color.type is not None + ): + dst_fill.solid() + if src_fill.fore_color.type == MSO_COLOR_TYPE.RGB: + dst_fill.fore_color.rgb = src_fill.fore_color.rgb + elif src_fill.fore_color.type == MSO_COLOR_TYPE.SCHEME: + dst_fill.fore_color.theme_color = ( + src_fill.fore_color.theme_color + ) + except Exception as e: + logger.debug(f"Could not copy header cell fill: {e}") + + elif cols < current_cols: + # Remove excess columns (not directly supported, need to work with XML) + for i in range(current_cols - 1, cols - 1, -1): + self._remove_table_column(table, i) + + # Optionally adjust dimensions + if fix_width: + # Distribute width evenly among columns + total_width = self.pptx_element.width + col_width = 
total_width // cols + for col in table.columns: + col.width = col_width + + if fix_height or target_height_in is not None: + # Distribute height evenly among rows + # Use target_height_in if provided, otherwise use current element height + if target_height_in is not None: + # Convert inches to EMU (914400 EMU = 1 inch) + total_height = Inches(target_height_in) + else: + total_height = self.pptx_element.height + row_height = total_height // rows + for row in table.rows: + row.height = row_height + + except Exception as e: + logger.warning(f"Error resizing table: {e}") + + return 1.0 + + def update_content( + self, + api_client: PowerPointAPIClient, + markdown_content: MarkdownTableElement, + check_shape: bool = True, + font_scale_factor: float = 1.0, + ): + """Update the table content with markdown data. + + Args: + font_scale_factor: Font scale factor (currently unused for PPTX, but kept for interface conformance). + """ + if not self.pptx_element or not isinstance(self.pptx_element, GraphicFrame): + return + + try: + table = self.pptx_element.table + + # Get table data from markdown content (MarkdownTableElement has TableData in content field) + if hasattr(markdown_content, "content") and hasattr( + markdown_content.content, "headers" + ): + headers = markdown_content.content.headers + data_rows = markdown_content.content.rows + # Combine headers and data rows for the full table + all_rows_data = [headers] + data_rows + else: + # Fallback for old interface + all_rows_data = markdown_content.rows if hasattr(markdown_content, "rows") else [] + + if not all_rows_data: + return + + # Get slide reference for theme color resolution + slide = None + try: + slide = self.pptx_element.part.slide + except (AttributeError, TypeError): + pass # Fall back to None - theme colors won't be resolved + + # Extract canonical styles BEFORE resizing to ensure consistency + # Header style from row 0, body style from row 1 (or row 0 if only 1 row) + header_style = None + body_style = 
None + + if len(table.rows) > 0 and len(table.columns) > 0: + # Extract header style from first cell of first row + header_cell = table.cell(0, 0) + header_style = _extract_base_style_from_textframe( + header_cell.text_frame, + preserve_bold_italic=True, + slide=slide, + ) + + # Extract body style from first cell of second row (if exists) + if len(table.rows) > 1: + body_cell = table.cell(1, 0) + body_style = _extract_base_style_from_textframe( + body_cell.text_frame, + preserve_bold_italic=True, + slide=slide, + ) + else: + # Only one row, use header style for body too + body_style = header_style + + # Ensure table has enough rows and columns + required_rows = len(all_rows_data) + required_cols = len(all_rows_data[0]) if all_rows_data else 0 + + if check_shape: + current_rows = len(table.rows) + current_cols = len(table.columns) + + if required_rows != current_rows or required_cols != current_cols: + self.resize(api_client, required_rows, required_cols) + + # Import markdown conversion utility (same as used for text boxes) + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + # Fill table with data using canonical styles for consistency + for row_idx, row_data in enumerate(all_rows_data): + if row_idx < len(table.rows): + # Use header style for row 0, body style for all other rows + canonical_style = header_style if row_idx == 0 else body_style + + for col_idx, cell_data in enumerate(row_data): + if col_idx < len(table.columns): + cell = table.cell(row_idx, col_idx) + cell_text = str(cell_data) if cell_data is not None else "" + + # Check if content has markdown formatting indicators + has_markdown_formatting = "*" in cell_text + + # Use canonical style, but allow markdown to override bold/italic + if canonical_style and has_markdown_formatting: + # Create a copy without bold/italic so markdown can control them + from copy import deepcopy + + base_style = deepcopy(canonical_style) + base_style.bold = False + base_style.italic = False + 
def _clear_text_runs(self, element) -> None:
    """Blank the text of every run (``a:t``) inside the text bodies of *element*."""
    ns = "{http://schemas.openxmlformats.org/drawingml/2006/main}"
    for tx_body in element.iter(f"{ns}txBody"):
        for run_text in tx_body.iter(f"{ns}t"):
            run_text.text = ""


def _add_table_row(self, table: Table):
    """Add a row to the table by cloning the last row (working with XML).

    The clone keeps the last row's formatting; its text runs are emptied.
    Errors are logged as warnings and swallowed.
    """
    try:
        from copy import deepcopy

        num_rows = len(table.rows)
        if num_rows == 0:
            logger.warning("Cannot add row to empty table")
            return

        # Clone the last row's XML element (use explicit index, not -1)
        last_row = table.rows[num_rows - 1]._tr
        new_row = deepcopy(last_row)
        self._clear_text_runs(new_row)

        # Insert directly after the row it was cloned from.
        last_row.addnext(new_row)
    except Exception as e:
        logger.warning(f"Error adding table row: {e}")


def _add_table_column(self, table: Table):
    """Add a column to the table by cloning the last column (working with XML).

    A copy of the last ``gridCol`` is added to the grid and, in every row, a
    copy of the last cell (with its text runs emptied) is added after that cell.
    Errors are logged as warnings and swallowed.
    """
    try:
        from copy import deepcopy

        num_cols = len(table.columns)
        if num_cols == 0:
            logger.warning("Cannot add column to empty table")
            return

        # Clone the last column's gridCol (use explicit index, not -1)
        last_col = table.columns[num_cols - 1]._gridCol
        last_col.addnext(deepcopy(last_col))

        for row in table.rows:
            num_cells = len(row.cells)
            if num_cells > 0:
                last_cell = row.cells[num_cells - 1]._tc
                new_cell = deepcopy(last_cell)
                self._clear_text_runs(new_cell)

                # A row element may end with an a:extLst child; the new cell
                # must sit right after the last cell, not after that list,
                # so use addnext() rather than appending to the row.
                last_cell.addnext(new_cell)
    except Exception as e:
        logger.warning(f"Error adding table column: {e}")


def _remove_table_row(self, table: Table, row_index: int):
    """Remove the row at *row_index* from the table (working with XML).

    Indices at or beyond the row count are ignored; errors are logged as
    warnings and swallowed.
    """
    try:
        if row_index < len(table.rows):
            # python-pptx _Row uses ._tr for the underlying CT_TableRow XML element
            row_element = table.rows[row_index]._tr
            row_element.getparent().remove(row_element)
    except Exception as e:
        logger.warning(f"Error removing table row {row_index}: {e}")


def _remove_table_column(self, table: Table, col_index: int):
    """Remove the column at *col_index* from the table (working with XML).

    The matching cell is removed from every row first, then the column
    definition. Indices at or beyond the column count are ignored; errors are
    logged as warnings and swallowed.
    """
    try:
        if col_index < len(table.columns):
            # python-pptx _Cell uses ._tc for the underlying CT_TableCell XML element
            for row in table.rows:
                if col_index < len(row.cells):
                    cell_element = row.cells[col_index]._tc
                    cell_element.getparent().remove(cell_element)

            # python-pptx _Column uses ._gridCol for the underlying CT_TableCol XML element
            col_element = table.columns[col_index]._gridCol
            col_element.getparent().remove(col_element)
    except Exception as e:
        logger.warning(f"Error removing table column {col_index}: {e}")


def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float:
    """Get weight of horizontal borders in specified units.

    For PowerPoint tables, borders are handled via table styles and don't have
    a fixed weight that contributes to layout height the same way as Google Slides,
    so we return 0.
    """
    return 0.0
def get_row_count(self) -> int:
    """Get current number of rows (0 when there is no usable table)."""
    if not self.pptx_element or not isinstance(self.pptx_element, GraphicFrame):
        return 0
    try:
        return len(self.pptx_element.table.rows)
    except (AttributeError, TypeError):
        return 0


def get_column_count(self) -> int:
    """Get current number of columns (0 when there is no usable table)."""
    if not self.pptx_element or not isinstance(self.pptx_element, GraphicFrame):
        return 0
    try:
        return len(self.pptx_element.table.columns)
    except (AttributeError, TypeError):
        return 0


def to_markdown_element(self, name: str | None = None) -> MarkdownTableElement:
    """Convert PowerPoint table to markdown table element.

    Args:
        name: Name for the resulting element. Falls back to the alt-text title
            and then to ``"Table"``.

    Returns:
        A ``MarkdownTableElement`` whose metadata carries the object id, the
        row/column counts and, when available, size, position and title.

    Raises:
        ValueError: If the wrapped element is not a ``GraphicFrame`` or has no
            ``table`` attribute.
    """
    frame = self.pptx_element
    if not frame or not isinstance(frame, GraphicFrame):
        raise ValueError("PowerPointTableElement has no valid GraphicFrame element")

    if not hasattr(frame, "table"):
        raise ValueError("GraphicFrame does not contain a table")

    table = frame.table

    # Convert table to markdown using the converters module
    markdown_table_str = pptx_table_to_markdown(table)

    # alt_text may be missing; resolve the title once and reuse it below.
    alt_title = self.alt_text.title if self.alt_text else None

    markdown_elem = MarkdownTableElement(
        name=name or alt_title or "Table",
        content=markdown_table_str,
    )

    metadata = {
        "objectId": self.objectId,
        "rows": len(table.rows),
        "columns": len(table.columns) if table.rows else 0,
    }

    # Size is only recorded when both dimensions are truthy.
    if frame.width and frame.height:
        metadata["size"] = {"width": frame.width, "height": frame.height}

    if hasattr(frame, "left") and hasattr(frame, "top"):
        metadata["position"] = {"left": frame.left, "top": frame.top}

    if alt_title:
        metadata["title"] = alt_title

    markdown_elem.metadata.update(metadata)

    return markdown_elem
# Discriminated union type for concrete elements
PowerPointElement = Annotated[
    Union[
        Annotated[PowerPointShapeElement, Tag("shape")],
        Annotated[PowerPointImageElement, Tag("image")],
        Annotated[PowerPointTableElement, Tag("table")],
        Annotated[PowerPointElementParent, Tag("generic")],
    ],
    Discriminator(pptx_element_discriminator),
]

# TypeAdapter for validating the discriminated union
_pptx_element_adapter = TypeAdapter(PowerPointElement)


def validate_pptx_element(pptx_element: BaseShape) -> PowerPointElement:
    """Wrap a python-pptx shape in the matching concrete element class.

    The discriminator tag selects the class; any tag other than "shape",
    "image" or "table" yields the generic ``PowerPointElementParent``.
    """
    wrappers = {
        "shape": PowerPointShapeElement,
        "image": PowerPointImageElement,
        "table": PowerPointTableElement,
    }
    wrapper_cls = wrappers.get(
        pptx_element_discriminator(pptx_element), PowerPointElementParent
    )
    return wrapper_cls(**wrapper_cls.convert_from_pptx_element(pptx_element))


class PowerPointSlide(AbstractSlide):
    """PowerPoint slide implementation."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    pptx_slide: Any = Field(exclude=True, default=None)
    pptx_presentation: Any = Field(exclude=True, default=None)
    # Store the presentation ID for reference, will be propagated to elements
    presentation_id: Optional[str] = Field(default=None, exclude=True)

    def _propagate_presentation_id(self, presentation_id: Optional[str] = None) -> None:
        """Copy a presentation id onto every element of the slide.

        Uses *presentation_id* when given, otherwise the slide's own id; does
        nothing when neither is set.
        """
        target_id = self.presentation_id if presentation_id is None else presentation_id
        if target_id is None:
            return

        for element in self.elements:
            element.presentation_id = target_id
        # Also propagate via page_elements_flat in case of nested elements
        for element in self.page_elements_flat:
            element.presentation_id = target_id
def _propagate_pptx_slide(self) -> None:
    """Set pptx_slide on all elements so they can perform slide operations."""
    if self.pptx_slide is not None:
        for element in self.elements:
            element.pptx_slide = self.pptx_slide


def __setattr__(self, name: str, value) -> None:
    """Override setattr to propagate presentation_id when it's set directly."""
    super().__setattr__(name, value)
    # The hasattr check skips propagation while ``elements`` does not exist yet.
    if name == "presentation_id" and hasattr(self, "elements"):
        self._propagate_presentation_id(value)


def __init__(self, pptx_slide: Slide, **kwargs):
    """Build the slide wrapper from a python-pptx slide.

    Every shape is converted with ``validate_pptx_element``; shapes that fail
    to convert are logged and skipped.

    Args:
        pptx_slide: The python-pptx slide to wrap.
        **kwargs: Accepted but not used.
    """
    # The notes text is used as the slide's name in the log line below.
    # python-pptx creates the notes slide on first access to ``notes_slide``,
    # and ``notes_text_frame`` is None when the notes slide has no notes
    # placeholder, so guard against that instead of failing the whole slide.
    notes_frame = pptx_slide.notes_slide.notes_text_frame
    slide_name = notes_frame.text if notes_frame is not None else ""
    logger.info(f"Processing slide {pptx_slide.slide_id} with title {slide_name}")

    elements = []
    for shape in pptx_slide.shapes:
        try:
            elements.append(validate_pptx_element(shape))
        except Exception as e:
            logger.warning(f"Could not convert shape {shape}: {e}")

    # Get speaker notes
    speaker_notes = None
    if pptx_slide.has_notes_slide:
        speaker_notes = PowerPointSpeakerNotes(pptx_slide.notes_slide)

    # PowerPoint doesn't have a skip property
    slide_properties = AbstractSlideProperties(isSkipped=False)

    super().__init__(
        elements=elements,
        objectId=str(pptx_slide.slide_id),
        slideProperties=slide_properties,
        speaker_notes=speaker_notes,
    )

    self.pptx_slide = pptx_slide
    self._propagate_pptx_slide()


@property
def page_elements_flat(self) -> list[PowerPointElementParent]:
    """Flatten the elements tree into a list (here: the element list itself)."""
    return self.elements


def _get_slide_index(self) -> int:
    """Get zero-based index of this slide within the presentation.

    Returns:
        Zero-based slide index

    Raises:
        ValueError: If there is no presentation reference or the slide id is
            not found in the presentation.
    """
    if not self.pptx_presentation:
        raise ValueError("No presentation reference available")

    slide_id = self.pptx_slide.slide_id
    for i, slide in enumerate(self.pptx_presentation.slides):
        if slide.slide_id == slide_id:
            return i

    raise ValueError(f"Slide {slide_id} not found in presentation")
def thumbnail(
    self, api_client: PowerPointAPIClient, size: str, include_data: bool = False
) -> AbstractThumbnail:
    """Generate a thumbnail of the slide using LibreOffice rendering.

    The rendered PNG is written to a temporary file (not deleted here) and
    returned as a ``file://`` URL.

    Args:
        api_client: Not used by this implementation.
        size: Not used by this implementation.
        include_data: When True, the PNG bytes are attached as ``content``.

    Returns:
        The thumbnail, or a placeholder thumbnail
        (``placeholder_thumbnail.png``) when the slide index cannot be
        determined, the presentation file is missing, rendering fails, or the
        temp file cannot be written.
    """

    def placeholder(width: int = 320, height: int = 240) -> AbstractThumbnail:
        """Build the placeholder thumbnail returned on every failure path."""
        return AbstractThumbnail(
            contentUrl="placeholder_thumbnail.png",
            width=width,
            height=height,
            mime_type="image/png",
        )

    try:
        slide_index = self._get_slide_index()
    except ValueError as e:
        logger.warning(f"Cannot determine slide index: {e}")
        return placeholder()

    # presentation_id doubles as the path of the file to render
    if not self.presentation_id or not os.path.exists(self.presentation_id):
        logger.warning(f"Presentation file not found: {self.presentation_id}")
        return placeholder()

    logger.debug(f"Rendering thumbnail for slide {slide_index} from {self.presentation_id}")
    png_bytes = render_slide_to_image(
        presentation_path=self.presentation_id,
        slide_index=slide_index,
        crop_bounds=None,  # Full slide, not cropped
    )

    if png_bytes is None:
        logger.warning(
            f"Failed to render thumbnail for slide {self.objectId} "
            f"(LibreOffice/pdftoppm not available or rendering failed)"
        )
        return placeholder()

    # Read the dimensions; the context manager releases the image afterwards.
    try:
        with Image.open(io.BytesIO(png_bytes)) as img:
            width, height = img.size
    except Exception as e:
        logger.warning(f"Failed to read image dimensions: {e}")
        width, height = 320, 240

    # Save to temp file and return file:// URL
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".png", prefix="slide_thumb_"
        ) as tmp:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = tmp.name
            tmp.write(png_bytes)

        return AbstractThumbnail(
            contentUrl=f"file://{os.path.abspath(temp_path)}",
            width=width,
            height=height,
            mime_type="image/png",
            content=png_bytes if include_data else None,
        )
    except Exception as e:
        # Clean up temp file if it was created
        if temp_path:
            try:
                os.unlink(temp_path)
                logger.debug(f"Cleaned up temp file after error: {temp_path}")
            except Exception:
                pass
        logger.error(f"Failed to save thumbnail to temp file: {e}")
        return placeholder(width, height)
def _copy_shape_to_slide(self, source_shape: BaseShape, target_slide: Slide):
    """Copy a shape from one slide to another.

    Text boxes and auto shapes with a text frame are recreated as a text box
    carrying the plain text; tables are recreated with the same geometry and
    each cell's plain text. Pictures are not copied (a warning is logged);
    other shape types are ignored.
    """
    shape_type = source_shape.shape_type

    if shape_type in (MSO_SHAPE_TYPE.AUTO_SHAPE, MSO_SHAPE_TYPE.TEXT_BOX):
        # Text boxes report TEXT_BOX rather than AUTO_SHAPE, so both are accepted.
        if hasattr(source_shape, "text_frame") and source_shape.text_frame:
            textbox = target_slide.shapes.add_textbox(
                source_shape.left, source_shape.top, source_shape.width, source_shape.height
            )
            textbox.text_frame.text = source_shape.text_frame.text

    elif shape_type == MSO_SHAPE_TYPE.PICTURE:
        # Copying a picture needs the image data; not implemented yet.
        logger.warning("Image copying not fully implemented")

    elif shape_type == MSO_SHAPE_TYPE.TABLE:
        if hasattr(source_shape, "table"):
            table = source_shape.table
            rows = len(table.rows)
            cols = len(table.columns)

            new_table = target_slide.shapes.add_table(
                rows,
                cols,
                source_shape.left,
                source_shape.top,
                source_shape.width,
                source_shape.height,
            ).table

            # Copy cell contents (plain text only)
            for row_idx in range(rows):
                for col_idx in range(cols):
                    new_table.cell(row_idx, col_idx).text = table.cell(row_idx, col_idx).text
class PowerPointPresentation(AbstractPresentation):
    """PowerPoint presentation implementation."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    pptx_presentation: Any = Field(exclude=True, default=None)
    file_path: Optional[str] = None
    uploaded_url: Optional[str] = None

    def __init__(
        self,
        pptx_presentation: Presentation,
        file_path: Optional[str] = None,
        uploaded_url: Optional[str] = None,
    ):
        """Wrap a python-pptx presentation.

        Slides that fail to convert are logged and skipped. The presentation id
        is the file path (``"untitled.pptx"`` when none is given); the title is
        the core-properties title or, failing that, the file name without its
        ``.pptx`` suffix.
        """
        slides = []
        for pptx_slide in pptx_presentation.slides:
            try:
                slides.append(PowerPointSlide(pptx_slide))
            except Exception as e:
                logger.warning(f"Could not convert slide: {e}")

        presentation_id = file_path or "untitled.pptx"

        # Strip only a trailing ".pptx"; str.replace would also remove the
        # substring from the middle of the file name.
        fallback_title = os.path.basename(presentation_id)
        if fallback_title.endswith(".pptx"):
            fallback_title = fallback_title[: -len(".pptx")]
        title = getattr(pptx_presentation.core_properties, "title", None) or fallback_title

        super().__init__(
            slides=slides,
            presentationId=presentation_id,
            revisionId=None,  # PowerPoint doesn't have revision IDs like Google Slides
            title=title,
        )

        self.pptx_presentation = pptx_presentation
        self.file_path = file_path
        self.uploaded_url = uploaded_url

        # Propagate presentation_id and pptx_presentation to all slides and their elements
        for slide in self.slides:
            slide.presentation_id = presentation_id
            slide.pptx_presentation = pptx_presentation

    @property
    def url(self) -> str:
        """Return the file path as URL (file system based).

        Raises:
            ValueError: If the presentation has no file path.
        """
        if self.file_path:
            return f"file://{os.path.abspath(self.file_path)}"
        raise ValueError("No file path specified for presentation")

    def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float:
        """Return slide height in specified units."""
        # python-pptx stores slide dimensions in EMU
        height_emu = float(self.pptx_presentation.slide_height)
        return from_emu(height_emu, units)
@classmethod
def from_id(
    cls,
    api_client: PowerPointAPIClient,
    presentation_id: str,
    uploaded_url: Optional[str] = None,
) -> "PowerPointPresentation":
    """Load presentation from file path/ID.

    Raises:
        FileNotFoundError: If *presentation_id* is not an existing path.
        ValueError: If the file exists but cannot be loaded.
    """
    # In the context of pptx, ID is simply a filename
    if not os.path.exists(presentation_id):
        raise FileNotFoundError(f"Presentation file not found: {presentation_id}")

    try:
        pptx_presentation = Presentation(presentation_id)
        return cls(pptx_presentation, presentation_id, uploaded_url=uploaded_url)
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Could not load presentation from {presentation_id}: {e}") from e


def copy_via_drive(
    self,
    api_client: PowerPointAPIClient,
    copy_title: str,
    folder_id: Optional[str] = None,
) -> "PowerPointPresentation":
    """Copy presentation to another location and load the copy.

    Raises:
        ValueError: If this presentation has no file path.
    """
    if not self.file_path:
        raise ValueError("Cannot copy presentation without a file path")

    # The API client performs the copy and reports the new id under "id".
    copy_result = api_client.copy_presentation(self.file_path, copy_title, folder_id)

    return PowerPointPresentation.from_id(api_client, copy_result["id"])


def sync_from_cloud(self, api_client: PowerPointAPIClient):
    """Re-read presentation from filesystem.

    Does nothing when there is no file path or the file does not exist.
    Slides that fail to convert are logged and skipped.
    """
    if not self.file_path or not os.path.exists(self.file_path):
        return

    pptx_presentation = Presentation(self.file_path)
    self.pptx_presentation = pptx_presentation

    slides = []
    for pptx_slide in pptx_presentation.slides:
        try:
            slides.append(PowerPointSlide(pptx_slide))
        except Exception as e:
            logger.warning(f"Could not convert slide during sync: {e}")

    self.slides = slides

    # Rebuilt slides need both references, exactly as after construction;
    # without pptx_presentation they cannot resolve their own index.
    for slide in self.slides:
        slide.presentation_id = self.presentationId
        slide.pptx_presentation = pptx_presentation

    # Strip only a trailing ".pptx" from the fallback title.
    fallback_title = os.path.basename(self.file_path)
    if fallback_title.endswith(".pptx"):
        fallback_title = fallback_title[: -len(".pptx")]
    self.title = getattr(pptx_presentation.core_properties, "title", None) or fallback_title
def _update_app_properties_metadata(self) -> None:
    """
    Bring the slide counts in docProps/app.xml in line with the presentation.

    Three places are updated when present: the <Slides> count, the number
    following the "Slide Titles" entry in HeadingPairs, and the size of the
    TitlesOfParts vector (surplus entries are removed). Does nothing when the
    package has no extended-properties part.
    """
    from lxml import etree

    package = self.pptx_presentation.part.package

    # The extended-properties relationship points at app.xml.
    app_part = next(
        (
            rel._target
            for rel in package._rels.values()
            if "extended-properties" in rel.reltype
        ),
        None,
    )
    if app_part is None:
        return

    root = etree.fromstring(app_part.blob)

    ns = {
        "ep": "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties",
        "vt": "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes",
    }

    slide_count = len(self.pptx_presentation.slides)

    # <Slides> count
    slides_node = root.find(".//ep:Slides", ns)
    if slides_node is not None:
        slides_node.text = str(slide_count)

    # HeadingPairs: the variant after the "Slide Titles" label holds the count
    pairs_vector = root.find(".//ep:HeadingPairs/vt:vector", ns)
    if pairs_vector is not None:
        variants = pairs_vector.findall("vt:variant", ns)
        for label_variant, count_variant in zip(variants, variants[1:]):
            label = label_variant.find("vt:lpstr", ns)
            if label is None or label.text != "Slide Titles":
                continue
            count_node = count_variant.find("vt:i4", ns)
            if count_node is not None:
                count_node.text = str(slide_count)

    # TitlesOfParts: resize the vector and drop entries beyond the new size
    parts_vector = root.find(".//ep:TitlesOfParts/vt:vector", ns)
    if parts_vector is not None:
        new_size = 1 + slide_count  # 1 theme + N slides
        parts_vector.set("size", str(new_size))
        for surplus in parts_vector.findall("vt:lpstr", ns)[new_size:]:
            parts_vector.remove(surplus)

    # Store the modified XML back on the part
    app_part._blob = etree.tostring(
        root, xml_declaration=True, encoding="UTF-8", standalone="yes"
    )
def _renumber_slides_in_file(self, file_path: str) -> None:
    """
    Post-process a saved PPTX file to renumber slides sequentially.

    PowerPoint Online requires slide files to be numbered sequentially starting from 1.
    Python-pptx may create non-sequential slide numbers (e.g., slide16.xml through slide24.xml)
    when slides are copied and originals deleted. This method fixes that.

    Slide parts and notes-slide parts are each renumbered by their own
    sequence, so a renamed notes slide can never overwrite another one. The
    archive is rewritten in place only when some slide number is out of
    sequence.

    Args:
        file_path: Path of the saved .pptx archive to rewrite.
    """
    import re
    import zipfile

    with zipfile.ZipFile(file_path, "r") as zf:
        file_contents = {name: zf.read(name) for name in zf.namelist()}

    def sequential_map(part_pattern):
        """Map every out-of-sequence part number to its 1-based rank."""
        numbers = sorted(
            int(found.group(1))
            for found in (part_pattern.match(name) for name in file_contents)
            if found
        )
        return {old: new for new, old in enumerate(numbers, start=1) if old != new}

    renumber_map = sequential_map(re.compile(r"^ppt/slides/slide(\d+)\.xml$"))
    if not renumber_map:
        # Slides are already sequential, nothing to do
        return

    # Notes slides carry their own numbering, independent of the slides'.
    notes_map = sequential_map(re.compile(r"^ppt/notesSlides/notesSlide(\d+)\.xml$"))

    logger.info(f"Renumbering slides: {renumber_map}")

    # (archive-name pattern, numbering to apply) for parts and their .rels files
    rename_rules = (
        (re.compile(r"^(ppt/slides/slide)(\d+)(\.xml)$"), renumber_map),
        (re.compile(r"^(ppt/slides/_rels/slide)(\d+)(\.xml\.rels)$"), renumber_map),
        (re.compile(r"^(ppt/notesSlides/notesSlide)(\d+)(\.xml)$"), notes_map),
        (re.compile(r"^(ppt/notesSlides/_rels/notesSlide)(\d+)(\.xml\.rels)$"), notes_map),
    )

    def renamed(name):
        """Return the archive name after renumbering (unchanged if not affected)."""
        for rule, mapping in rename_rules:
            found = rule.match(name)
            if found:
                number = int(found.group(2))
                if number in mapping:
                    return f"{found.group(1)}{mapping[number]}{found.group(3)}"
                return name
        return name

    # References are rewritten in ONE pass so a freshly written number can
    # never be picked up and rewritten again by a later mapping entry.
    reference = re.compile(r"(?<![A-Za-z0-9])(slide|notesSlide)(\d+)\.xml")
    maps_by_kind = {"slide": renumber_map, "notesSlide": notes_map}

    def rewrite_reference(found):
        kind = found.group(1)
        new_number = maps_by_kind[kind].get(int(found.group(2)))
        if new_number is None:
            return found.group(0)
        return f"{kind}{new_number}.xml"

    new_contents = {}
    for name, content in file_contents.items():
        new_content = content
        try:
            text_content = content.decode("utf-8")
        except UnicodeDecodeError:
            # Binary file, don't modify
            pass
        else:
            rewritten = reference.sub(rewrite_reference, text_content)
            if rewritten != text_content:
                new_content = rewritten.encode("utf-8")

        new_contents[renamed(name)] = new_content

    # Write the new file
    with zipfile.ZipFile(file_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for name, content in new_contents.items():
            zf.writestr(name, content)

    logger.info(f"Slide renumbering complete. Renumbered {len(renumber_map)} slides.")
def _write_presentation(self, file_path: str) -> None:
    """Run the full save pipeline for *file_path* (shared by save and save_as)."""
    # Update app.xml metadata before saving (fixes slide count after add/delete)
    self._update_app_properties_metadata()

    # Ensure directory exists
    dir_path = os.path.dirname(file_path)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)

    self.pptx_presentation.save(file_path)

    # Renumber slides to be sequential (fixes PowerPoint Online compatibility)
    self._renumber_slides_in_file(file_path)


def save(self, api_client: PowerPointAPIClient) -> None:
    """Save/persist all changes made to this presentation.

    Raises:
        ValueError: If the presentation has no file path.
    """
    if not self.file_path:
        raise ValueError("No file path specified for saving")

    self._write_presentation(self.file_path)


def save_as(self, api_client: PowerPointAPIClient, file_path: str):
    """Save the presentation to a new file path and adopt that path."""
    self._write_presentation(file_path)

    # Update our file path
    self.file_path = file_path
    self.presentationId = file_path

    # Slides use presentation_id as the path of the file to render, so they
    # must follow the presentation to its new location.
    for slide in self.slides:
        slide.presentation_id = file_path


def insert_copy(
    self,
    source_slide: AbstractSlide,
    api_client: PowerPointAPIClient,
    insertion_index: int | None = None,
) -> AbstractSlide:
    """Insert a copy of a slide from another presentation into this presentation.

    The slide is copied with ``SlideCopierManager``; if that raises or yields
    no slide, the simple fallback copy is used instead.

    Args:
        source_slide: Slide to copy; must be a ``PowerPointSlide``.
        api_client: Passed through to the fallback implementation.
        insertion_index: Position forwarded to the copier.

    Returns:
        The wrapper for the newly created slide.

    Raises:
        ValueError: If *source_slide* is not a ``PowerPointSlide``.
    """
    if not isinstance(source_slide, PowerPointSlide):
        raise ValueError("Can only copy PowerPointSlide instances")

    # Import the robust slide copier
    from gslides_api.pptx.slide_copier import SlideCopierManager

    with SlideCopierManager(self.pptx_presentation) as copier:
        try:
            new_pptx_slide = copier.copy_slide_safe(
                source_slide.pptx_slide,
                insertion_index=insertion_index,
                fallback_to_layout_only=True,
            )

            if new_pptx_slide is None:
                raise RuntimeError("All slide copying strategies failed")

            stats = copier.get_copy_statistics()
            logger.info(f"Slide copy completed. Stats: {stats}")

            new_slide = PowerPointSlide(new_pptx_slide)

            for elem in new_slide.page_elements_flat:
                logger.debug(
                    f"Copied slide element: alt_text.title='{elem.alt_text.title}' "
                    f"type={type(elem).__name__}"
                )

            # Propagate presentation_id and pptx_presentation to the new slide and its elements
            new_slide.presentation_id = self.presentationId
            new_slide.pptx_presentation = self.pptx_presentation

            # NOTE(review): the wrapper is appended to the local list even when
            # insertion_index is given — confirm this matches where the copier
            # places the slide.
            self.slides.append(new_slide)

            # Set parent references for the new slide and its elements
            new_slide._parent_presentation = self
            for element in new_slide.elements:
                element._parent_presentation = self

            return new_slide

        except Exception as e:
            logger.error(f"Robust slide copying failed: {e}")
            # Fall back to the original simple implementation as last resort
            return self._insert_copy_fallback(source_slide, api_client, insertion_index)
def _insert_copy_fallback(
    self,
    source_slide: PowerPointSlide,
    api_client: PowerPointAPIClient,
    insertion_index: int | None = None,
) -> AbstractSlide:
    """Fallback implementation using the original simple copying method.

    Creates a new slide from the source slide's layout (or, if that cannot be
    read, from the layout with the fewest placeholders), removes the layout
    placeholders and copies the speaker-notes text. Shapes are not copied.

    Args:
        source_slide: Slide whose layout and notes are copied.
        api_client: Not used by this implementation.
        insertion_index: Not used; the slide is added via ``add_slide``.

    Returns:
        The wrapper for the new slide, registered in ``self.slides``.
    """
    logger.warning("Using fallback slide copying method")

    source_pptx_slide = source_slide.pptx_slide

    try:
        layout = source_pptx_slide.slide_layout
    except Exception:
        # Use the layout with the fewest placeholders as a stand-in for "blank".
        layout = min(
            self.pptx_presentation.slide_layouts,
            key=lambda candidate: len(candidate.placeholders),
        )

    new_slide = self.pptx_presentation.slides.add_slide(layout)
    _remove_layout_placeholders(new_slide)

    # Copy speaker notes if they exist
    if source_pptx_slide.has_notes_slide and source_pptx_slide.notes_slide.notes_text_frame:
        try:
            notes_text = source_pptx_slide.notes_slide.notes_text_frame.text
            if notes_text.strip():
                new_slide.notes_slide.notes_text_frame.text = notes_text
        except Exception as e:
            logger.warning(f"Could not copy notes in fallback method: {e}")

    new_slide_obj = PowerPointSlide(new_slide)
    # Propagate presentation_id and pptx_presentation to the new slide and its elements
    new_slide_obj.presentation_id = self.presentationId
    new_slide_obj.pptx_presentation = self.pptx_presentation

    # Keep the local slide list in step with the presentation, as the main
    # copy path does; otherwise list indices no longer match the file.
    self.slides.append(new_slide_obj)

    # Set parent references for the new slide and its elements
    new_slide_obj._parent_presentation = self
    for element in new_slide_obj.elements:
        element._parent_presentation = self

    return new_slide_obj
def delete_slide(self, slide: Union["PowerPointSlide", int], api_client: PowerPointAPIClient):
    """Delete one slide, given either the slide object or its index.

    The deletion is delegated to ``SlideDeleter``; on success the slide is
    also dropped from ``self.slides`` and any warnings are logged.

    Raises:
        ValueError: If the deleter reports failure (or, for a slide object,
            if it is not in ``self.slides``).
    """
    from gslides_api.pptx.slide_deleter import SlideDeleter

    slide_idx = slide if isinstance(slide, int) else self.slides.index(slide)

    result = SlideDeleter(self.pptx_presentation).delete_slide(slide_idx)

    if not result.success:
        error_msg = f"Failed to delete slide at index {slide_idx}: {result.error_message}"
        logger.error(error_msg)
        raise ValueError(error_msg)

    # Mirror the deletion in the local slide list
    self.slides.pop(slide_idx)
    logger.info(f"Successfully deleted slide at index {slide_idx}")

    for warning in result.warnings:
        logger.warning(f"Slide deletion warning: {warning}")
def delete_slides(
    self, slides: List[Union["PowerPointSlide", int]], api_client: PowerPointAPIClient
):
    """Delete multiple slides from the presentation using robust SlideDeleter.

    Args:
        slides: Slide objects and/or indices. Objects not found in
            ``self.slides`` are logged and skipped; a slide named more than
            once is deleted once.
        api_client: Not used by this implementation.
    """
    from gslides_api.pptx.slide_deleter import SlideDeleter

    slide_indices = []
    for slide in slides:
        if isinstance(slide, int):
            slide_indices.append(slide)
        else:
            try:
                slide_indices.append(self.slides.index(slide))
            except ValueError:
                logger.warning(f"Slide {slide} not found in presentation")

    # Drop repeated indices (keeping first-seen order) so no slide is deleted twice.
    slide_indices = list(dict.fromkeys(slide_indices))

    if not slide_indices:
        logger.info("No valid slides to delete")
        return

    # SlideDeleter handles reverse order deletion internally
    deleter = SlideDeleter(self.pptx_presentation)
    results = deleter.delete_slides(slide_indices)

    successfully_deleted = set()
    for result in results:
        if result.success:
            successfully_deleted.add(result.slide_index)
            for warning in result.warnings:
                logger.warning(f"Slide deletion warning: {warning}")
        else:
            logger.error(
                f"Failed to delete slide at index {result.slide_index}: {result.error_message}"
            )

    # Pop from the highest index down so earlier pops don't shift later ones.
    for idx in sorted(successfully_deleted, reverse=True):
        self.slides.pop(idx)

    logger.info(f"Deleted {len(successfully_deleted)}/{len(slide_indices)} slides successfully")


def move_slide(
    self,
    slide: Union["PowerPointSlide", int],
    insertion_index: int,
    api_client: PowerPointAPIClient,
):
    """Move a slide to a new position within the presentation.

    Both the presentation's slide-id list and the local ``self.slides`` list
    are reordered (remove, then insert at *insertion_index*).

    Args:
        slide: The slide object or its current index in ``self.slides``.
        insertion_index: Target position after the slide has been removed.
        api_client: Not used by this implementation.
    """
    if isinstance(slide, int):
        slide = self.slides[slide]

    current_index = self.slides.index(slide)
    logger.debug(f"Moving slide from index {current_index} to {insertion_index}")

    # python-pptx has no public reordering API; slide order is the order of
    # the <p:sldId> children of the presentation's slide-id list.
    sld_id_lst = self.pptx_presentation.slides._sldIdLst
    slide_id = slide.pptx_slide.slide_id
    for sld_id in sld_id_lst:
        if sld_id.id == slide_id:
            sld_id_lst.remove(sld_id)
            sld_id_lst.insert(insertion_index, sld_id)
            break
    else:
        logger.warning(f"Slide {slide_id} not found in presentation; only the local order changed")

    self.slides.remove(slide)
    self.slides.insert(insertion_index, slide)
def duplicate_slide(
    self, slide: Union["PowerPointSlide", int], api_client: PowerPointAPIClient
) -> "PowerPointSlide":
    """Duplicate a slide within the presentation.

    Args:
        slide: The slide object or its index in ``self.slides``.
        api_client: Not used by this implementation.

    Returns:
        The wrapper for the copy, appended to ``self.slides``.

    Raises:
        RuntimeError: If the copier does not produce a slide.
    """
    if isinstance(slide, int):
        slide = self.slides[slide]

    from gslides_api.pptx.slide_copier import SlideCopierManager

    with SlideCopierManager(self.pptx_presentation) as copier:
        new_pptx_slide = copier.copy_slide_safe(slide.pptx_slide, fallback_to_layout_only=True)

        if not new_pptx_slide:
            raise RuntimeError("Failed to duplicate slide")

        new_slide = PowerPointSlide(new_pptx_slide)
        # Propagate presentation_id to the new slide and its elements
        new_slide.presentation_id = self.presentationId
        new_slide.pptx_presentation = self.pptx_presentation
        self.slides.append(new_slide)

        # Set parent references, as insert_copy does for slides it creates.
        new_slide._parent_presentation = self
        for element in new_slide.elements:
            element._parent_presentation = self

        return new_slide


async def get_slide_thumbnails(
    self,
    api_client: "PowerPointAPIClient",
    slides: Optional[List["AbstractSlide"]] = None,
) -> List[AbstractThumbnail]:
    """Get thumbnails for slides using efficient batch PDF conversion.

    Converts PPTX to PDF once and extracts all slide images in a single operation,
    which is much more efficient than converting per-slide. The presentation is
    saved first so that the rendering reflects the latest changes.

    Args:
        api_client: The PowerPoint API client
        slides: Optional list of slides to get thumbnails for. If None, uses all slides.

    Returns:
        List of AbstractThumbnail objects with image data, one per requested
        slide; a slide that is unknown or has no rendered image gets an empty
        320x240 placeholder. An empty list is returned when there is no file
        path or rendering produced nothing.
    """
    from io import BytesIO

    from PIL import Image

    from gslides_api.pptx.chart_renderer import render_all_slides_to_images

    if not self.file_path:
        logger.warning("Cannot generate thumbnails: no file path")
        return []

    # Map the requested slides to indices in the presentation
    if slides is not None:
        slide_indices = []
        for slide in slides:
            try:
                slide_indices.append(self.slides.index(slide))
            except ValueError:
                logger.warning(f"Slide not found in presentation: {slide}")
                slide_indices.append(-1)  # Mark as not found
    else:
        slide_indices = list(range(len(self.slides)))

    # Save presentation to ensure latest changes are on disk
    self.save(api_client)

    # Render all slides at once (efficient: single PPTX->PDF conversion)
    all_png_bytes = await render_all_slides_to_images(
        presentation_path=self.file_path,
        dpi=150,
    )

    if not all_png_bytes:
        logger.warning("Batch rendering produced no images")
        return []

    thumbnails = []
    for idx in slide_indices:
        if idx < 0 or idx >= len(all_png_bytes):
            # Slide not found or out of range, use placeholder
            thumbnails.append(AbstractThumbnail(
                contentUrl="",
                width=320,
                height=240,
                mime_type="image/png",
                content=b"",
                file_size=0,
            ))
            continue

        png_bytes = all_png_bytes[idx]

        # The context manager closes the image even if reading the size fails.
        try:
            with Image.open(BytesIO(png_bytes)) as img:
                width, height = img.size
        except Exception as e:
            logger.warning(f"Failed to read image dimensions: {e}")
            width, height = 320, 240

        thumbnails.append(AbstractThumbnail(
            contentUrl="",
            width=width,
            height=height,
            mime_type="image/png",
            content=png_bytes,
            file_size=len(png_bytes),
        ))

    return thumbnails
class ElementSizeMeta(BaseModel):
    """Presentation element size metadata, captured during layout ingestion."""

    # Text box width and height, in inches.
    box_width_inches: float
    box_height_inches: float
    # Font size, in points.
    font_size_pt: float

    @computed_field
    @property
    def approx_char_capacity(self) -> int:
        """Estimate how many characters fit in this textbox.

        Returns:
            The estimated character count, or 0 when the font size or either
            box dimension is not positive.
        """
        # Convert points to inches (72 pt per inch): a character is taken to be
        # half the font size wide and a line 1.2 font sizes tall.
        char_width_in = self.font_size_pt * 0.5 / 72
        line_height_in = self.font_size_pt * 1.2 / 72
        if char_width_in <= 0 or line_height_in <= 0:
            return 0
        # A box with no (or negative) extent holds nothing; without this guard
        # one negative dimension yields a negative capacity and two negative
        # dimensions yield a spurious positive one.
        if self.box_width_inches <= 0 or self.box_height_inches <= 0:
            return 0
        chars_per_line = self.box_width_inches / char_width_in
        num_lines = self.box_height_inches / line_height_in
        # NOTE(review): 0.85 looks like a fill factor for imperfect packing — confirm.
        return int(chars_per_line * num_lines * 0.85)
@log_time
def download_binary_file(url: str, timeout: int = 30, **retry_kwargs) -> tuple[bytes, int | None]:
    """
    Calls download_file and returns the binary content and file size of the file.

    Args:
        url: URL to download the file from
        timeout: Timeout in seconds for the request (default: 30)
        **retry_kwargs: Additional retry parameters passed to download_file

    Returns:
        A tuple of (binary_content, file_size_from_headers)
        file_size_from_headers is None if the Content-Length header is not
        present or is not a valid integer
    """
    response = download_file(url, timeout=timeout, **retry_kwargs)
    content_length = response.headers.get("content-length")
    try:
        file_size = int(content_length) if content_length else None
    except ValueError:
        # The body was downloaded successfully; a malformed header (for
        # example a repeated Content-Length joined into "10, 10") must not
        # turn that into a failure, so report the size as unknown.
        logger.warning("Ignoring malformed Content-Length header: %r", content_length)
        file_size = None
    return response.content, file_size
def _raise_if_payload_denies_access(status_code, load_payload, file_id: str) -> None:
    """Raise the access-denied exception if an HTTP error payload calls for it.

    Args:
        status_code: HTTP status of the failed request; only 403 and 404 are examined.
        load_payload: Zero-argument callable returning the decoded JSON error body.
        file_id: The Google Drive file ID that was being accessed.

    Raises:
        GoogleDriveFileAccessDeniedException: If an entry of ``error.errors``
            has reason ``notFound`` or ``appNotAuthorizedToFile``, or if the
            status is 404 (whether or not the payload could be parsed).
    """
    if status_code not in (403, 404):
        return

    denied_entry = None
    try:
        for entry in load_payload().get("error", {}).get("errors", []):
            if entry.get("reason", "") in ("notFound", "appNotAuthorizedToFile"):
                denied_entry = entry
                break
    except (ValueError, KeyError, AttributeError, TypeError):
        # Unparseable or unexpectedly shaped payload (invalid JSON, bad
        # encoding, "error" given as a plain string, ...): decide on the
        # status code alone. JSONDecodeError and UnicodeDecodeError are both
        # ValueError subclasses.
        denied_entry = None

    if denied_entry is not None:
        raise GoogleDriveFileAccessDeniedException(
            file_id=file_id,
            message=denied_entry.get("message"),
        )
    # If 404 without specific reason, still treat as access denied
    if status_code == 404:
        raise GoogleDriveFileAccessDeniedException(
            file_id=file_id,
            message=f"File not accessible: {file_id}. Please grant access using the file picker.",
        )


def detect_file_access_denied_error(error: Exception, file_id: str) -> None:
    """Check if an error is a Google Drive file access denied error and raise appropriate exception.

    Three sources of evidence are checked in order: the lowercased text of
    the error, a googleapiclient-style error (``resp`` and ``content``
    attributes), and an httpx-style error (``response`` attribute with a
    ``status_code``). According to the original documentation, the Drive API
    answers 404 "File not found" when an app using the drive.file scope
    cannot see a file, and 403 "appNotAuthorizedToFile" in some cases.

    Args:
        error: The exception to check
        file_id: The Google Drive file ID that was being accessed

    Returns:
        None when the error does not look like a file access denial.

    Raises:
        GoogleDriveFileAccessDeniedException: If the error indicates file access was denied
    """
    error_str = str(error).lower()

    # "file not found" and "requested entity was not found" both contain
    # "not found", so one substring test covers every 404 wording.
    if "404" in error_str and "not found" in error_str:
        raise GoogleDriveFileAccessDeniedException(
            file_id=file_id,
            message=f"File not accessible: {file_id}. Please grant access using the file picker.",
        )

    # Check for common 403 file access denied patterns
    if "403" in error_str and (
        "appnotauthorizedtofile" in error_str
        or "has not granted the app" in error_str
        or "access to the file" in error_str
    ):
        raise GoogleDriveFileAccessDeniedException(
            file_id=file_id,
            message=f"Access denied to file {file_id}. Please grant access using the file picker.",
        )

    # Handle googleapiclient HttpError objects
    if hasattr(error, "resp") and hasattr(error, "content"):

        def _load_googleapiclient_payload():
            content = error.content
            if isinstance(content, bytes):
                content = content.decode("utf-8")
            return json.loads(content)

        _raise_if_payload_denies_access(
            getattr(error.resp, "status", None), _load_googleapiclient_payload, file_id
        )

    # Handle httpx response errors
    response = getattr(error, "response", None)
    if response is not None and hasattr(response, "status_code"):
        # The lambda defers the attribute lookup so that a response without
        # .json() is handled inside the helper's error handling.
        _raise_if_payload_denies_access(response.status_code, lambda: response.json(), file_id)
+ """ + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs) -> Any: + # Get logger from the module where the decorated function is defined + logger = logging.getLogger(func.__module__) + + start_time = time.time() + logger.info("Entering %s", func.__qualname__) + try: + result = await func(*args, **kwargs) + return result + finally: + elapsed_time = time.time() - start_time + logger.info("Exiting %s (took %.2f s)", func.__qualname__, elapsed_time) + + @functools.wraps(func) + def sync_wrapper(*args, **kwargs) -> Any: + # Get logger from the module where the decorated function is defined + logger = logging.getLogger(func.__module__) + + start_time = time.time() + logger.info("Entering %s", func.__qualname__) + try: + result = func(*args, **kwargs) + return result + finally: + elapsed_time = time.time() - start_time + logger.info("Exiting %s (took %.2f s)", func.__qualname__, elapsed_time) + + return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper diff --git a/gslides_api/common/presentation_id.py b/gslides_api/common/presentation_id.py new file mode 100644 index 0000000..75a1098 --- /dev/null +++ b/gslides_api/common/presentation_id.py @@ -0,0 +1,30 @@ +"""Utility for normalizing Google Slides presentation IDs from URLs.""" + +import logging +import re +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + + +def normalize_presentation_id(presentation_id_or_url: str) -> str: + """ + Extract presentation ID from a presentation ID or URL (e.g. "https://docs.google.com/presentation/d/1234567890/edit?slide=id.p1"). 
def _exception_tuple(exceptions):
    """Coerce *exceptions* into the tuple form an ``except`` clause accepts."""
    if isinstance(exceptions, type):
        return (exceptions,)
    # An ``except`` clause rejects lists and other sequences; only a class or
    # a tuple of classes is allowed.
    return tuple(exceptions)


def _failure_delay(func, error, attempt, max_attempts, backoff):
    """Log failed attempt number *attempt* and return the wait before the next one.

    Returns None when *attempt* was the last allowed attempt, in which case
    the caller re-raises the error.
    """
    name = getattr(func, "__name__", repr(func))
    if attempt >= max_attempts:
        logger.error(
            "Failed to execute %s after %d attempts. Final error: %s",
            name,
            max_attempts,
            str(error),
        )
        return None

    delay = min(
        backoff["initial_delay"] * (backoff["exponential_base"] ** (attempt - 1)),
        backoff["max_delay"],
    )
    if backoff["jitter"]:
        # Jitter is applied after the cap, so the wait can reach 2 * max_delay.
        delay *= 1 + random.random()

    logger.warning(
        "Attempt %d/%d for %s failed: %s. Retrying in %.2f seconds...",
        attempt,
        max_attempts,
        name,
        str(error),
        delay,
    )
    return delay


# Annotations that mention the module-level TypeVar ``T`` are quoted so they
# are not evaluated when the functions are defined.
def retry(
    func: "Optional[Callable[..., T]]" = None,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    *,
    max_attempts: int = 3,
    initial_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
    jitter: bool = True,
    exceptions: Union[Type[Exception], Sequence[Type[Exception]]] = Exception,
) -> "Union[Callable[..., T], T]":
    """
    Can be used both as a decorator or a function to retry operations with exponential backoff.

    Can be used in two ways:
    1. As a decorator:
        @retry(max_attempts=3)
        def my_func():
            ...

    2. As a function:
        retry(my_func, kwargs={'param': 'value'}, max_attempts=3)

    Coroutine functions are supported; their retries wait with asyncio.sleep.

    Args:
        func: Function to retry (optional when used as decorator)
        args: Tuple of positional arguments to pass to the function (default: empty tuple)
        kwargs: Dictionary of keyword arguments to pass to the function (default: None)
        max_attempts: Maximum number of attempts, at least 1 (default: 3)
        initial_delay: Initial delay between retries in seconds (default: 1.0)
        max_delay: Maximum delay between retries in seconds, before jitter (default: 60.0)
        exponential_base: Base for exponential backoff (default: 2.0)
        jitter: Whether to add random jitter to delays (default: True)
        exceptions: Exception class, or any sequence of exception classes,
            to catch and retry (default: Exception)

    Returns:
        The return value of the function if successful, or a decorator function if used as decorator

    Raises:
        ValueError: If max_attempts is smaller than 1
        The last exception encountered if all retries fail
    """
    if max_attempts < 1:
        # With no attempts the function would never run and None would be
        # returned silently; reject the configuration instead.
        raise ValueError("max_attempts must be at least 1")

    kwargs = kwargs or {}
    caught = _exception_tuple(exceptions)
    backoff = {
        "initial_delay": initial_delay,
        "max_delay": max_delay,
        "exponential_base": exponential_base,
        "jitter": jitter,
    }

    def decorator(f):
        if asyncio.iscoroutinefunction(f):

            @wraps(f)
            async def wrapped_async(*a, **kw):
                return await _async_retry(
                    f,
                    args=a,
                    kwargs=kw,
                    max_attempts=max_attempts,
                    exceptions=caught,
                    **backoff,
                )

            return wrapped_async

        @wraps(f)
        def wrapped(*a, **kw):
            # The final iteration either returns or re-raises, so the loop
            # never falls through.
            for attempt in range(1, max_attempts + 1):
                try:
                    return f(*a, **kw)
                except caught as e:
                    delay = _failure_delay(f, e, attempt, max_attempts, backoff)
                    if delay is None:
                        raise
                    time.sleep(delay)

        return wrapped

    if func is None:
        return decorator
    return decorator(func)(*args, **kwargs)


async def _async_retry(
    func: "Callable[..., T]",
    args: tuple = (),
    kwargs: Optional[dict] = None,
    *,
    max_attempts: int = 3,
    initial_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
    jitter: bool = True,
    exceptions: Union[Type[Exception], Sequence[Type[Exception]]] = Exception,
) -> "T":
    """Await ``func(*args, **kwargs)``, retrying with exponential backoff.

    Coroutine counterpart of the synchronous path in ``retry``; parameters
    have the same meaning. Waits use ``asyncio.sleep``.

    Raises:
        ValueError: If max_attempts is smaller than 1
        The last exception encountered if all retries fail
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    kwargs = kwargs or {}
    caught = _exception_tuple(exceptions)
    backoff = {
        "initial_delay": initial_delay,
        "max_delay": max_delay,
        "exponential_base": exponential_base,
        "jitter": jitter,
    }

    for attempt in range(1, max_attempts + 1):
        try:
            return await func(*args, **kwargs)
        except caught as e:
            delay = _failure_delay(func, e, attempt, max_attempts, backoff)
            if delay is None:
                raise
            await asyncio.sleep(delay)
+ in the specified units, or None if size or transform is not available + (e.g. for group container elements). Raises: ValueError: If units is not "cm" or "in". - ValueError: If element size is not available. """ if self.size is None: - raise ValueError("Element size is not available") + return None if self.transform is None: - raise ValueError("Element transform is not available") + return None # Extract width and height from size # Size can have width/height as either float or Dimension objects @@ -763,7 +763,7 @@ def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: return width_result, height_result - def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, float]: + def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Optional[Tuple[float, float]]: """Calculate the absolute position of the element on the page in the specified units. Position represents the distance of the top-left corner of the element @@ -775,11 +775,12 @@ def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, f Returns: A tuple of (x, y) representing the position in the specified units, where x is the horizontal distance from the left edge and y is the - vertical distance from the top edge of the slide. + vertical distance from the top edge of the slide, or None if + transform is not available (e.g. for group container elements). 
""" if self.transform is None: - raise ValueError("Element transform is not available") + return None # Extract position from transform (translateX, translateY are in EMUs) x_emu = self.transform.translateX diff --git a/gslides_api/element/base.py b/gslides_api/element/base.py index d0097e8..fa7c893 100644 --- a/gslides_api/element/base.py +++ b/gslides_api/element/base.py @@ -290,7 +290,7 @@ def to_markdown(self) -> str | None: """ raise NotImplementedError("Subclasses must implement to_markdown method") - def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: + def absolute_size(self, units: OutputUnit) -> Optional[Tuple[float, float]]: """Calculate the absolute size of the element in the specified units. This method calculates the actual rendered size of the element, taking into @@ -302,16 +302,15 @@ def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: Returns: A tuple of (width, height) representing the element's dimensions - in the specified units. + in the specified units, or None if size/transform is not available. Raises: ValueError: If units is not "cm" or "in". - ValueError: If element size is not available. """ element_props = self.element_properties() return element_props.absolute_size(units) - def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, float]: + def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Optional[Tuple[float, float]]: """Calculate the absolute position of the element on the page in the specified units. Position represents the distance of the top-left corner of the element @@ -323,7 +322,8 @@ def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, f Returns: A tuple of (x, y) representing the position in the specified units, where x is the horizontal distance from the left edge and y is the - vertical distance from the top edge of the slide. + vertical distance from the top edge of the slide, or None if + transform is not available. 
""" element_props = self.element_properties() return element_props.absolute_position(units) diff --git a/gslides_api/element/image.py b/gslides_api/element/image.py index 7c0a566..e219efb 100644 --- a/gslides_api/element/image.py +++ b/gslides_api/element/image.py @@ -17,6 +17,7 @@ CreateImageRequest, ReplaceImageRequest, UpdateImagePropertiesRequest, + UpdatePageElementAltTextRequest, ) from gslides_api.utils import dict_to_dot_separated_field_list @@ -142,6 +143,8 @@ def replace_image( file=file, method=method, api_client=api_client, + title=image.title, + description=image.description, ) """ @@ -169,6 +172,8 @@ def replace_image_from_id( file: str | None = None, method: ImageReplaceMethod | None = None, api_client: Optional[GoogleAPIClient] = None, + title: str | None = None, + description: str | None = None, ): if url is None and file is None: raise ValueError("Must specify either url or file") @@ -180,6 +185,18 @@ def replace_image_from_id( url = client.upload_image_to_drive(file) requests = ImageElement._replace_image_requests(image_id, url, method) + + # Google Slides API replaceImage clears the alt-text title/description, + # so we restore them after the replacement. 
+ if title is not None or description is not None: + requests.append( + UpdatePageElementAltTextRequest( + objectId=image_id, + title=title, + description=description, + ) + ) + return client.batch_update(requests, presentation_id) def get_image_data(self) -> ImageData: diff --git a/gslides_api/mcp/__init__.py b/gslides_api/mcp/__init__.py index 1d69816..1be4c22 100644 --- a/gslides_api/mcp/__init__.py +++ b/gslides_api/mcp/__init__.py @@ -10,11 +10,8 @@ """ from .models import ( - ElementOutline, ErrorResponse, OutputFormat, - PresentationOutline, - SlideOutline, SuccessResponse, ThumbnailSizeOption, ) @@ -37,9 +34,6 @@ "ThumbnailSizeOption", "ErrorResponse", "SuccessResponse", - "ElementOutline", - "SlideOutline", - "PresentationOutline", # Utils "parse_presentation_id", "get_slide_name", diff --git a/gslides_api/mcp/models.py b/gslides_api/mcp/models.py index 1bb91dd..144e408 100644 --- a/gslides_api/mcp/models.py +++ b/gslides_api/mcp/models.py @@ -1,7 +1,7 @@ """Models for the gslides-api MCP server.""" from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any, Dict from pydantic import BaseModel, Field @@ -11,7 +11,7 @@ class OutputFormat(str, Enum): RAW = "raw" # Raw Google Slides API JSON response DOMAIN = "domain" # gslides-api domain object model_dump() - OUTLINE = "outline" # Bare-bones structure with names and markdown content + MARKDOWN = "markdown" # Slide markdown layout representation class ThumbnailSizeOption(str, Enum): @@ -33,34 +33,6 @@ class ErrorResponse(BaseModel): ) -class ElementOutline(BaseModel): - """Outline representation of a page element.""" - - element_name: Optional[str] = Field(None, description="Element name from alt-text title") - element_id: str = Field(description="Element object ID") - type: str = Field(description="Element type (shape, image, table, etc.)") - alt_description: Optional[str] = Field(None, description="Alt-text description if present") - content_markdown: Optional[str] = Field( 
- None, description="Markdown content for text elements" - ) - - -class SlideOutline(BaseModel): - """Outline representation of a slide.""" - - slide_name: Optional[str] = Field(None, description="Slide name from speaker notes") - slide_id: str = Field(description="Slide object ID") - elements: List[ElementOutline] = Field(default_factory=list) - - -class PresentationOutline(BaseModel): - """Outline representation of a presentation.""" - - presentation_id: str = Field(description="Presentation ID") - title: str = Field(description="Presentation title") - slides: List[SlideOutline] = Field(default_factory=list) - - class SuccessResponse(BaseModel): """Success response for modification operations.""" diff --git a/gslides_api/mcp/server.py b/gslides_api/mcp/server.py index d163892..1c40608 100644 --- a/gslides_api/mcp/server.py +++ b/gslides_api/mcp/server.py @@ -4,7 +4,6 @@ """ import argparse -import base64 import json import logging import os @@ -13,10 +12,18 @@ import tempfile import traceback import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Union from mcp.server import FastMCP +from mcp.server.fastmcp.utilities.types import Image +from gslides_api.adapters.abstract_slides import ( + AbstractPresentation, + AbstractThumbnailSize, +) +from gslides_api.adapters.add_names import name_slides +from gslides_api.adapters.gslides_adapter import GSlidesAPIClient +from gslides_api.agnostic.element import MarkdownTableElement from gslides_api.client import GoogleAPIClient from gslides_api.domain.domain import ( Color, @@ -31,24 +38,22 @@ from gslides_api.element.base import ElementKind from gslides_api.element.element import ImageElement from gslides_api.element.shape import ShapeElement +from gslides_api.element.table import TableElement from gslides_api.presentation import Presentation from gslides_api.request.request import UpdateShapePropertiesRequest from .models import ( ErrorResponse, OutputFormat, - 
PresentationOutline, - SlideOutline, SuccessResponse, ThumbnailSizeOption, ) from .utils import ( - build_element_outline, - build_presentation_outline, - build_slide_outline, element_not_found_error, + find_abstract_slide_by_name, find_element_by_name, find_slide_by_name, + get_abstract_slide_names, get_available_element_names, get_available_slide_names, get_slide_name, @@ -144,7 +149,7 @@ def get_presentation( Args: presentation_id_or_url: Google Slides URL or presentation ID - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'markdown' (slide layout markdown) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -161,17 +166,21 @@ def get_presentation( client.flush_batch_update() return _format_response(result) - elif format_type == OutputFormat.DOMAIN: - # Get domain object and dump - presentation = Presentation.from_id(pres_id, api_client=client) + elif format_type == OutputFormat.MARKDOWN: + gslides_client = GSlidesAPIClient(gslides_client=client) + abs_pres = AbstractPresentation.from_id( + api_client=gslides_client, presentation_id=pres_id + ) + parts = [] + for i, slide in enumerate(abs_pres.slides): + parts.append(f"## Slide {i}\n\n{slide.markdown()}") client.flush_batch_update() - return _format_response(presentation.model_dump()) + return "\n\n---\n\n".join(parts) - else: # OUTLINE + else: # DOMAIN presentation = Presentation.from_id(pres_id, api_client=client) client.flush_batch_update() - outline = build_presentation_outline(presentation) - return _format_response(outline) + return _format_response(presentation.model_dump()) except Exception as e: logger.error(f"Error getting presentation: {e}\n{traceback.format_exc()}") @@ -181,16 +190,40 @@ def get_presentation( @mcp.tool() def get_slide( presentation_id_or_url: str, - slide_name: str, + slide_name: str = None, + slide_index: int = None, how: str = None, -) -> str: - 
"""Get a single slide by name (first line of speaker notes). + include_thumbnail: bool = True, +) -> Union[str, List]: + """Get a single slide by name or index. Args: presentation_id_or_url: Google Slides URL or presentation ID - slide_name: Slide name (first line of speaker notes, stripped) - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + slide_name: Slide name (first line of speaker notes). Mutually exclusive with slide_index. + slide_index: Zero-based slide index. Mutually exclusive with slide_name. + how: Output format - 'markdown' (default), 'raw' (Google API JSON), or 'domain' (model_dump). + include_thumbnail: Include slide thumbnail as image payload. Default True. """ + # Validate slide_name/slide_index + if slide_name is not None and slide_index is not None: + return _format_response( + None, + validation_error( + "slide_name/slide_index", + "slide_name and slide_index are mutually exclusive", + f"slide_name={slide_name}, slide_index={slide_index}", + ), + ) + if slide_name is None and slide_index is None: + return _format_response( + None, + validation_error( + "slide_name/slide_index", + "Either slide_name or slide_index must be provided", + "both are None", + ), + ) + try: pres_id = parse_presentation_id(presentation_id_or_url) except ValueError as e: @@ -200,27 +233,54 @@ def get_slide( client = get_api_client() try: - presentation = Presentation.from_id(pres_id, api_client=client) - slide = find_slide_by_name(presentation, slide_name) - - if slide is None: - available = get_available_slide_names(presentation) - client.flush_batch_update() - return _format_response(None, slide_not_found_error(pres_id, slide_name, available)) + # Always load via AbstractPresentation for unified slide lookup + gslides_client = GSlidesAPIClient(gslides_client=client) + abs_pres = AbstractPresentation.from_id( + api_client=gslides_client, presentation_id=pres_id + ) - if format_type == OutputFormat.RAW: - result = 
client.get_slide_json(pres_id, slide.objectId) - client.flush_batch_update() - return _format_response(result) + # Find slide + if slide_name is not None: + abs_slide = find_abstract_slide_by_name(abs_pres, slide_name) + if abs_slide is None: + names = get_abstract_slide_names(abs_pres) + client.flush_batch_update() + return _format_response(None, slide_not_found_error(pres_id, slide_name, names)) + else: + if slide_index < 0 or slide_index >= len(abs_pres.slides): + client.flush_batch_update() + return _format_response( + None, + validation_error( + "slide_index", + f"Slide index {slide_index} out of range (0-{len(abs_pres.slides) - 1})", + str(slide_index), + ), + ) + abs_slide = abs_pres.slides[slide_index] + + # Format output based on `how` + if format_type == OutputFormat.MARKDOWN: + result = abs_slide.markdown() + elif format_type == OutputFormat.RAW: + result = _format_response( + client.get_slide_json(pres_id, abs_slide.objectId) + ) + else: # DOMAIN + result = _format_response(abs_slide._gslides_slide.model_dump()) - elif format_type == OutputFormat.DOMAIN: - client.flush_batch_update() - return _format_response(slide.model_dump()) + client.flush_batch_update() - else: # OUTLINE - client.flush_batch_update() - outline = build_slide_outline(slide) - return _format_response(outline) + # Optionally attach thumbnail + if include_thumbnail: + thumb = abs_slide.thumbnail( + api_client=gslides_client, + size=AbstractThumbnailSize.MEDIUM, + include_data=True, + ) + return [result, Image(data=thumb.content, format="png")] + else: + return result except Exception as e: logger.error(f"Error getting slide: {e}\n{traceback.format_exc()}") @@ -240,7 +300,7 @@ def get_element( presentation_id_or_url: Google Slides URL or presentation ID slide_name: Slide name (first line of speaker notes) element_name: Element name (from alt-text title, stripped) - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + how: Output format - 'raw' 
(Google API JSON) or 'domain' (model_dump) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -272,13 +332,9 @@ def get_element( # For raw, we return the element's API format return _format_response(element.to_api_format() if hasattr(element, "to_api_format") else element.model_dump()) - elif format_type == OutputFormat.DOMAIN: + else: # DOMAIN (also handles MARKDOWN since element-level markdown is not distinct) return _format_response(element.model_dump()) - else: # OUTLINE - outline = build_element_outline(element) - return _format_response(outline) - except Exception as e: logger.error(f"Error getting element: {e}\n{traceback.format_exc()}") return _format_response(None, presentation_error(pres_id, e)) @@ -546,25 +602,20 @@ def write_element_markdown( return _format_response(None, presentation_error(pres_id, e)) -# ============================================================================= -# IMAGE TOOLS -# ============================================================================= - - @mcp.tool() -def replace_element_image( +def write_table_markdown( presentation_id_or_url: str, slide_name: str, element_name: str, - image_url: str, + markdown_table: str, ) -> str: - """Replace an image element with a new image from URL. + """Write a markdown-formatted table to a table element, resizing if needed. 
Args: presentation_id_or_url: Google Slides URL or presentation ID slide_name: Slide name (first line of speaker notes) - element_name: Element name (image alt-title) - image_url: URL of new image + element_name: Element name (table alt-title) + markdown_table: Markdown table string (with | delimiters and --- separator) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -589,6 +640,257 @@ def replace_element_image( client.flush_batch_update() return _format_response(None, element_not_found_error(pres_id, slide_name, element_name, available)) + # Check if it's a table element + if not isinstance(element, TableElement): + client.flush_batch_update() + return _format_response( + None, + validation_error( + "element_name", + f"Element '{element_name}' is not a table element (type: {element.type.value})", + element_name, + ), + ) + + # Parse the markdown table + markdown_elem = MarkdownTableElement.from_markdown(element_name, markdown_table) + + # Compare shapes and resize if needed + current_shape = (element.table.rows, element.table.columns) + target_shape = markdown_elem.shape + font_scale_factor = 1.0 + + if current_shape != target_shape: + font_scale_factor = element.resize( + target_shape[0], target_shape[1], api_client=client + ) + client.flush_batch_update() + + # Re-fetch presentation to get updated table structure after resize + presentation = Presentation.from_id(pres_id, api_client=client) + slide = find_slide_by_name(presentation, slide_name) + element = find_element_by_name(slide, element_name) + + # Generate and execute content update requests + requests = element.content_update_requests( + markdown_elem, check_shape=False, font_scale_factor=font_scale_factor + ) + client.batch_update(requests, pres_id) + client.flush_batch_update() + + result = SuccessResponse( + message=f"Successfully wrote table to element '{element_name}'", + details={ + "element_id": element.objectId, + "slide_name": slide_name, + "table_shape": list(target_shape), + 
"resized": current_shape != target_shape, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error writing table markdown: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + +@mcp.tool() +def bulk_write_element_markdown( + presentation_id_or_url: str, + writes: str, +) -> str: + """Write markdown content to multiple shape elements in a single batch operation. + + Args: + presentation_id_or_url: Google Slides URL or presentation ID + writes: JSON string containing a list of write operations. + Each entry: {"slide_name": str, "element_name": str, "markdown": str} + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + # Parse writes JSON + try: + write_list = json.loads(writes) + except json.JSONDecodeError as e: + return _format_response( + None, + validation_error("writes", f"Invalid JSON: {e}", writes[:200]), + ) + + if not isinstance(write_list, list): + return _format_response( + None, + validation_error("writes", "Expected a JSON array of write operations", type(write_list).__name__), + ) + + # Validate each entry has required keys + required_keys = {"slide_name", "element_name", "markdown"} + for i, entry in enumerate(write_list): + if not isinstance(entry, dict): + return _format_response( + None, + validation_error("writes", f"Entry {i} is not an object", str(entry)[:200]), + ) + missing = required_keys - set(entry.keys()) + if missing: + return _format_response( + None, + validation_error("writes", f"Entry {i} missing keys: {missing}", str(entry)[:200]), + ) + + client = get_api_client() + + try: + presentation = Presentation.from_id(pres_id, api_client=client) + + # Cache slides by name for efficient lookup + slides_by_name = {} + for slide in presentation.slides: + name = get_slide_name(slide) + if name: + 
slides_by_name[name] = slide + + successes = [] + failures = [] + + for entry in write_list: + slide_name = entry["slide_name"] + element_name = entry["element_name"] + markdown = entry["markdown"] + + try: + slide = slides_by_name.get(slide_name) + if slide is None: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Slide '{slide_name}' not found", + }) + continue + + element = find_element_by_name(slide, element_name) + if element is None: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Element '{element_name}' not found", + }) + continue + + if not isinstance(element, ShapeElement): + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Element '{element_name}' is not a text element (type: {element.type.value})", + }) + continue + + element.write_text(markdown, as_markdown=True, api_client=client) + successes.append({ + "slide_name": slide_name, + "element_name": element_name, + "element_id": element.objectId, + }) + except Exception as entry_error: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": str(entry_error), + }) + + client.flush_batch_update() + + result = SuccessResponse( + message=f"Bulk write completed: {len(successes)} succeeded, {len(failures)} failed", + details={ + "total": len(write_list), + "succeeded": len(successes), + "failed": len(failures), + "successes": successes, + "failures": failures, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error in bulk write: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + +# ============================================================================= +# IMAGE TOOLS +# ============================================================================= + + +@mcp.tool() +def replace_element_image( + presentation_id_or_url: str, + slide_name: str, + element_name: 
str = None, + image_source: str = "", + element_id: str = None, +) -> str: + """Replace an image element with a new image from a URL or local file path. + + Args: + presentation_id_or_url: Google Slides URL or presentation ID + slide_name: Slide name (first line of speaker notes) + element_name: Element name (image alt-title). Either this or element_id must be provided. + image_source: URL (http/https) or local file path of the new image + element_id: Element object ID (alternative to element_name, for unnamed elements) + """ + if element_name is None and element_id is None: + return _format_response( + None, + validation_error("element_name", "Either element_name or element_id must be provided", None), + ) + + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + presentation = Presentation.from_id(pres_id, api_client=client) + slide = find_slide_by_name(presentation, slide_name) + + if slide is None: + available = get_available_slide_names(presentation) + client.flush_batch_update() + return _format_response(None, slide_not_found_error(pres_id, slide_name, available)) + + # Find element by name or by ID + element = None + if element_id is not None: + for el in slide.page_elements_flat: + if el.objectId == element_id: + element = el + break + if element is None: + available = get_available_element_names(slide) + client.flush_batch_update() + return _format_response( + None, + validation_error( + "element_id", + f"No element found with ID '{element_id}' on slide '{slide_name}'", + element_id, + ), + ) + else: + element = find_element_by_name(slide, element_name) + if element is None: + available = get_available_element_names(slide) + client.flush_batch_update() + return _format_response(None, element_not_found_error(pres_id, slide_name, element_name, available)) + + display_name = 
element_name or element_id + # Check if it's an image element if not isinstance(element, ImageElement): client.flush_batch_update() @@ -596,21 +898,24 @@ def replace_element_image( None, validation_error( "element_name", - f"Element '{element_name}' is not an image element (type: {element.type.value})", - element_name, + f"Element '{display_name}' is not an image element (type: {element.type.value})", + display_name, ), ) - # Replace the image - element.replace_image(url=image_url, api_client=client) + # Replace the image - route to url= or file= based on source + if image_source.startswith(("http://", "https://")): + element.replace_image(url=image_source, api_client=client) + else: + element.replace_image(file=image_source, api_client=client) client.flush_batch_update() result = SuccessResponse( - message=f"Successfully replaced image in element '{element_name}'", + message=f"Successfully replaced image in element '{display_name}'", details={ "element_id": element.objectId, "slide_name": slide_name, - "new_image_url": image_url, + "image_source": image_source, }, ) return _format_response(result) @@ -779,6 +1084,116 @@ def delete_slide( return _format_response(None, presentation_error(pres_id, e)) +# ============================================================================= +# PRESENTATION MANIPULATION TOOLS +# ============================================================================= + + +@mcp.tool() +def copy_presentation( + presentation_id_or_url: str, + copy_title: str = None, + folder_id: str = None, +) -> str: + """Copy an entire presentation to create a new one. 
+ + Args: + presentation_id_or_url: Google Slides URL or presentation ID + copy_title: Title for the copy (defaults to "Copy of {original title}") + folder_id: Google Drive folder ID to place the copy in (optional) + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + # Load presentation to get its title for the default copy name + presentation = Presentation.from_id(pres_id, api_client=client) + original_title = presentation.title or "Untitled" + + if copy_title is None: + copy_title = f"Copy of {original_title}" + + # Copy the presentation + copy_result = client.copy_presentation(pres_id, copy_title, folder_id) + new_pres_id = copy_result["id"] + + result = SuccessResponse( + message=f"Successfully copied presentation '{original_title}'", + details={ + "original_presentation_id": pres_id, + "new_presentation_id": new_pres_id, + "new_presentation_url": f"https://docs.google.com/presentation/d/{new_pres_id}/edit", + "new_title": copy_title, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error copying presentation: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + +@mcp.tool() +def add_element_names( + presentation_id_or_url: str, + skip_empty_text_boxes: bool = False, + min_image_size_cm: float = 4.0, +) -> str: + """Name all slides and elements in a presentation. + + Names slides based on their speaker notes (first line). + Names elements (text boxes, images, charts, tables) with descriptive alt-text titles. + The topmost text box becomes "Title", others become "Text_1", "Text_2", etc. + Images and charts are named "Image_1", "Chart_1", etc. 
+ + Args: + presentation_id_or_url: Google Slides URL or presentation ID + skip_empty_text_boxes: Skip text boxes that contain only whitespace + min_image_size_cm: Minimum image dimension (cm) to include (smaller images are skipped) + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + gslides_client = GSlidesAPIClient(gslides_client=client) + slide_names = name_slides( + pres_id, + name_elements=True, + api_client=gslides_client, + skip_empty_text_boxes=skip_empty_text_boxes, + min_image_size_cm=min_image_size_cm, + ) + client.flush_batch_update() + + # Convert SlideElementNames dataclass to serializable dict + names_dict = {} + for slide_name, element_names in slide_names.items(): + names_dict[slide_name] = { + "text_names": element_names.text_names, + "image_names": element_names.image_names, + "chart_names": element_names.chart_names, + "table_names": element_names.table_names, + } + + result = SuccessResponse( + message=f"Successfully named {len(slide_names)} slides and their elements", + details={"slide_element_names": names_dict}, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error naming elements: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + # ============================================================================= # MAIN ENTRY POINT # ============================================================================= @@ -796,9 +1211,9 @@ def main(): parser.add_argument( "--default-format", type=str, - choices=["raw", "domain", "outline"], - default="raw", - help="Default output format for tools (default: raw)", + choices=["raw", "domain", "markdown"], + default="markdown", + help="Default output format for tools (default: markdown)", ) args = parser.parse_args() diff --git 
a/gslides_api/mcp/utils.py b/gslides_api/mcp/utils.py index e8ba946..731afa7 100644 --- a/gslides_api/mcp/utils.py +++ b/gslides_api/mcp/utils.py @@ -1,14 +1,15 @@ """Utility functions for the gslides-api MCP server.""" import re -from typing import List, Optional, Tuple +from typing import List, Optional +from gslides_api.adapters.abstract_slides import AbstractPresentation, AbstractSlide from gslides_api.element.base import ElementKind, PageElementBase from gslides_api.element.element import PageElement from gslides_api.page.slide import Slide from gslides_api.presentation import Presentation -from .models import ElementOutline, ErrorResponse, PresentationOutline, SlideOutline +from .models import ErrorResponse # Pattern to match Google Slides URLs and extract the presentation ID # Matches: https://docs.google.com/presentation/d/{ID}/edit @@ -90,20 +91,6 @@ def get_element_name(element: PageElementBase) -> Optional[str]: return None -def get_element_alt_description(element: PageElementBase) -> Optional[str]: - """Get the alt-text description of an element. - - Args: - element: The element to get the description from - - Returns: - The alt-text description, or None if not present - """ - if hasattr(element, "description") and element.description: - return element.description.strip() or None - return None - - def find_slide_by_name(presentation: Presentation, slide_name: str) -> Optional[Slide]: """Find a slide by its name (first line of speaker notes). @@ -193,77 +180,6 @@ def get_element_type_string(element: PageElement) -> str: return "unknown" -def get_element_markdown_content(element: PageElement) -> Optional[str]: - """Get the markdown content of a shape element. 
- - Args: - element: The element to get content from - - Returns: - Markdown content if it's a text element, None otherwise - """ - if hasattr(element, "type") and element.type == ElementKind.SHAPE: - try: - # Try to read text as markdown - if hasattr(element, "read_text"): - return element.read_text(as_markdown=True) - except Exception: - pass - return None - - -def build_element_outline(element: PageElement) -> ElementOutline: - """Build an outline representation of an element. - - Args: - element: The element to build outline from - - Returns: - ElementOutline representation - """ - return ElementOutline( - element_name=get_element_name(element), - element_id=element.objectId, - type=get_element_type_string(element), - alt_description=get_element_alt_description(element), - content_markdown=get_element_markdown_content(element), - ) - - -def build_slide_outline(slide: Slide) -> SlideOutline: - """Build an outline representation of a slide. - - Args: - slide: The slide to build outline from - - Returns: - SlideOutline representation - """ - elements = [build_element_outline(e) for e in slide.page_elements_flat] - return SlideOutline( - slide_name=get_slide_name(slide), - slide_id=slide.objectId, - elements=elements, - ) - - -def build_presentation_outline(presentation: Presentation) -> PresentationOutline: - """Build an outline representation of a presentation. 
- - Args: - presentation: The presentation to build outline from - - Returns: - PresentationOutline representation - """ - slides = [build_slide_outline(s) for s in presentation.slides] - return PresentationOutline( - presentation_id=presentation.presentationId, - title=presentation.title or "Untitled", - slides=slides, - ) - - def create_error_response( error_type: str, message: str, @@ -374,3 +290,45 @@ def validation_error(field: str, message: str, value: str = None) -> ErrorRespon message=message, **details, ) + + +def find_abstract_slide_by_name( + presentation: AbstractPresentation, slide_name: str +) -> Optional[AbstractSlide]: + """Find an AbstractSlide by speaker notes name. + + Args: + presentation: The abstract presentation to search in + slide_name: The slide name to find (first line of speaker notes) + + Returns: + The abstract slide if found, None otherwise + """ + for slide in presentation.slides: + if slide.speaker_notes: + text = slide.speaker_notes.read_text() + if text: + first_line = text.strip().split("\n")[0].strip() + if first_line == slide_name: + return slide + return None + + +def get_abstract_slide_names(presentation: AbstractPresentation) -> List[str]: + """Get slide names from an AbstractPresentation for error messages. 
+ + Args: + presentation: The abstract presentation to get slide names from + + Returns: + List of slide names (or placeholder for unnamed slides) + """ + names = [] + for i, slide in enumerate(presentation.slides): + name = None + if slide.speaker_notes: + text = slide.speaker_notes.read_text() + if text: + name = text.strip().split("\n")[0].strip() or None + names.append(name or f"(unnamed slide at index {i})") + return names diff --git a/gslides_api/pptx/__init__.py b/gslides_api/pptx/__init__.py new file mode 100644 index 0000000..0b5bf38 --- /dev/null +++ b/gslides_api/pptx/__init__.py @@ -0,0 +1 @@ +"""PPTX file manipulation utilities.""" diff --git a/gslides_api/pptx/chart_renderer.py b/gslides_api/pptx/chart_renderer.py new file mode 100644 index 0000000..8720012 --- /dev/null +++ b/gslides_api/pptx/chart_renderer.py @@ -0,0 +1,426 @@ +"""Render PowerPoint charts to images using LibreOffice. + +This module provides functionality to extract chart images from native PowerPoint +charts (GraphicFrame elements with embedded chart data) that cannot be directly +exported as images via python-pptx. +""" + +import asyncio +import io +import logging +import os +import shutil +import subprocess +import tempfile +from typing import Optional, Tuple + +from PIL import Image +from pptx import Presentation +from pptx.util import Emu + +from gslides_api.agnostic.domain import ImageData + +from gslides_api.common.log_time import log_time + +logger = logging.getLogger(__name__) + + +@log_time +def render_slide_to_image( + presentation_path: str, + slide_index: int, + crop_bounds: Optional[Tuple[int, int, int, int]] = None, +) -> Optional[bytes]: + """Render a single slide to a PNG image using LibreOffice. 
+ + Args: + presentation_path: Path to the PPTX file + slide_index: Zero-based index of the slide to render + crop_bounds: Optional (left, top, right, bottom) in EMUs to crop the image + + Returns: + PNG image bytes, or None if rendering fails + """ + soffice_path = shutil.which("soffice") + if not soffice_path: + logger.warning("LibreOffice (soffice) not available for chart rendering") + return None + + try: + with tempfile.TemporaryDirectory() as tmpdir: + # First, convert PPTX to PDF (LibreOffice only exports first slide to PNG) + pdf_result = subprocess.run( + [ + soffice_path, + "--headless", + "--convert-to", + "pdf", + "--outdir", + tmpdir, + presentation_path, + ], + capture_output=True, + timeout=60, + ) + + if pdf_result.returncode != 0: + logger.warning(f"LibreOffice PDF conversion failed: {pdf_result.stderr.decode()}") + return None + + # Find the PDF file + base_name = os.path.splitext(os.path.basename(presentation_path))[0] + pdf_path = os.path.join(tmpdir, f"{base_name}.pdf") + + if not os.path.exists(pdf_path): + logger.warning("LibreOffice produced no PDF output") + return None + + # Use pdftoppm to extract the specific page as PNG + pdftoppm_path = shutil.which("pdftoppm") + if pdftoppm_path: + png_prefix = os.path.join(tmpdir, "slide") + page_num = slide_index + 1 # pdftoppm uses 1-based indexing + + pdftoppm_result = subprocess.run( + [ + pdftoppm_path, + "-png", + "-f", + str(page_num), + "-l", + str(page_num), + "-r", + "150", # 150 DPI for good quality + pdf_path, + png_prefix, + ], + capture_output=True, + timeout=30, + ) + + if pdftoppm_result.returncode != 0: + logger.warning(f"pdftoppm conversion failed: {pdftoppm_result.stderr.decode()}") + return None + + # Find the output PNG (pdftoppm adds page number suffix) + png_files = [ + f for f in os.listdir(tmpdir) if f.startswith("slide") and f.endswith(".png") + ] + if not png_files: + logger.warning("pdftoppm produced no PNG output") + return None + + png_path = os.path.join(tmpdir, 
png_files[0]) + else: + # Fallback: use ImageMagick convert + convert_path = shutil.which("convert") + if not convert_path: + logger.warning("Neither pdftoppm nor ImageMagick available for PDF to PNG") + return None + + png_path = os.path.join(tmpdir, "slide.png") + convert_result = subprocess.run( + [ + convert_path, + "-density", + "150", + f"{pdf_path}[{slide_index}]", # Page number in square brackets + png_path, + ], + capture_output=True, + timeout=30, + ) + + if convert_result.returncode != 0: + logger.warning( + f"ImageMagick conversion failed: {convert_result.stderr.decode()}" + ) + return None + + if not os.path.exists(png_path): + logger.warning("No PNG file produced") + return None + + # Read and optionally crop the image + with Image.open(png_path) as img: + if crop_bounds: + # Convert EMU bounds to pixel coordinates + # EMU to inches: 914400 EMU per inch + # Then multiply by DPI (150) + dpi = 150 + emu_per_inch = 914400 + + left_px = int(crop_bounds[0] / emu_per_inch * dpi) + top_px = int(crop_bounds[1] / emu_per_inch * dpi) + right_px = int(crop_bounds[2] / emu_per_inch * dpi) + bottom_px = int(crop_bounds[3] / emu_per_inch * dpi) + + # Ensure bounds are within image + left_px = max(0, left_px) + top_px = max(0, top_px) + right_px = min(img.width, right_px) + bottom_px = min(img.height, bottom_px) + + img = img.crop((left_px, top_px, right_px, bottom_px)) + + # Convert to PNG bytes + png_buffer = io.BytesIO() + img.save(fp=png_buffer, format="PNG") + return png_buffer.getvalue() + + except subprocess.TimeoutExpired: + logger.warning("LibreOffice/conversion command timed out") + return None + except Exception as e: + logger.warning(f"Chart rendering failed: {e}") + return None + + +@log_time +async def render_all_slides_to_images( + presentation_path: str, + dpi: int = 150, +) -> list[bytes]: + """Render all slides to PNG images using LibreOffice (async). 
+ + This is more efficient than calling render_slide_to_image() for each slide + because it converts the PPTX to PDF only once and extracts all pages at once. + + Args: + presentation_path: Path to the PPTX file + dpi: Resolution for rendering (default 150) + + Returns: + List of PNG image bytes, one per slide. Empty list if rendering fails. + """ + soffice_path = shutil.which("soffice") + if not soffice_path: + logger.warning("LibreOffice (soffice) not available for slide rendering") + return [] + + try: + with tempfile.TemporaryDirectory() as tmpdir: + # Convert PPTX to PDF using async subprocess + pdf_process = await asyncio.create_subprocess_exec( + soffice_path, + "--headless", + "--convert-to", + "pdf", + "--outdir", + tmpdir, + presentation_path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + _, stderr = await asyncio.wait_for(pdf_process.communicate(), timeout=60) + except asyncio.TimeoutError: + pdf_process.kill() + logger.warning("LibreOffice PDF conversion timed out") + return [] + + if pdf_process.returncode != 0: + logger.warning(f"LibreOffice PDF conversion failed: {stderr.decode()}") + return [] + + # Find the PDF file + base_name = os.path.splitext(os.path.basename(presentation_path))[0] + pdf_path = os.path.join(tmpdir, f"{base_name}.pdf") + + if not os.path.exists(pdf_path): + logger.warning("LibreOffice produced no PDF output") + return [] + + # Use pdftoppm to extract ALL pages as PNG (no -f/-l flags) + pdftoppm_path = shutil.which("pdftoppm") + if not pdftoppm_path: + logger.warning("pdftoppm not available for PDF to PNG conversion") + return [] + + png_prefix = os.path.join(tmpdir, "slide") + + pdftoppm_process = await asyncio.create_subprocess_exec( + pdftoppm_path, + "-png", + "-r", + str(dpi), + pdf_path, + png_prefix, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + _, stderr = await asyncio.wait_for(pdftoppm_process.communicate(), timeout=60) + except 
asyncio.TimeoutError: + pdftoppm_process.kill() + logger.warning("pdftoppm conversion timed out") + return [] + + if pdftoppm_process.returncode != 0: + logger.warning(f"pdftoppm conversion failed: {stderr.decode()}") + return [] + + # Find all output PNGs (pdftoppm names them slide-01.png, slide-02.png, etc.) + png_files = sorted( + [f for f in os.listdir(tmpdir) if f.startswith("slide") and f.endswith(".png")] + ) + + if not png_files: + logger.warning("pdftoppm produced no PNG output") + return [] + + # Read all PNG files + png_bytes_list = [] + for png_file in png_files: + png_path = os.path.join(tmpdir, png_file) + with open(png_path, "rb") as f: + png_bytes_list.append(f.read()) + + logger.info(f"Rendered {len(png_bytes_list)} slides from {presentation_path}") + return png_bytes_list + + except Exception as e: + logger.warning(f"Batch slide rendering failed: {e}") + return [] + + +@log_time +def render_chart_element_to_image( + presentation: Presentation, + slide_index: int, + shape_left: int, + shape_top: int, + shape_width: int, + shape_height: int, +) -> Optional[ImageData]: + """Render a chart shape from a presentation to an ImageData object. + + This saves the presentation to a temp file, renders the slide, + and crops to the shape's bounding box. 
+ + Args: + presentation: python-pptx Presentation object + slide_index: Zero-based index of the slide containing the chart + shape_left: Shape left position in EMUs + shape_top: Shape top position in EMUs + shape_width: Shape width in EMUs + shape_height: Shape height in EMUs + + Returns: + ImageData with PNG bytes, or None if rendering fails + """ + temp_path = None + try: + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp: + temp_path = tmp.name + + # Save presentation to temp file + presentation.save(temp_path) + + # Calculate crop bounds + crop_bounds = ( + shape_left, + shape_top, + shape_left + shape_width, + shape_top + shape_height, + ) + + # Render and crop + png_bytes = render_slide_to_image( + presentation_path=temp_path, + slide_index=slide_index, + crop_bounds=crop_bounds, + ) + + if png_bytes is None: + return None + + return ImageData( + content=png_bytes, + mime_type="image/png", + ) + + except Exception as e: + logger.warning(f"Failed to render chart element: {e}") + return None + finally: + # Clean up temp file + if temp_path and os.path.exists(temp_path): + os.unlink(temp_path) + + +@log_time +def render_pptx_chart_to_image( + pptx_element: "GraphicFrame", + presentation: Presentation, + slide_index: int, +) -> Optional[ImageData]: + """Render a native PowerPoint chart to an ImageData object. + + Convenience wrapper that extracts position/size from the shape. 
+ + Args: + pptx_element: python-pptx GraphicFrame containing the chart + presentation: python-pptx Presentation object + slide_index: Zero-based index of the slide containing the chart + + Returns: + ImageData with PNG bytes, or None if rendering fails + """ + return render_chart_element_to_image( + presentation=presentation, + slide_index=slide_index, + shape_left=pptx_element.left, + shape_top=pptx_element.top, + shape_width=pptx_element.width, + shape_height=pptx_element.height, + ) + + +@log_time +def render_chart_from_file( + presentation_path: str, + slide_index: int, + shape_left: int, + shape_top: int, + shape_width: int, + shape_height: int, +) -> Optional[ImageData]: + """Render a chart from a PPTX file directly without re-saving. + + This is more efficient than render_chart_element_to_image when you already + have the presentation file path (e.g., during ingestion). + + Args: + presentation_path: Path to the PPTX file + slide_index: Zero-based index of the slide containing the chart + shape_left: Shape left position in EMUs + shape_top: Shape top position in EMUs + shape_width: Shape width in EMUs + shape_height: Shape height in EMUs + + Returns: + ImageData with PNG bytes, or None if rendering fails + """ + crop_bounds = ( + shape_left, + shape_top, + shape_left + shape_width, + shape_top + shape_height, + ) + + png_bytes = render_slide_to_image( + presentation_path=presentation_path, + slide_index=slide_index, + crop_bounds=crop_bounds, + ) + + if png_bytes is None: + return None + + return ImageData( + content=png_bytes, + mime_type="image/png", + ) diff --git a/gslides_api/pptx/converters.py b/gslides_api/pptx/converters.py new file mode 100644 index 0000000..c5c15d6 --- /dev/null +++ b/gslides_api/pptx/converters.py @@ -0,0 +1,441 @@ +"""Converters between PowerPoint font styles and platform-agnostic styles. 
def _pptx_color_to_abstract(pptx_color) -> Optional[AbstractColor]:
    """Translate a python-pptx color object into an AbstractColor.

    Args:
        pptx_color: python-pptx font.color object (may be None)

    Returns:
        AbstractColor whose channels are the 0-255 RGB components divided by
        255, or None when no color is given, its ``rgb`` is None, or reading
        it raises AttributeError/TypeError
    """
    if pptx_color is None:
        return None

    try:
        channels = pptx_color.rgb
        if channels is None:
            return None
        # Indexable as (r, g, b) with 0-255 values; scale each to 0-1
        red, green, blue = channels[0], channels[1], channels[2]
        return AbstractColor(red=red / 255, green=green / 255, blue=blue / 255)
    except (AttributeError, TypeError):
        return None
def _pptx_baseline_to_abstract(font) -> BaselineOffset:
    """Map a font's superscript/subscript flags onto a BaselineOffset.

    Superscript is checked before subscript; the first truthy flag wins.

    Args:
        font: python-pptx font object

    Returns:
        BaselineOffset.SUPERSCRIPT, BaselineOffset.SUBSCRIPT, or
        BaselineOffset.NONE when neither flag is set
    """
    # NOTE(review): the flags are probed with getattr(..., False), so a font
    # object that does not define them always yields NONE - confirm the font
    # class in use actually exposes "superscript"/"subscript".
    try:
        for flag_name, offset in (
            ("superscript", BaselineOffset.SUPERSCRIPT),
            ("subscript", BaselineOffset.SUBSCRIPT),
        ):
            if getattr(font, flag_name, False):
                return offset
    except AttributeError:
        pass
    return BaselineOffset.NONE
def apply_rich_style_to_pptx_run(rich: RichStyle, run) -> None:
    """Copy the RichStyle properties onto a python-pptx run's font.

    Font family, size and foreground color are written only when set on the
    style; the boolean flags and the baseline flags are always written.

    Args:
        rich: RichStyle with non-markdown-renderable properties
        run: python-pptx run object
    """
    font = run.font

    # Optional properties: leave the run untouched when the style has no value
    if rich.font_family is not None:
        font.name = rich.font_family
    if rich.font_size_pt is not None:
        font.size = Pt(rich.font_size_pt)
    if rich.foreground_color is not None:
        font.color.rgb = _abstract_to_pptx_rgb(rich.foreground_color)

    # Boolean properties are applied definitively (False means "not formatted").
    # NOTE(review): confirm the font object persists each of these attributes.
    for flag in (
        "underline",
        "small_caps",
        "all_caps",
        "shadow",
        "emboss",
        "imprint",
        "double_strike",
    ):
        setattr(font, flag, getattr(rich, flag))

    # Baseline offset: both flags are always set explicitly so they never disagree
    offset = rich.baseline_offset
    if offset == BaselineOffset.SUPERSCRIPT:
        font.superscript, font.subscript = True, False
    elif offset == BaselineOffset.SUBSCRIPT:
        font.subscript, font.superscript = True, False
    else:  # BaselineOffset.NONE
        font.superscript, font.subscript = False, False
apply_markdown_style_to_pptx_run(md: MarkdownRenderableStyle, run) -> None: + """Apply MarkdownRenderableStyle to a python-pptx run. + + Args: + md: MarkdownRenderableStyle with markdown-derivable properties + run: python-pptx run object + """ + # Apply boolean properties definitively (False means "not formatted") + run.font.bold = md.bold + run.font.italic = md.italic + run.font.strike = md.strikethrough + + # Set Courier New for code if no font family already set + if md.is_code and not run.font.name: + run.font.name = "Courier New" + + if md.hyperlink: + run.hyperlink.address = md.hyperlink + + +def apply_full_style_to_pptx_run(style: FullTextStyle, run) -> None: + """Apply complete FullTextStyle to a python-pptx run. + + Args: + style: FullTextStyle with both markdown and rich parts + run: python-pptx run object + """ + apply_rich_style_to_pptx_run(rich=style.rich, run=run) + apply_markdown_style_to_pptx_run(md=style.markdown, run=run) + + +# ============================================================================= +# Markdown Generation Functions (PPT -> Markdown) +# ============================================================================= + + +def _escape_markdown_for_table(text: str) -> str: + """Escape text for use in markdown table cells. + + Only escapes pipe characters and converts newlines to <br> tags. + Does NOT escape curly braces to preserve template variables like {var}. + + Args: + text: Raw text content + + Returns: + Text safe for markdown table cells + """ + # Escape pipe characters (table cell delimiters) + text = text.replace("|", "\\|") + # Convert newlines to HTML break tags for multiline cells + text = text.replace("\n", "<br>") + return text + + +def pptx_run_to_markdown(run) -> str: + """Convert a python-pptx run to markdown string. + + Uses pptx_font_to_full() to extract styling and applies markdown formatting. 
def _paragraph_has_bullet(paragraph) -> bool:
    """Check if a paragraph has bullet formatting in XML.

    A paragraph has bullets if:
    - buNone is NOT present (which explicitly disables bullets), AND
    - Either buChar (character bullet) or buAutoNum (numbered) is present

    The check is read-only: the paragraph properties element is looked up
    with find() rather than get_or_add_pPr(), so inspecting a paragraph that
    has no <a:pPr> does not insert one into the document.

    Args:
        paragraph: python-pptx paragraph object

    Returns:
        True if paragraph has bullet formatting, False otherwise (including
        when the paragraph has no properties element or the lookup fails)
    """
    try:
        pPr = paragraph._element.find(f"{{{_DRAWINGML_NS}}}pPr")
        if pPr is None:
            # No paragraph properties at all means no explicit bullet markup
            return False

        # If buNone exists, bullets are explicitly disabled
        if pPr.find(f"{{{_DRAWINGML_NS}}}buNone") is not None:
            return False

        # Either a character bullet or an auto-numbered bullet counts
        return any(
            pPr.find(f"{{{_DRAWINGML_NS}}}{tag}") is not None
            for tag in ("buChar", "buAutoNum")
        )
    except Exception:
        # Best-effort detection: treat any lookup problem as "no bullet"
        return False
def pptx_text_frame_to_markdown(text_frame: Optional[TextFrame]) -> str:
    """Render every paragraph of a text frame as markdown.

    Leading paragraphs that render to blank text are dropped; blank
    paragraphs after the first non-blank one are kept as empty lines.

    Args:
        text_frame: python-pptx TextFrame object (can be None)

    Returns:
        Markdown-formatted string with paragraphs joined by newlines, or ""
        for a falsy text frame
    """
    if not text_frame:
        return ""

    rendered = [pptx_paragraph_to_markdown(paragraph) for paragraph in text_frame.paragraphs]

    # Index of the first paragraph with visible content (or past the end)
    first_content = next(
        (index for index, line in enumerate(rendered) if line.strip()),
        len(rendered),
    )
    return "\n".join(rendered[first_content:])
class IdManager:
    """
    Manages unique ID generation for PowerPoint elements.

    Tracks used IDs across a presentation to ensure no duplicates are created,
    which would cause PowerPoint to prompt for file repair.
    """

    # Namespace URI of the a16 prefix that carries <a16:creationId> elements
    _A16_NAMESPACE = "http://schemas.microsoft.com/office/drawing/2014/main"

    def __init__(self, presentation: Presentation):
        """
        Initialize ID manager with existing presentation IDs.

        Args:
            presentation: The PowerPoint presentation to track IDs for
        """
        self.presentation = presentation
        self.used_slide_ids: Set[int] = set()
        self.used_shape_ids: Set[int] = set()
        self.used_creation_ids: Set[str] = set()
        self.next_slide_id = 256  # PowerPoint typically starts at 256
        self.next_shape_id = 1

        # Scan existing presentation for used IDs
        self._scan_existing_ids()

    def _scan_existing_ids(self):
        """Scan the presentation for existing IDs to avoid conflicts.

        Records every slide ID, every top-level shape ID and every creation ID
        found, and advances the next-ID counters past the largest value seen.
        Any error is logged and scanning stops with whatever was collected.
        """
        try:
            # Scan slide IDs
            for slide in self.presentation.slides:
                slide_id = slide.slide_id
                if slide_id:
                    self.used_slide_ids.add(slide_id)
                    self.next_slide_id = max(self.next_slide_id, slide_id + 1)

                # Scan shape IDs in each slide
                for shape in slide.shapes:
                    shape_id = getattr(shape, 'shape_id', None)
                    if shape_id:
                        self.used_shape_ids.add(shape_id)
                        self.next_shape_id = max(self.next_shape_id, shape_id + 1)

                    # Extract creation IDs from XML if present
                    creation_id = self._extract_creation_id(shape)
                    if creation_id:
                        self.used_creation_ids.add(creation_id)

        except Exception as e:
            logger.warning(f"Error scanning existing IDs: {e}")
            # Continue with default starting values

    def _extract_creation_id(self, shape) -> Optional[str]:
        """Extract a16:creationId from shape XML if present.

        The element tree is walked by fully-qualified tag name. The previous
        xpath() call bound the a16 prefix to the wrong namespace URI, so it
        could never match a creationId element.
        NOTE(review): python-pptx element classes appear to override xpath()
        without a ``namespaces`` parameter - confirm for the pinned version.

        Args:
            shape: Shape object whose ``_element`` XML is searched

        Returns:
            The ``id`` attribute of the first creationId element found, or
            None when there is no element, no match, or the lookup fails
        """
        element = getattr(shape, '_element', None)
        if element is None:
            return None
        try:
            for creation_id_elem in element.iter(f"{{{self._A16_NAMESPACE}}}creationId"):
                return creation_id_elem.get('id')
        except Exception:
            # Ignore XML parsing errors
            pass
        return None

    def generate_unique_slide_id(self) -> int:
        """
        Generate a unique slide ID for the presentation.

        Returns:
            A unique integer slide ID
        """
        while self.next_slide_id in self.used_slide_ids:
            self.next_slide_id += 1

        slide_id = self.next_slide_id
        self.used_slide_ids.add(slide_id)
        self.next_slide_id += 1

        logger.debug(f"Generated unique slide ID: {slide_id}")
        return slide_id

    def generate_unique_shape_id(self) -> int:
        """
        Generate a unique shape ID for the presentation.

        Returns:
            A unique integer shape ID
        """
        while self.next_shape_id in self.used_shape_ids:
            self.next_shape_id += 1

        shape_id = self.next_shape_id
        self.used_shape_ids.add(shape_id)
        self.next_shape_id += 1

        logger.debug(f"Generated unique shape ID: {shape_id}")
        return shape_id

    def generate_unique_creation_id(self) -> str:
        """
        Generate a unique creation ID (a16:creationId) for PowerPoint elements.

        Returns:
            A unique GUID string for creation ID
        """
        while True:
            creation_id = str(uuid.uuid4()).upper()
            if creation_id not in self.used_creation_ids:
                self.used_creation_ids.add(creation_id)
                logger.debug(f"Generated unique creation ID: {creation_id}")
                return creation_id

    def reserve_slide_id(self, slide_id: int):
        """
        Reserve a specific slide ID to prevent conflicts.

        Args:
            slide_id: The slide ID to reserve
        """
        self.used_slide_ids.add(slide_id)
        self.next_slide_id = max(self.next_slide_id, slide_id + 1)

    def reserve_shape_id(self, shape_id: int):
        """
        Reserve a specific shape ID to prevent conflicts.

        Args:
            shape_id: The shape ID to reserve
        """
        self.used_shape_ids.add(shape_id)
        self.next_shape_id = max(self.next_shape_id, shape_id + 1)

    def reserve_creation_id(self, creation_id: str):
        """
        Reserve a specific creation ID to prevent conflicts.

        Args:
            creation_id: The creation ID to reserve
        """
        self.used_creation_ids.add(creation_id)

    def get_id_mapping(self, source_slide: Slide) -> Dict[str, int]:
        """
        Generate ID mapping for all shapes in a source slide.

        This creates a mapping from old shape IDs to new unique shape IDs
        that can be used when copying the slide.

        Args:
            source_slide: The slide to generate ID mapping for

        Returns:
            Dictionary mapping old shape IDs (as strings) to new unique shape IDs
        """
        id_mapping = {}

        for shape in source_slide.shapes:
            old_shape_id = getattr(shape, 'shape_id', None)
            if old_shape_id:
                new_shape_id = self.generate_unique_shape_id()
                id_mapping[str(old_shape_id)] = new_shape_id

        return id_mapping

    def get_stats(self) -> Dict[str, int]:
        """
        Get statistics about ID usage.

        Returns:
            Dictionary with ID usage statistics
        """
        return {
            'used_slide_ids': len(self.used_slide_ids),
            'used_shape_ids': len(self.used_shape_ids),
            'used_creation_ids': len(self.used_creation_ids),
            'next_slide_id': self.next_slide_id,
            'next_shape_id': self.next_shape_id,
        }
def _find_font_file(font_family: str, bold: bool = False, italic: bool = False) -> str | None:
    """Try to find a font file on the system for the given font family.

    python-pptx's fit_text() requires a font file on Linux because it doesn't
    support auto-locating fonts on non-Windows systems.

    A .ttf/.otf file is a candidate when its lowercased name contains the
    requested family or one of its platform substitutes. The first candidate
    whose name carries exactly the requested bold/italic markers is returned.
    If no candidate matches the style exactly, the first candidate accepted
    by the looser legacy rule is returned, so every lookup that used to
    produce a path still does.

    Args:
        font_family: The font family name (e.g., "Calibri", "Arial")
        bold: Whether to look for bold variant
        italic: Whether to look for italic variant

    Returns:
        Path to font file if found, None otherwise (always None on Windows)
    """
    system = platform.system()

    # On Windows, fit_text() can auto-locate fonts, so return None to use default behavior
    if system == "Windows":
        return None

    # Common font directories on Linux/macOS
    font_dirs = [
        "/usr/share/fonts",
        "/usr/local/share/fonts",
        os.path.expanduser("~/.fonts"),
        os.path.expanduser("~/.local/share/fonts"),
    ]
    if system == "Darwin":  # macOS
        font_dirs.extend(
            [
                "/Library/Fonts",
                "/System/Library/Fonts",
                os.path.expanduser("~/Library/Fonts"),
            ]
        )

    # Map common fonts to system equivalents
    # On macOS: use Helvetica/Arial (built-in)
    # On Linux: use Liberation fonts (metrically compatible with MS fonts)
    if system == "Darwin":
        font_substitutes = {
            "calibri": ["helvetica", "arial"],
            "arial": ["helvetica"],
            "times new roman": ["times"],
            "times": [],
            "courier new": ["courier"],
            "courier": [],
        }
    else:
        font_substitutes = {
            "calibri": ["liberation"],
            "arial": ["liberation"],
            "helvetica": ["liberation"],
            "times new roman": ["liberation"],
            "times": ["liberation"],
            "courier new": ["liberation"],
            "courier": ["liberation"],
        }

    # Lowercased family plus the substitutes of the first matching known family
    font_lower = font_family.lower()
    search_terms = {font_lower}
    for key, substitutes in font_substitutes.items():
        if key in font_lower:
            search_terms.update(substitutes)
            break

    # Markers the legacy lookup accepted. The empty marker is contained in
    # every filename, i.e. a regular request used to accept any style.
    if bold and italic:
        legacy_markers = ("bolditalic", "bold-italic")
    elif bold:
        legacy_markers = ("bold",)
    elif italic:
        legacy_markers = ("italic",)
    else:
        legacy_markers = ("",)

    want_bold = bool(bold)
    want_italic = bool(italic)
    fallback_path = None

    for font_dir in font_dirs:
        if not os.path.isdir(font_dir):
            continue

        for root, _dirs, files in os.walk(font_dir):
            for filename in files:
                if not filename.endswith((".ttf", ".TTF", ".otf", ".OTF")):
                    continue

                filename_lower = filename.lower()
                if not any(term in filename_lower for term in search_terms):
                    continue

                # Exact style: the bold/italic markers in the name must agree
                # with the request, so "Bold" no longer matches "BoldItalic"
                # and a regular request no longer matches styled files.
                has_bold = "bold" in filename_lower
                has_italic = "italic" in filename_lower
                if has_bold == want_bold and has_italic == want_italic:
                    font_path = os.path.join(root, filename)
                    logger.debug(f"Found font file for '{font_family}': {font_path}")
                    return font_path

                # Remember the first file the legacy rule would have returned
                if fallback_path is None and any(
                    marker in filename_lower for marker in legacy_markers
                ):
                    fallback_path = os.path.join(root, filename)

    if fallback_path is not None:
        logger.debug(
            f"No exact style match for '{font_family}', using closest file: {fallback_path}"
        )
        return fallback_path

    logger.debug(f"Could not find font file for '{font_family}' on this system")
    return None
def _restore_bodypr_insets(text_frame: TextFrame, insets: dict) -> None:
    """Write previously saved inset attributes back onto the frame's bodyPr.

    Does nothing when there are no saved insets or the text frame has no
    bodyPr element. Failures are logged at debug level and not raised.

    Args:
        text_frame: The PowerPoint text frame to restore insets to
        insets: Dictionary of inset attribute names to values
    """
    if not insets:
        return

    try:
        body_props = text_frame._element.find(f"{{{_DRAWINGML_NS}}}bodyPr")
        if body_props is None:
            return
        for inset_name in insets:
            inset_value = insets[inset_name]
            body_props.set(inset_name, inset_value)
            logger.debug(f"Restored bodyPr.{inset_name}: {inset_value}")
    except Exception as exc:
        logger.debug(f"Could not restore bodyPr insets: {exc}")
def apply_markdown_to_textframe(
    markdown_text: str,
    text_frame: TextFrame,
    base_style: Optional[FullTextStyle] = None,
    autoscale: bool = False,
) -> None:
    """Apply markdown formatting to a PowerPoint text frame.

    Args:
        markdown_text: The markdown text to convert
        text_frame: The PowerPoint text frame to write to
        base_style: Optional base text style (from gslides-api TextStyle).
            NOTE: Only RichStyle properties (font_family, font_size, color, underline)
            are applied from base_style. Markdown-renderable properties (bold, italic)
            should come from the markdown content itself (e.g., **bold**, *italic*).
        autoscale: Whether to enable PowerPoint autoscaling

    Note:
        This function clears the existing content of the text frame before writing.
        With autoscale enabled, fit_text() is used to pick the font size; the
        per-run font name, bold and italic values present before fit_text()
        are put back afterwards, including unset (None) values.
    """
    # Parse markdown to IR using shared parser
    ir_doc = parse_markdown_to_ir(markdown_text, base_style=base_style)

    # Capture original font size before clearing (for autoscale cap)
    # Autoscaling should only decrease font size, never increase it
    original_max_font_size = None
    if autoscale:
        original_max_font_size = _get_max_font_size_from_textframe(text_frame)
        # Fall back to base_style font size if no font found in text frame
        if original_max_font_size is None and base_style and base_style.rich.font_size_pt:
            original_max_font_size = base_style.rich.font_size_pt

    # Preserve bodyPr insets before clearing (they may be reset by clear())
    preserved_insets = _preserve_bodypr_insets(text_frame)

    # Preserve paragraph styles (first para + bullet styles) before clearing
    preserved_styles = _preserve_paragraph_properties(text_frame)

    # Clear existing content
    text_frame.clear()

    # Restore bodyPr insets after clearing
    _restore_bodypr_insets(text_frame, preserved_insets)

    # Enable word wrap to ensure text wraps to box width
    text_frame.word_wrap = True

    # Convert IR to PowerPoint operations (writes the text content)
    _apply_ir_to_textframe(ir_doc, text_frame, base_style, preserved_styles)

    if not autoscale:
        return

    # Apply autoscaling AFTER text is written - fit_text() calculates
    # the best font size based on actual text content and shape dimensions.
    # Note: MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE only sets a flag that PowerPoint's
    # rendering engine applies when you edit the text - it doesn't work on file open.
    # fit_text() directly calculates and sets the font size, working immediately.

    # Get font family from first paragraph/run
    font_family = "Calibri"  # Default
    if text_frame.paragraphs and text_frame.paragraphs[0].runs:
        first_run = text_frame.paragraphs[0].runs[0]
        if first_run.font.name:
            font_family = first_run.font.name

    try:
        # STEP 1: Save per-run name/bold/italic state before fit_text overwrites it.
        # fit_text() applies one family, bold and italic value to ALL runs,
        # so each run's own values have to be saved and restored.
        saved_styles: list[list[tuple[str | None, bool | None, bool | None]]] = [
            [(run.font.name, run.font.bold, run.font.italic) for run in para.runs]
            for para in text_frame.paragraphs
        ]

        # STEP 2: Call fit_text to calculate and apply best font size
        # Use bold=False, italic=False for font file lookup (sizing only)
        font_file = _find_font_file(font_family, bold=False, italic=False)

        # Use original font size as cap to prevent increasing font size
        # Autoscaling should only decrease, never increase
        # Fall back to 18pt if we couldn't determine original size
        max_size = original_max_font_size if original_max_font_size is not None else 18

        text_frame.fit_text(
            font_family=font_family,
            max_size=max_size,
            bold=False,  # Doesn't matter - we restore after
            italic=False,  # Doesn't matter - we restore after
            font_file=font_file,
        )

        # STEP 3: Restore the per-run state that fit_text overwrote.
        # None values are restored too: None means "inherit", and leaving the
        # values stamped by fit_text in place would override inherited
        # formatting and replace per-run fonts (e.g. a code font).
        for para, para_styles in zip(text_frame.paragraphs, saved_styles):
            for run, (saved_name, saved_bold, saved_italic) in zip(para.runs, para_styles):
                run.font.name = saved_name
                run.font.bold = saved_bold
                run.font.italic = saved_italic

    except Exception as e:
        # If fit_text fails (e.g., font not found), fall back to setting the flag
        # The flag at least preserves the intent for when file is edited in PowerPoint
        logger.warning(f"fit_text() failed, falling back to auto_size flag: {e}")
        text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
item in element.items: + for para in item.paragraphs: + # Create paragraph + if first_para: + p = text_frame.paragraphs[0] + first_para = False + else: + p = text_frame.add_paragraph() + + # Set indentation level + p.level = item.nesting_level + + # Enable bullet formatting via XML with proper spacing + # Note: Setting p.level alone only sets indentation, not bullet markers + # All list items use bullet_style + bullet_props = preserved_styles.bullet_style if preserved_styles else None + _enable_paragraph_bullets( + p, + level=item.nesting_level, + preserved_props=bullet_props, + ) + + # Add runs to the paragraph (handling soft line breaks properly) + for run_ir in para.runs: + _add_run_with_soft_breaks(p, run_ir.content, run_ir.style) + + output_para_index += 1 + + +def _set_space_before(pPr, val: str) -> None: + """Set the space-before (spcBef) property on a paragraph properties element. + + Args: + pPr: The paragraph properties XML element (<a:pPr>) + val: The space before value in 100ths of a point (e.g., "900" = 9pt) + """ + # Remove existing spcBef if present + existing = pPr.find(f"{{{_DRAWINGML_NS}}}spcBef") + if existing is not None: + pPr.remove(existing) + + # Create spcBef with spcPts child: <a:spcBef><a:spcPts val="900"/></a:spcBef> + spcBef = etree.SubElement(pPr, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts = etree.SubElement(spcBef, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", val) + + +def _set_space_after(pPr, val: str) -> None: + """Set the space-after (spcAft) property on a paragraph properties element. 
def _set_line_spacing(pPr, spacing: SpacingValue) -> None:
    """Set the line spacing (lnSpc) property on a paragraph properties element.

    Any existing <a:lnSpc> child is removed first. A new one is then inserted
    as the FIRST child of pPr, matching the element order used by
    _apply_paragraph_spacing (lnSpc, spcBef, spcAft, then bullet elements).

    If ``spacing`` carries neither a percentage nor a point value, no lnSpc
    element is written at all: an <a:lnSpc> without a spcPct/spcPts child is
    not a valid spacing element, so the paragraph is left to inherit its
    line spacing instead.

    Args:
        pPr: The paragraph properties XML element (<a:pPr>)
        spacing: SpacingValue with either points or percentage
    """
    ln_spc_tag = f"{{{_DRAWINGML_NS}}}lnSpc"

    # Remove existing lnSpc if present
    existing = pPr.find(ln_spc_tag)
    if existing is not None:
        pPr.remove(existing)

    # Percentage takes precedence over points, as before.
    if spacing.percentage is not None:
        # <a:lnSpc><a:spcPct val="110000"/></a:lnSpc>
        child_name, child_val = "spcPct", spacing.to_pptx_pct()
    elif spacing.points is not None:
        # <a:lnSpc><a:spcPts val="1800"/></a:lnSpc>
        child_name, child_val = "spcPts", spacing.to_pptx_pts()
    else:
        # Nothing to express; do not leave an empty <a:lnSpc/> behind.
        return

    lnSpc = etree.Element(ln_spc_tag)
    child = etree.SubElement(lnSpc, f"{{{_DRAWINGML_NS}}}{child_name}")
    child.set("val", child_val)

    # lnSpc must precede every other pPr child, so insert at the front
    # rather than appending after bullet elements.
    pPr.insert(0, lnSpc)
+ This function inserts spacing elements at the beginning of pPr to ensure correct order. + + Args: + paragraph: python-pptx paragraph object + preserved_props: Optional ParagraphStyle with preserved paragraph properties + from the original template (space_before, space_after, line_spacing) + """ + if preserved_props is None: + return + + pPr = paragraph._element.get_or_add_pPr() + + # Remove existing spacing elements (we'll re-add in correct order) + for tag in ["lnSpc", "spcBef", "spcAft"]: + existing = pPr.find(f"{{{_DRAWINGML_NS}}}{tag}") + if existing is not None: + pPr.remove(existing) + + # Insert spacing elements in REVERSE order at position 0 + # This results in correct order: lnSpc, spcBef, spcAft, [other elements] + + # 3. Space after (insert at position 0 first, will be pushed to position 2) + if preserved_props.space_after is not None: + space_val = preserved_props.space_after.to_pptx_pts() + spcAft = etree.Element(f"{{{_DRAWINGML_NS}}}spcAft") + spcPts = etree.SubElement(spcAft, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", space_val) + pPr.insert(0, spcAft) + + # 2. Space before (insert at position 0, pushes spcAft to position 1) + if preserved_props.space_before is not None: + space_val = preserved_props.space_before.to_pptx_pts() + if space_val != "0": + spcBef = etree.Element(f"{{{_DRAWINGML_NS}}}spcBef") + spcPts = etree.SubElement(spcBef, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", space_val) + pPr.insert(0, spcBef) + + # 1. 
def _enable_paragraph_bullets(
    paragraph,
    char: str = "•",
    level: int = 0,
    preserved_props: Optional[ParagraphStyle] = None,
) -> None:
    """Enable bullet formatting for a paragraph via XML manipulation.

    In python-pptx, setting paragraph.level only sets indentation, it does NOT
    enable bullet formatting. To actually show bullets this function removes
    any buNone element, sets marL (left margin) and indent (first-line indent)
    on <a:pPr>, applies the preserved spacing, and appends a buChar element.

    IMPORTANT: XML element order in <a:pPr> affects PowerPoint rendering.
    Spacing elements (lnSpc, spcBef, spcAft) are inserted at the front by
    _apply_paragraph_spacing; buChar is appended at the end afterwards.

    Margin/indent selection:
      - level 0 with a preserved margin_left: use the preserved margin and the
        preserved indent. A preserved indent of 0 is honoured (it is only
        replaced by the default when it is None).
      - otherwise: marL = _BULLET_INDENT_EMU * (level + 1) and
        indent = -_BULLET_INDENT_EMU.

    Failures are logged as warnings and never raised (best effort).

    Args:
        paragraph: python-pptx paragraph object
        char: Bullet character to use (default: •)
        level: Nesting level for indentation (0 = first level)
        preserved_props: Optional ParagraphStyle with preserved paragraph properties
            from the original template (margins, indents, spacing)
    """
    try:
        # Get or create paragraph properties element
        pPr = paragraph._element.get_or_add_pPr()

        # buNone explicitly disables bullets, so it has to go.
        buNone = pPr.find(f"{{{_DRAWINGML_NS}}}buNone")
        if buNone is not None:
            pPr.remove(buNone)

        # marL = total left margin (where text starts)
        # indent = first-line offset (negative = hanging indent, pulls bullet left)
        if preserved_props and preserved_props.margin_left is not None and level == 0:
            margin_left = str(preserved_props.margin_left)
            # Compare against None, not truthiness: 0 is a legitimate indent.
            if preserved_props.indent is not None:
                indent = str(preserved_props.indent)
            else:
                indent = str(-_BULLET_INDENT_EMU)
        else:
            # Defaults, scaled by nesting level
            margin_left = str(_BULLET_INDENT_EMU * (level + 1))
            indent = str(-_BULLET_INDENT_EMU)

        pPr.set("marL", margin_left)
        pPr.set("indent", indent)

        # Spacing goes in first so it ends up ahead of the bullet elements.
        _apply_paragraph_spacing(paragraph, preserved_props)

        # Drop any existing bullet character; it is re-added at the end.
        existing_buChar = pPr.find(f"{{{_DRAWINGML_NS}}}buChar")
        if existing_buChar is not None:
            pPr.remove(existing_buChar)

        buChar = etree.Element(f"{{{_DRAWINGML_NS}}}buChar")
        buChar.set("char", char)
        pPr.append(buChar)

        logger.debug(f"Enabled bullet formatting for paragraph with char='{char}', level={level}")
    except Exception as e:
        logger.warning(f"Could not enable bullet formatting: {e}")
class RelationshipCopier:
    """
    Manages copying of relationships between slides.

    Handles the complex task of copying images, charts, hyperlinks, and other
    embedded objects while maintaining proper references and avoiding corruption.

    Relationships are dispatched on the relationship type string: hyperlinks,
    other external relationships, images, charts, and finally a generic
    "relate to the same target part" fallback. Every copy method is best
    effort: failures are logged and reported as ``None`` rather than raised.
    """

    # Namespace bound to the ``r:`` prefix on relationship-id attributes.
    _REL_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

    def __init__(self):
        """Initialize the relationship copier."""
        # Source rId -> new rId for relationships copied so far. Keys are bare
        # rIds, so ids reused by different source slides overwrite each other.
        self.copied_relationships: Dict[str, str] = {}
        # Paths that cleanup() removes; nothing in this class appends to it.
        self.temp_files: List[str] = []

    @staticmethod
    def _relate_like(relationship, target_part) -> str:
        """Create on ``target_part`` a relationship equivalent to ``relationship``.

        External relationships (URL targets) must be created with
        ``is_external=True``; otherwise the URL string would be stored as if it
        were a package part. Internal relationships point at the same target
        part object as the source relationship.
        """
        if getattr(relationship, 'is_external', False):
            return target_part.relate_to(
                relationship.target_ref, relationship.reltype, is_external=True
            )
        return target_part.relate_to(relationship._target, relationship.reltype)

    def copy_slide_relationships(
        self,
        source_slide: Slide,
        target_slide: Slide,
        exclude_notes: bool = True
    ) -> Dict[str, str]:
        """
        Copy all relationships from source slide to target slide.

        slideLayout relationships are always skipped (the target slide already
        has its own layout); notesSlide relationships are skipped when
        ``exclude_notes`` is true. A relationship that fails to copy is logged
        and left out of the result; it does not abort the remaining ones.

        Args:
            source_slide: The slide to copy relationships from
            target_slide: The slide to copy relationships to
            exclude_notes: Whether to exclude notes slide relationships

        Returns:
            Dictionary mapping old relationship IDs to new ones
        """
        relationship_mapping: Dict[str, str] = {}

        try:
            if not hasattr(source_slide, 'part') or not hasattr(target_slide, 'part'):
                logger.warning("Slides missing part attribute for relationship copying")
                return relationship_mapping

            source_part = source_slide.part
            target_part = target_slide.part

            for rel_id, relationship in source_part.rels.items():
                try:
                    if exclude_notes and "notesSlide" in relationship.reltype:
                        logger.debug(f"Skipping notes slide relationship: {rel_id}")
                        continue

                    # Copying the layout relationship would duplicate layout
                    # files in the ZIP.
                    if "slideLayout" in relationship.reltype:
                        logger.debug(f"Skipping slideLayout relationship: {rel_id}")
                        continue

                    new_rel_id = self._copy_single_relationship(
                        relationship,
                        source_part,
                        target_part,
                        rel_id
                    )

                    if new_rel_id:
                        relationship_mapping[rel_id] = new_rel_id
                        # Record the copy so get_relationship_stats() reflects it.
                        self.copied_relationships[rel_id] = new_rel_id
                        logger.debug(f"Copied relationship {rel_id} -> {new_rel_id}")

                except Exception as e:
                    logger.warning(f"Failed to copy relationship {rel_id}: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error copying slide relationships: {e}")

        return relationship_mapping

    def _copy_single_relationship(
        self,
        relationship,
        source_part: SlidePart,
        target_part: SlidePart,
        original_rel_id: str
    ) -> Optional[str]:
        """
        Copy a single relationship from source to target part.

        Dispatch order: hyperlink, any other external relationship, image,
        chart, generic. External relationships have no part/blob to copy, so
        they are routed to the generic copier, which re-creates the link.

        Args:
            relationship: The relationship object to copy
            source_part: Source slide part
            target_part: Target slide part
            original_rel_id: Original relationship ID

        Returns:
            New relationship ID if successful, None otherwise
        """
        try:
            rel_type = relationship.reltype.lower()

            if "hyperlink" in rel_type:
                copier = self._copy_hyperlink_relationship
            elif getattr(relationship, 'is_external', False):
                copier = self._copy_generic_relationship
            elif "image" in rel_type:
                copier = self._copy_image_relationship
            elif "chart" in rel_type:
                copier = self._copy_chart_relationship
            else:
                copier = self._copy_generic_relationship

            return copier(relationship, source_part, target_part, original_rel_id)

        except Exception as e:
            logger.warning(f"Failed to copy relationship {original_rel_id}: {e}")
            return None

    def _copy_image_relationship(
        self,
        relationship,
        source_part: SlidePart,
        target_part: SlidePart,
        original_rel_id: str
    ) -> Optional[str]:
        """
        Copy an image relationship, including the image data.

        Args:
            relationship: The image relationship to copy
            source_part: Source slide part
            target_part: Target slide part
            original_rel_id: Original relationship ID

        Returns:
            New relationship ID if successful, None otherwise
        """
        try:
            image_part = relationship._target

            if not hasattr(image_part, 'blob'):
                logger.warning(f"Image part {original_rel_id} has no blob data")
                return None

            # get_or_add_image_part needs a seekable stream, not raw bytes.
            image_stream = io.BytesIO(image_part.blob)

            # Returns (ImagePart, rId_string); only the rId is needed here.
            _, new_rel_id = target_part.get_or_add_image_part(image_stream)

            logger.debug(f"Copied image relationship {original_rel_id} -> {new_rel_id}")
            return new_rel_id

        except Exception as e:
            logger.error(f"Failed to copy image relationship {original_rel_id}: {e}")
            return None

    def _copy_chart_relationship(
        self,
        relationship,
        source_part: SlidePart,
        target_part: SlidePart,
        original_rel_id: str
    ) -> Optional[str]:
        """
        Copy a chart relationship with all embedded data.

        Charts consist of:
        - Main chart XML (chartN.xml)
        - Chart style XML (styleN.xml)
        - Chart color style XML (colorsN.xml)
        - Embedded Excel workbook (Microsoft_Excel_Worksheet.xlsx)

        Sub-relationships without a blob, or of an unrecognised type, are
        skipped (logged) rather than copied.

        Args:
            relationship: The chart relationship to copy
            source_part: Source slide part
            target_part: Target slide part
            original_rel_id: Original relationship ID

        Returns:
            New relationship ID if successful, None otherwise
        """
        try:
            source_chart_part = relationship._target
            package = target_part.package

            # 1. Generate new partname for the chart
            new_chart_partname = package.next_partname('/ppt/charts/chart%d.xml')

            # 2. Create new ChartPart with copied blob
            new_chart_part = ChartPart.load(
                partname=new_chart_partname,
                content_type=source_chart_part.content_type,
                package=package,
                blob=source_chart_part.blob
            )

            # 3. Copy chart's sub-relationships (style, colors, Excel)
            for sub_rel_id, sub_rel in source_chart_part.rels.items():
                sub_target = sub_rel._target
                if not hasattr(sub_target, 'blob'):
                    logger.debug(f"Skipping sub-relationship {sub_rel_id} without blob")
                    continue

                # Partname template depends on the sub-relationship type
                if 'chartStyle' in sub_rel.reltype:
                    new_sub_partname = package.next_partname('/ppt/charts/style%d.xml')
                elif 'chartColorStyle' in sub_rel.reltype:
                    new_sub_partname = package.next_partname('/ppt/charts/colors%d.xml')
                elif 'package' in sub_rel.reltype:
                    # Embedded Excel - use embeddings folder
                    new_sub_partname = package.next_partname(
                        '/ppt/embeddings/Microsoft_Excel_Worksheet%d.xlsx'
                    )
                else:
                    logger.warning(f"Unknown chart sub-relationship type: {sub_rel.reltype}")
                    continue

                new_sub_part = Part.load(
                    partname=new_sub_partname,
                    content_type=sub_target.content_type,
                    package=package,
                    blob=sub_target.blob
                )

                new_chart_part.relate_to(new_sub_part, sub_rel.reltype)
                logger.debug(
                    f"Copied chart sub-relationship: {sub_rel.reltype} -> {new_sub_partname}"
                )

            # 4. Create relationship from target slide to new chart
            new_rel_id = target_part.relate_to(new_chart_part, relationship.reltype)

            logger.info(
                f"Successfully copied chart relationship {original_rel_id} -> {new_rel_id} "
                f"({source_chart_part.partname} -> {new_chart_partname})"
            )
            return new_rel_id

        except Exception as e:
            logger.error(f"Failed to copy chart relationship {original_rel_id}: {e}")
            return None

    def _copy_hyperlink_relationship(
        self,
        relationship,
        source_part: SlidePart,
        target_part: SlidePart,
        original_rel_id: str
    ) -> Optional[str]:
        """
        Copy a hyperlink relationship.

        External hyperlinks are re-created as external relationships to the
        same URL; a hyperlink whose target is a package part is related to
        that same part.

        Args:
            relationship: The hyperlink relationship to copy
            source_part: Source slide part
            target_part: Target slide part
            original_rel_id: Original relationship ID

        Returns:
            New relationship ID if successful, None otherwise
        """
        try:
            new_rel_id = self._relate_like(relationship, target_part)

            logger.debug(f"Copied hyperlink relationship {original_rel_id} -> {new_rel_id}")
            return new_rel_id

        except Exception as e:
            logger.error(f"Failed to copy hyperlink relationship {original_rel_id}: {e}")
            return None

    def _copy_generic_relationship(
        self,
        relationship,
        source_part: SlidePart,
        target_part: SlidePart,
        original_rel_id: str
    ) -> Optional[str]:
        """
        Copy a generic relationship.

        The new relationship points at the same target as the source one
        (same part object, or same URL for an external relationship); no
        part data is duplicated.

        Args:
            relationship: The relationship to copy
            source_part: Source slide part
            target_part: Target slide part
            original_rel_id: Original relationship ID

        Returns:
            New relationship ID if successful, None otherwise
        """
        try:
            new_rel_id = self._relate_like(relationship, target_part)

            logger.debug(f"Copied generic relationship {original_rel_id} -> {new_rel_id}")
            return new_rel_id

        except Exception as e:
            logger.warning(f"Failed to copy generic relationship {original_rel_id}: {e}")
            return None

    def update_relationship_references(
        self,
        slide_element,
        relationship_mapping: Dict[str, str]
    ) -> bool:
        """
        Update relationship references in slide XML elements.

        Only ``r:id`` attributes are rewritten; attributes whose value is not
        a key of ``relationship_mapping`` are left untouched.

        Args:
            slide_element: The slide XML element to update
            relationship_mapping: Mapping of old rel IDs to new rel IDs

        Returns:
            True if the update ran, False when there was no element or no
            mapping, or when an error occurred
        """
        try:
            # Compare with None explicitly: truthiness of an XML element
            # reflects its child count, not whether an element was supplied.
            if slide_element is None or not relationship_mapping:
                return False

            rid_attr = f'{{{self._REL_NS}}}id'
            rel_id_attrs = slide_element.xpath('.//@r:id', namespaces={'r': self._REL_NS})

            updated_count = 0
            for attr in rel_id_attrs:
                # Attribute results expose getparent() to reach the owner element.
                if not hasattr(attr, 'getparent'):
                    continue
                owner = attr.getparent()
                old_rel_id = owner.get(rid_attr)

                if old_rel_id in relationship_mapping:
                    owner.set(rid_attr, relationship_mapping[old_rel_id])
                    updated_count += 1

            logger.debug(f"Updated {updated_count} relationship references")
            return True

        except Exception as e:
            logger.error(f"Failed to update relationship references: {e}")
            return False

    def copy_notes_slide_relationships(
        self,
        source_slide: Slide,
        target_slide: Slide
    ) -> bool:
        """
        Copy notes slide relationships if present.

        Only the plain notes text is transferred; whitespace-only notes are
        not written to the target.

        Args:
            source_slide: Source slide with notes
            target_slide: Target slide to copy notes to

        Returns:
            True if successful (including "nothing to copy"), False otherwise
        """
        try:
            if not source_slide.has_notes_slide:
                return True  # No notes to copy

            source_notes = source_slide.notes_slide
            target_notes = target_slide.notes_slide

            if not source_notes.notes_text_frame or not target_notes.notes_text_frame:
                return True  # No text frames to copy

            source_text = source_notes.notes_text_frame.text
            if source_text.strip():
                target_notes.notes_text_frame.text = source_text

            logger.debug("Copied notes slide content")
            return True

        except Exception as e:
            logger.warning(f"Failed to copy notes slide relationships: {e}")
            return False

    def cleanup(self):
        """Clean up temporary files created during copying.

        Removes every path listed in ``temp_files`` that still exists (logging,
        not raising, on failure), then clears both bookkeeping collections.
        """
        for temp_file in self.temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
            except Exception as e:
                logger.warning(f"Failed to clean up temp file {temp_file}: {e}")

        self.temp_files.clear()
        self.copied_relationships.clear()

    def get_relationship_stats(self) -> Dict[str, int]:
        """
        Get statistics about copied relationships.

        Returns:
            Dictionary with relationship copying statistics
        """
        return {
            'copied_relationships': len(self.copied_relationships),
            'temp_files_created': len(self.temp_files),
        }
+""" + +import logging +from copy import deepcopy +from typing import Dict, List, Optional, Any, Tuple + +from pptx.shapes.base import BaseShape +from pptx.shapes.picture import Picture +from pptx.shapes.graphfrm import GraphicFrame +from pptx.shapes.autoshape import Shape +from pptx.slide import Slide +from pptx.enum.shapes import MSO_SHAPE_TYPE +from pptx.util import Inches, Emu + +from .xml_utils import XmlUtils +from .id_manager import IdManager + +logger = logging.getLogger(__name__) + +# XML namespaces for shape properties +_DRAWINGML_NS = "http://schemas.openxmlformats.org/drawingml/2006/main" +_PRESENTATIONML_NS = "http://schemas.openxmlformats.org/presentationml/2006/main" + +# Fill element tags to copy +_FILL_TAGS = [ + f"{{{_DRAWINGML_NS}}}solidFill", + f"{{{_DRAWINGML_NS}}}gradFill", + f"{{{_DRAWINGML_NS}}}pattFill", + f"{{{_DRAWINGML_NS}}}blipFill", + f"{{{_DRAWINGML_NS}}}noFill", +] + +# Border element tags to copy for table cells +_BORDER_TAGS = [ + f"{{{_DRAWINGML_NS}}}lnL", # Left border + f"{{{_DRAWINGML_NS}}}lnR", # Right border + f"{{{_DRAWINGML_NS}}}lnT", # Top border + f"{{{_DRAWINGML_NS}}}lnB", # Bottom border + f"{{{_DRAWINGML_NS}}}lnTlToBr", # Diagonal top-left to bottom-right + f"{{{_DRAWINGML_NS}}}lnBlToTr", # Diagonal bottom-left to top-right +] + +# Table cell properties attributes to copy +_TCPR_ATTRS = [ + "marL", # Left margin + "marR", # Right margin + "marT", # Top margin + "marB", # Bottom margin + "anchor", # Vertical alignment + "anchorCtr", # Horizontal center anchor + "horzOverflow", # Horizontal overflow + "vert", # Text direction +] + +# bodyPr attributes to copy for text positioning +_BODYPR_ATTRS = [ + "anchor", # Vertical alignment (t, ctr, b) + "anchorCtr", # Horizontal center anchor + "lIns", # Left inset (internal margin) + "rIns", # Right inset + "tIns", # Top inset + "bIns", # Bottom inset + "wrap", # Text wrapping mode + "rtlCol", # Right-to-left column mode + "rot", # Text rotation + "spcFirstLastPara", # Honor 
def copy_shape(
    self,
    source_shape: BaseShape,
    target_slide: Slide,
    position_offset: Optional[Tuple[float, float]] = None,
    relationship_mapping: Optional[Dict[str, str]] = None
) -> Optional[BaseShape]:
    """
    Copy a shape from source to target slide.

    Dispatches on ``source_shape.shape_type``:
      - TEXT_BOX, and AUTO_SHAPE that actually contains text -> text copier
      - AUTO_SHAPE without text -> auto-shape copier (keeps the geometry)
      - PICTURE / TABLE / PLACEHOLDER / GROUP / FREEFORM -> their own copiers
      - anything else -> generic copier, after a warning

    "Contains text" means the shape's text frame has non-whitespace text.
    Merely having a ``text_frame`` attribute is not enough: testing for the
    attribute alone sends every auto shape to the text copier and leaves the
    auto-shape branch unreachable.

    Args:
        source_shape: The shape to copy
        target_slide: The slide to copy the shape to
        position_offset: Optional (x, y) offset for positioning
        relationship_mapping: Optional mapping of old relationship IDs to new ones.
            Required for proper image handling in GROUP shapes and generic XML copying.

    Returns:
        The newly created shape, or None if copying failed (some copiers also
        return None on success because they insert XML directly)
    """
    try:
        shape_type = source_shape.shape_type
        logger.debug(f"Copying shape type: {shape_type}")

        args = (source_shape, target_slide, position_offset, relationship_mapping)

        if shape_type == MSO_SHAPE_TYPE.TEXT_BOX:
            return self._copy_text_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.AUTO_SHAPE:
            text_frame = getattr(source_shape, 'text_frame', None)
            has_text = text_frame is not None and bool(text_frame.text.strip())
            if has_text:
                return self._copy_text_shape(*args)
            return self._copy_auto_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.PICTURE:
            return self._copy_image_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.TABLE:
            return self._copy_table_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.PLACEHOLDER:
            return self._copy_placeholder_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.GROUP:
            return self._copy_group_shape(*args)

        if shape_type == MSO_SHAPE_TYPE.FREEFORM:
            return self._copy_freeform_shape(*args)

        logger.warning(f"Unsupported shape type for copying: {shape_type}")
        return self._copy_generic_shape(*args)

    except Exception as e:
        logger.error(f"Failed to copy shape: {e}")
        return None
def _copy_image_shape(
    self,
    source_shape: Picture,
    target_slide: Slide,
    position_offset: Optional[Tuple[float, float]] = None,
    relationship_mapping: Optional[Dict[str, str]] = None
) -> Optional[BaseShape]:
    """
    Clone a PICTURE shape at the XML level so its blipFill survives intact.

    python-pptx's add_picture() would rebuild the picture and drop details
    such as:
    - srcRect (crop settings)
    - rotWithShape attribute
    - alphaModFix and other blip effects
    - stretch mode settings

    The cloned element receives fresh shape/creation IDs, has its relationship
    references remapped by XmlUtils.copy_shape_element, is optionally shifted
    by ``position_offset``, and is inserted into the target shape tree ahead
    of ``p:extLst``.

    Args:
        source_shape: The PICTURE shape to copy
        target_slide: The slide to copy the shape to
        position_offset: Optional (x, y) offset for positioning
        relationship_mapping: Mapping of old relationship IDs to new ones

    Returns:
        Always None: the picture is inserted as raw XML and no shape object is
        produced, whether the copy succeeded, was skipped, or failed.
    """
    try:
        logger.debug("Copying PICTURE shape via XML to preserve blipFill properties")

        # Fresh identifiers for the clone (shape id first, then creation id)
        shape_id = self.id_manager.generate_unique_shape_id()
        creation_id = self.id_manager.generate_unique_creation_id()

        cloned = XmlUtils.copy_shape_element(
            source_shape,
            shape_id,
            creation_id,
            relationship_mapping=relationship_mapping
        )

        # Nothing to insert when the XML copy produced no element.
        if cloned is None:
            return None

        if position_offset:
            self._apply_position_offset_to_element(cloned, position_offset)

        target_slide.shapes._spTree.insert_element_before(cloned, 'p:extLst')
        logger.debug("Successfully copied PICTURE shape via XML")

    except Exception as e:
        logger.error(f"Failed to copy image shape: {e}")

    # XML-copied shapes don't return a shape object
    return None
def _copy_auto_shape(
    self,
    source_shape: Shape,
    target_slide: Slide,
    position_offset: Optional[Tuple[float, float]] = None,
    relationship_mapping: Optional[Dict[str, str]] = None
) -> Optional[BaseShape]:
    """Copy an auto shape (geometric shape) onto ``target_slide``.

    A new shape of the same auto-shape type is added at the source's geometry
    (shifted by ``position_offset`` when given), then the text frame and the
    remaining shape properties are copied across.

    Args:
        source_shape: The auto shape to copy
        target_slide: The slide to add the new shape to
        position_offset: Optional (x, y) offset for positioning
        relationship_mapping: Mapping of old relationship IDs to new ones,
            forwarded to the shape-property copy

    Returns:
        The newly created shape, or None if copying failed (the failure is
        logged, not raised).
    """
    try:
        # Get position and size
        left, top, width, height = self._get_shape_geometry(source_shape, position_offset)

        # Get auto shape type. python-pptx raises ValueError (not
        # AttributeError) when the shape is not an auto shape, which a plain
        # hasattr() check lets through, so both cases are handled explicitly.
        try:
            auto_shape_type = source_shape.auto_shape_type
        except (AttributeError, ValueError):
            auto_shape_type = None

        if auto_shape_type is None:
            # Default to rectangle if we can't determine the type
            from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
            auto_shape_type = MSO_AUTO_SHAPE_TYPE.RECTANGLE

        # Create new auto shape
        auto_shape = target_slide.shapes.add_shape(auto_shape_type, left, top, width, height)

        # Copy text if present
        if hasattr(source_shape, 'text_frame') and source_shape.text_frame:
            self._copy_text_frame(source_shape.text_frame, auto_shape.text_frame)

        # Copy other shape properties
        self._copy_shape_properties(source_shape, auto_shape, relationship_mapping)

        logger.debug("Successfully copied auto shape")
        return auto_shape

    except Exception as e:
        logger.error(f"Failed to copy auto shape: {e}")
        return None
def _copy_freeform_shape(
    self,
    source_shape: BaseShape,
    target_slide: Slide,
    position_offset: Optional[Tuple[float, float]] = None,
    relationship_mapping: Optional[Dict[str, str]] = None
) -> Optional[BaseShape]:
    """
    Copy a FREEFORM shape (custom geometry paths).

    FREEFORM shapes use custom geometry (a:custGeom) and may have
    image fills or other relationships that need remapping, so the shape is
    cloned at the XML level and inserted into the target slide's shape tree.

    Args:
        source_shape: The FREEFORM shape to copy
        target_slide: The slide to copy the shape to
        position_offset: Optional (x, y) offset for positioning; applied to
            the cloned element's transform, as is done for PICTURE shapes
        relationship_mapping: Mapping of old relationship IDs to new ones

    Returns:
        None (FREEFORM shapes are copied via XML and don't return a shape object)
    """
    try:
        logger.debug("Copying FREEFORM shape via XML")

        # Generate new IDs
        new_shape_id = self.id_manager.generate_unique_shape_id()
        new_creation_id = self.id_manager.generate_unique_creation_id()

        # Copy XML element with relationship remapping
        new_element = XmlUtils.copy_shape_element(
            source_shape,
            new_shape_id,
            new_creation_id,
            relationship_mapping=relationship_mapping
        )

        if new_element is None:
            return None

        # Honor the requested offset; previously it was accepted but ignored,
        # so freeforms stayed put while other shapes on the slide shifted.
        if position_offset:
            self._apply_position_offset_to_element(new_element, position_offset)

        # Insert into target slide
        target_slide.shapes._spTree.insert_element_before(new_element, 'p:extLst')
        logger.debug("Successfully copied FREEFORM shape via XML")
        return None  # We can't return the actual shape object for XML-copied shapes

    except Exception as e:
        logger.error(f"Failed to copy FREEFORM shape: {e}")
        return None
target_slide.shapes._spTree.insert_element_before(new_element, 'p:extLst') + logger.debug("Successfully copied generic shape via XML") + return None # We can't return the actual shape object in this case + + return None + + except Exception as e: + logger.error(f"Failed to copy generic shape: {e}") + return None + + def _get_shape_geometry( + self, + shape: BaseShape, + position_offset: Optional[Tuple[float, float]] = None + ) -> Tuple[int, int, int, int]: + """Get shape geometry (left, top, width, height) with optional offset.""" + left = shape.left + top = shape.top + width = shape.width + height = shape.height + + if position_offset: + offset_x, offset_y = position_offset + left += Inches(offset_x) + top += Inches(offset_y) + + return left, top, width, height + + def _apply_position_offset_to_element( + self, + element, + position_offset: Tuple[float, float] + ): + """ + Apply position offset to an XML element's xfrm (transform). + + Args: + element: The XML element containing an a:xfrm child + position_offset: (x, y) offset in inches to apply + """ + try: + offset_x, offset_y = position_offset + offset_x_emu = int(Inches(offset_x)) + offset_y_emu = int(Inches(offset_y)) + + # Find the xfrm element (could be a:xfrm or p:xfrm) + xfrm = element.find(f".//{{{_DRAWINGML_NS}}}xfrm") + if xfrm is None: + xfrm = element.find(f".//{{{_PRESENTATIONML_NS}}}xfrm") + + if xfrm is not None: + off = xfrm.find(f"{{{_DRAWINGML_NS}}}off") + if off is not None: + current_x = int(off.get('x', 0)) + current_y = int(off.get('y', 0)) + off.set('x', str(current_x + offset_x_emu)) + off.set('y', str(current_y + offset_y_emu)) + logger.debug( + f"Applied position offset: ({offset_x}, {offset_y}) inches" + ) + except Exception as e: + logger.warning(f"Could not apply position offset: {e}") + + def _copy_text_frame(self, source_frame, target_frame): + """Copy text frame content and formatting.""" + try: + # Clear existing content + target_frame.clear() + + # Copy text frame-level 
properties + # These control text wrapping and auto-sizing behavior + if source_frame.word_wrap is not None: + target_frame.word_wrap = source_frame.word_wrap + + # Copy auto_size property + if hasattr(source_frame, 'auto_size') and source_frame.auto_size is not None: + target_frame.auto_size = source_frame.auto_size + + # Copy margin properties if available + for margin in ['margin_top', 'margin_bottom', 'margin_left', 'margin_right']: + if hasattr(source_frame, margin): + source_value = getattr(source_frame, margin) + if source_value is not None: + setattr(target_frame, margin, source_value) + + # Copy vertical alignment (anchor) via XML + self._copy_text_frame_anchor(source_frame, target_frame) + + # Copy paragraphs + for para_idx, source_para in enumerate(source_frame.paragraphs): + if para_idx == 0: + # Use existing first paragraph + target_para = target_frame.paragraphs[0] + else: + # Add new paragraph + target_para = target_frame.add_paragraph() + + # Copy paragraph properties + target_para.level = source_para.level + if hasattr(source_para, 'alignment'): + target_para.alignment = source_para.alignment + + # Copy line spacing properties + try: + if source_para.line_spacing is not None: + target_para.line_spacing = source_para.line_spacing + if source_para.space_before is not None: + target_para.space_before = source_para.space_before + if source_para.space_after is not None: + target_para.space_after = source_para.space_after + except Exception as e: + logger.debug(f"Could not copy line spacing: {e}") + + # Copy XML-level paragraph properties (marL, indent, spcBef, lnSpc, buChar) + # These are not handled by python-pptx's paragraph properties + self._copy_paragraph_xml_properties(source_para, target_para) + + # Copy runs + for run_idx, source_run in enumerate(source_para.runs): + if run_idx == 0 and target_para.runs: + # Use existing first run + target_run = target_para.runs[0] + else: + # Add new run + target_run = target_para.add_run() + + # Copy text and 
def _copy_paragraph_xml_properties(self, source_para, target_para):
    """Copy XML-level paragraph properties that python-pptx doesn't handle.

    Reads the source paragraph's ``a:pPr`` element and mirrors onto the
    target's ``a:pPr`` (created on demand):
    - marL: left margin for bullet indentation (attribute)
    - indent: first-line indent, typically negative for hanging indent (attribute)
    - lnSpc: line spacing (child element)
    - spcBef: space before paragraph (child element)
    - buChar: bullet character (child element)

    Copied child elements replace any same-named child in the target and are
    inserted at the position the DrawingML schema prescribes for ``a:pPr``
    children (lnSpc, spcBef, spcAft, bullet elements, tabLst, defRPr, extLst)
    instead of being appended at the end, which could leave e.g. ``a:spcBef``
    ahead of ``a:lnSpc`` and produce out-of-order XML.

    Args:
        source_para: Paragraph whose ``_element`` is read.
        target_para: Paragraph whose ``_element`` is updated.

    Returns:
        None. Any failure is logged at debug level and swallowed, since this
        copy is best-effort.
    """
    # Child order of a:pPr per the DrawingML schema (CT_TextParagraphProperties).
    ppr_child_order = (
        "lnSpc", "spcBef", "spcAft",
        "buClrTx", "buClr",
        "buSzTx", "buSzPct", "buSzPts",
        "buFontTx", "buFont",
        "buNone", "buAutoNum", "buChar", "buBlip",
        "tabLst", "defRPr", "extLst",
    )
    copied_children = ("lnSpc", "spcBef", "buChar")

    try:
        source_pPr = source_para._element.find(f"{{{_DRAWINGML_NS}}}pPr")
        if source_pPr is None:
            return

        target_pPr = target_para._element.get_or_add_pPr()

        # marL / indent are plain attributes; only non-empty values are copied.
        for attr in ("marL", "indent"):
            value = source_pPr.get(attr)
            if value:
                target_pPr.set(attr, value)

        for position, local_name in enumerate(ppr_child_order):
            if local_name not in copied_children:
                continue

            tag = f"{{{_DRAWINGML_NS}}}{local_name}"
            source_child = source_pPr.find(tag)
            if source_child is None:
                continue

            # Replace rather than duplicate an element the target already has.
            existing = target_pPr.find(tag)
            if existing is not None:
                target_pPr.remove(existing)

            # Insert ahead of the first sibling that must follow this element;
            # append only when no such sibling exists.
            successor_tags = {
                f"{{{_DRAWINGML_NS}}}{name}"
                for name in ppr_child_order[position + 1:]
            }
            successor = next(
                (child for child in target_pPr if child.tag in successor_tags),
                None,
            )
            new_child = deepcopy(source_child)
            if successor is None:
                target_pPr.append(new_child)
            else:
                successor.addprevious(new_child)

    except Exception as e:
        logger.debug(f"Could not copy paragraph XML properties: {e}")
+ + This copies run properties that are stored as XML attributes but may not be + correctly exposed through python-pptx's Font API due to inheritance rules: + - b: bold + - i: italic + - lang: language + - u: underline + - strike: strikethrough + - cap: capitalization + - sz: font size + """ + try: + source_rPr = source_run._r.find(f"{{{_DRAWINGML_NS}}}rPr") + if source_rPr is None: + return + + # Get or create target rPr + target_rPr = target_run._r.find(f"{{{_DRAWINGML_NS}}}rPr") + if target_rPr is None: + # Create rPr element if it doesn't exist + from lxml import etree + target_rPr = etree.SubElement( + target_run._r, f"{{{_DRAWINGML_NS}}}rPr" + ) + # Move rPr to be first child (required position) + target_run._r.insert(0, target_rPr) + + # Copy key attributes + for attr in ['b', 'i', 'lang', 'u', 'strike', 'cap', 'sz']: + value = source_rPr.get(attr) + if value is not None: + target_rPr.set(attr, value) + + # Copy solidFill element (text color) + source_fill = source_rPr.find(f"{{{_DRAWINGML_NS}}}solidFill") + if source_fill is not None: + existing = target_rPr.find(f"{{{_DRAWINGML_NS}}}solidFill") + if existing is not None: + target_rPr.remove(existing) + target_rPr.append(deepcopy(source_fill)) + + # Copy latin font element (font family) + source_latin = source_rPr.find(f"{{{_DRAWINGML_NS}}}latin") + if source_latin is not None: + existing = target_rPr.find(f"{{{_DRAWINGML_NS}}}latin") + if existing is not None: + target_rPr.remove(existing) + target_rPr.append(deepcopy(source_latin)) + + # Copy ea (East Asian) font element + source_ea = source_rPr.find(f"{{{_DRAWINGML_NS}}}ea") + if source_ea is not None: + existing = target_rPr.find(f"{{{_DRAWINGML_NS}}}ea") + if existing is not None: + target_rPr.remove(existing) + target_rPr.append(deepcopy(source_ea)) + + # Copy cs (Complex Script) font element + source_cs = source_rPr.find(f"{{{_DRAWINGML_NS}}}cs") + if source_cs is not None: + existing = target_rPr.find(f"{{{_DRAWINGML_NS}}}cs") + if existing is not 
def _copy_shape_properties(
    self,
    source_shape: BaseShape,
    target_shape: BaseShape,
    relationship_mapping: Optional[Dict[str, str]] = None
):
    """Mirror name, rotation, alt text and fill from one shape onto another.

    Args:
        source_shape: The shape to copy properties from
        target_shape: The shape to copy properties to
        relationship_mapping: Optional mapping of old relationship IDs to new ones,
            forwarded to the fill copy so image-backed fills can be remapped.

    Any failure outside the alt-text step is logged at debug level and ends
    the copy early; alt-text failures are logged as warnings and do not stop
    the fill copy.
    """
    try:
        # Simple attributes: copied only when both shapes expose them.
        for attr_name in ('name', 'rotation'):
            if hasattr(source_shape, attr_name) and hasattr(target_shape, attr_name):
                setattr(target_shape, attr_name, getattr(source_shape, attr_name))

        # Alt text lives in the "title" attribute of p:cNvPr.
        # This is critical for element matching during export.
        try:
            source_cnvpr = source_shape._element.xpath(".//p:cNvPr")
            target_cnvpr = target_shape._element.xpath(".//p:cNvPr")
            if not (source_cnvpr and target_cnvpr):
                logger.debug(f"cnvpr elements not found: source={bool(source_cnvpr)}, target={bool(target_cnvpr)}")
            else:
                source_title = source_cnvpr[0].attrib.get("title")
                # An empty string is still copied; only a missing attribute is skipped.
                if source_title is not None:
                    target_cnvpr[0].attrib["title"] = source_title
                    logger.debug(f"Copied alt text: '{source_title}'")
        except Exception as e:
            logger.warning(f"Could not copy alt text: {e}", exc_info=True)

        # Background colors, gradients and other fills.
        self._copy_shape_fill(source_shape, target_shape, relationship_mapping)

    except Exception as e:
        logger.debug(f"Could not copy all shape properties: {e}")
relationship(s) in fill element" + ) + + target_spPr.append(new_fill) + logger.debug(f"Copied fill element: {fill_tag}") + break # Only one fill type can be active + + except Exception as e: + logger.debug(f"Could not copy fill: {e}") + + def _copy_table_style_properties(self, source_table, target_table): + """Copy table-level style properties from source to target table. + + This copies boolean properties that control PowerPoint's built-in table styling: + - first_row: Header row styling (typically dark background) + - first_col: First column styling + - last_row: Last row styling (typically for totals) + - last_col: Last column styling + - horz_banding: Alternating row colors + - vert_banding: Alternating column colors + + Also copies/removes the tableStyleId to match the source table's theme styling. + """ + try: + # Copy boolean style properties + target_table.first_row = source_table.first_row + target_table.first_col = source_table.first_col + target_table.last_row = source_table.last_row + target_table.last_col = source_table.last_col + target_table.horz_banding = source_table.horz_banding + target_table.vert_banding = source_table.vert_banding + + # Copy the tableStyleId from source to target (or remove if source doesn't have one) + try: + source_tbl = source_table._tbl + target_tbl = target_table._tbl + + source_tblPr = source_tbl.tblPr + target_tblPr = target_tbl.tblPr + + if source_tblPr is not None and target_tblPr is not None: + # Find tableStyleId in source + ns = {"a": _DRAWINGML_NS} + source_style_id = source_tblPr.find(f"{{{_DRAWINGML_NS}}}tableStyleId") + target_style_id = target_tblPr.find(f"{{{_DRAWINGML_NS}}}tableStyleId") + + # Remove existing tableStyleId from target + if target_style_id is not None: + target_tblPr.remove(target_style_id) + + # Copy source tableStyleId if it exists + if source_style_id is not None: + new_style_id = deepcopy(source_style_id) + target_tblPr.append(new_style_id) + except Exception as e: + logger.debug(f"Could 
def _copy_cell_fill(self, source_cell, target_cell):
    """Copy cell properties from source table cell to target table cell.

    This copies, from the source ``a:tcPr`` to the target ``a:tcPr``
    (created when missing):
    - Cell attributes listed in ``_TCPR_ATTRS``; an attribute absent from the
      source is removed from the target
    - The first fill element found among ``_FILL_TAGS``, replacing any fill
      already on the target
    - Border elements listed in ``_BORDER_TAGS``; a border absent from the
      source is removed from the target

    Copied children are inserted at the position the DrawingML schema
    prescribes for ``a:tcPr`` (borders, then cell3D, then the fill, then
    headers/extLst). Appending them in copy order would place the fill ahead
    of the borders and produce out-of-order XML.

    Args:
        source_cell: Cell whose ``_tc`` element is read.
        target_cell: Cell whose ``_tc`` element is updated.

    Returns:
        None. Any failure is logged at debug level and swallowed, since this
        copy is best-effort.
    """
    # Child order of a:tcPr per the DrawingML schema (CT_TableCellProperties).
    tcpr_child_order = (
        "lnL", "lnR", "lnT", "lnB", "lnTlToBr", "lnBlToTr",
        "cell3D",
        "noFill", "solidFill", "gradFill", "blipFill", "pattFill", "grpFill",
        "headers", "extLst",
    )

    def schema_rank(tag):
        """Return the schema position of a tag; unknown tags sort last."""
        local_name = tag.rsplit("}", 1)[-1] if isinstance(tag, str) else ""
        try:
            return tcpr_child_order.index(local_name)
        except ValueError:
            return len(tcpr_child_order)

    def insert_in_schema_order(parent, child):
        """Insert child before the first sibling that must follow it."""
        rank = schema_rank(child.tag)
        for sibling in parent:
            if schema_rank(sibling.tag) > rank:
                sibling.addprevious(child)
                return
        parent.append(child)

    try:
        # Table cell properties are in a:tc/a:tcPr
        source_tc = source_cell._tc
        target_tc = target_cell._tc

        source_tcPr = source_tc.find(f"{{{_DRAWINGML_NS}}}tcPr")
        target_tcPr = target_tc.find(f"{{{_DRAWINGML_NS}}}tcPr")

        if source_tcPr is None:
            return  # No properties to copy

        # Create tcPr if it doesn't exist
        if target_tcPr is None:
            from lxml import etree
            target_tcPr = etree.SubElement(target_tc, f"{{{_DRAWINGML_NS}}}tcPr")

        # Copy tcPr attributes (margins, anchor, etc.)
        for attr in _TCPR_ATTRS:
            source_val = source_tcPr.get(attr)
            if source_val is not None:
                target_tcPr.set(attr, source_val)
            elif attr in target_tcPr.attrib:
                # Remove attribute if source doesn't have it
                del target_tcPr.attrib[attr]

        # Copy the fill element (only one fill type can be active)
        for fill_tag in _FILL_TAGS:
            source_fill = source_tcPr.find(fill_tag)
            if source_fill is None:
                continue

            # Remove existing fill from target
            for tag in _FILL_TAGS:
                existing = target_tcPr.find(tag)
                if existing is not None:
                    target_tcPr.remove(existing)

            insert_in_schema_order(target_tcPr, deepcopy(source_fill))
            break

        # Copy border elements
        for border_tag in _BORDER_TAGS:
            source_border = source_tcPr.find(border_tag)

            # Remove existing border from target first
            existing = target_tcPr.find(border_tag)
            if existing is not None:
                target_tcPr.remove(existing)

            # Copy border if source has one
            if source_border is not None:
                insert_in_schema_order(target_tcPr, deepcopy(source_border))

    except Exception as e:
        logger.debug(f"Could not copy cell properties: {e}")
+""" + +import logging +from typing import Any, Dict, List, Optional, Tuple + +from pptx import Presentation +from pptx.enum.shapes import MSO_SHAPE_TYPE +from pptx.parts.slide import SlidePart +from pptx.slide import Slide + +from .id_manager import IdManager +from .relationship_copier import RelationshipCopier +from .shape_copier import ShapeCopier +from .xml_utils import XmlUtils + +logger = logging.getLogger(__name__) + + +def _remove_layout_placeholders(slide: Slide) -> None: + """Remove placeholder shapes that come from slide layouts. + + When slides.add_slide(layout) is called, python-pptx automatically + includes all placeholder shapes from that layout. These placeholders + show text like "Click to add title" or "Click to add text" which we + don't want in the exported presentation. + """ + shapes_to_remove = [] + for shape in slide.shapes: + if shape.shape_type == MSO_SHAPE_TYPE.PLACEHOLDER: + shapes_to_remove.append(shape) + + for shape in shapes_to_remove: + sp = shape.element + sp.getparent().remove(sp) + + +class SlideCopierManager: + """ + Main orchestration class for robust slide copying operations. + + Coordinates all aspects of slide copying to ensure no XML corruption, + duplicate IDs, or broken relationships. + """ + + def __init__(self, target_presentation: Presentation): + """ + Initialize the slide copier manager. 
def copy_slide(
    self,
    source_slide: Slide,
    insertion_index: Optional[int] = None,
    copy_relationships: bool = True,
    regenerate_ids: bool = True,
    position_offset: Optional[Tuple[float, float]] = None,
    layout_matching: str = "auto"
) -> Optional[Slide]:
    """
    Copy a slide with all its content to the target presentation.

    Each call appends one bookkeeping dict to ``self.copy_operations``
    (status, errors, warnings and per-step counters). The steps run in a
    fixed order: create the target slide, copy relationships, copy shapes,
    copy speaker notes, reposition the slide, validate. Relationship, notes
    and positioning failures are recorded as warnings and do not abort the
    copy; per-shape failures are collected in ``shape_errors``.

    Args:
        source_slide: The slide to copy
        insertion_index: Index where to insert the slide (None = append)
        copy_relationships: Whether to copy relationships (images, etc.)
        regenerate_ids: Whether to regenerate all IDs.
            NOTE(review): this flag is not read anywhere in this method's
            body; confirm whether it is meant to be forwarded.
        position_offset: Optional position offset for shapes
        layout_matching: How to match layouts ("auto", "blank", "match")

    Returns:
        The newly created slide (also when validation reports issues, in
        which case the status is 'completed_with_warnings'), or None if the
        target slide could not be created or an unexpected error escaped a
        step. On that late-failure path this method does not remove a slide
        that was already created.
    """
    # The operation record is registered up front so that even failed
    # attempts leave a trace in self.copy_operations.
    operation_id = len(self.copy_operations)
    operation = {
        'id': operation_id,
        'source_slide_id': getattr(source_slide, 'slide_id', 'unknown'),
        'status': 'started',
        'errors': [],
        'warnings': [],
    }
    self.copy_operations.append(operation)

    try:
        logger.info(f"Starting slide copy operation {operation_id}")

        # Step 1: Create target slide with appropriate layout
        target_slide = self._create_target_slide(source_slide, layout_matching)
        if not target_slide:
            operation['status'] = 'failed'
            operation['errors'].append('Failed to create target slide')
            return None

        operation['target_slide_id'] = target_slide.slide_id

        # Step 2: Copy relationships if requested
        # The mapping stays empty when copying is disabled or fails, so the
        # shape copy below still runs, just without relationship remapping.
        relationship_mapping = {}
        if copy_relationships:
            try:
                relationship_mapping = self.relationship_copier.copy_slide_relationships(
                    source_slide, target_slide
                )
                operation['relationships_copied'] = len(relationship_mapping)
            except Exception as e:
                operation['warnings'].append(f'Relationship copying failed: {e}')
                logger.warning(f"Relationship copying failed for operation {operation_id}: {e}")

        # Step 3: Copy all shapes
        copied_shapes = []
        shape_errors = []

        for shape_idx, source_shape in enumerate(source_slide.shapes):
            try:
                copied_shape = self.shape_copier.copy_shape(
                    source_shape=source_shape,
                    target_slide=target_slide,
                    position_offset=position_offset,
                    relationship_mapping=relationship_mapping
                )
                # Only shapes for which copy_shape returned an object are
                # recorded here, so 'shapes_copied' below excludes the
                # None-returning cases described in the else branch.
                if copied_shape:
                    copied_shapes.append({
                        'source_idx': shape_idx,
                        'target_shape': copied_shape,
                        'shape_type': source_shape.shape_type
                    })
                else:
                    # Shape was copied via XML (GROUP, FREEFORM, etc.) or failed
                    # Not all shape types return a shape object
                    logger.debug(f'Shape {shape_idx} copied via XML or skipped (no return object)')

            except Exception as e:
                # One bad shape must not abort the remaining shapes.
                error_msg = f'Error copying shape {shape_idx}: {e}'
                shape_errors.append(error_msg)
                logger.warning(error_msg)

        operation['shapes_copied'] = len(copied_shapes)
        operation['shape_errors'] = shape_errors

        # Step 4: Copy speaker notes
        try:
            notes_copied = self.relationship_copier.copy_notes_slide_relationships(
                source_slide, target_slide
            )
            operation['notes_copied'] = notes_copied
        except Exception as e:
            operation['warnings'].append(f'Notes copying failed: {e}')

        # Step 5: Move slide to correct position if needed
        if insertion_index is not None:
            try:
                self._move_slide_to_position(target_slide, insertion_index)
                operation['moved_to_index'] = insertion_index
            except Exception as e:
                operation['warnings'].append(f'Slide positioning failed: {e}')

        # Step 6: Validate the result
        # The result is read as a mapping with a 'valid' flag and, when
        # invalid, an 'issues' list.
        validation_result = self._validate_copied_slide(target_slide)
        operation['validation'] = validation_result

        if validation_result['valid']:
            operation['status'] = 'completed'
            logger.info(f"Successfully completed slide copy operation {operation_id}")
            return target_slide
        else:
            # Validation issues are downgraded to warnings; the slide is
            # still handed back to the caller.
            operation['status'] = 'completed_with_warnings'
            operation['warnings'].extend(validation_result['issues'])
            logger.warning(f"Slide copy operation {operation_id} completed with warnings")
            return target_slide

    except Exception as e:
        # Anything not handled by a per-step try block lands here.
        operation['status'] = 'failed'
        operation['errors'].append(str(e))
        logger.error(f"Slide copy operation {operation_id} failed: {e}")
        return None
def _create_target_slide(self, source_slide: Slide, layout_matching: str = "auto") -> Optional[Slide]:
    """Add an empty slide to the target presentation using a suitable layout.

    Layout selection depends on ``layout_matching``:

    * ``"blank"`` - always use the target's blank layout;
    * ``"match"`` - look for a matching layout in the target; if none is
      found, use the source slide's own layout;
    * anything else (``"auto"``) - look for a matching layout in the target;
      if none is found, use the target's blank layout.

    Returns:
        The new slide with its layout placeholders removed, or None if any
        step raised (the error is logged).
    """
    try:
        if layout_matching == "blank":
            chosen_layout = self._get_blank_layout()
        else:
            # "match" and "auto" share the lookup and differ only in fallback.
            chosen_layout = self._find_matching_layout(source_slide.slide_layout)
            if chosen_layout is None:
                if layout_matching == "match":
                    chosen_layout = source_slide.slide_layout
                else:
                    chosen_layout = self._get_blank_layout()

        new_slide = self.target_presentation.slides.add_slide(chosen_layout)
        _remove_layout_placeholders(new_slide)
        return new_slide

    except Exception as e:
        logger.error(f"Failed to create target slide: {e}")
        return None
+ """ + try: + source_name = source_layout.name + logger.info(f"Looking for layout matching: {source_name}") + for layout in self.target_presentation.slide_layouts: + if layout.name == source_name: + logger.info(f"Found matching layout in target: {source_name}") + return layout + + # Try partial matching if exact match fails + for layout in self.target_presentation.slide_layouts: + if source_name.lower() in layout.name.lower() or layout.name.lower() in source_name.lower(): + logger.info(f"Found partial matching layout: {layout.name} for {source_name}") + return layout + + logger.warning(f"No matching layout found for: {source_name}") + return None + except Exception as e: + logger.error(f"Error finding matching layout: {e}") + return None + + def _get_blank_layout(self): + """Get the blank slide layout from the presentation.""" + try: + # Find the layout with the fewest placeholders (likely blank) + layout_items_count = [len(layout.placeholders) for layout in self.target_presentation.slide_layouts] + min_items = min(layout_items_count) + blank_layout_id = layout_items_count.index(min_items) + return self.target_presentation.slide_layouts[blank_layout_id] + except Exception: + # Fall back to first layout + return self.target_presentation.slide_layouts[0] + + def _move_slide_to_position(self, slide: Slide, insertion_index: int): + """Move a slide to a specific position (simplified implementation).""" + # Note: python-pptx doesn't have built-in slide reordering + # This would require complex XML manipulation + # For now, we'll just log the intent + logger.debug(f"Slide movement requested to index {insertion_index} (not implemented)") + + def _validate_copied_slide(self, slide: Slide) -> Dict[str, Any]: + """Validate a copied slide for common issues.""" + validation = { + 'valid': True, + 'issues': [], + 'warnings': [], + 'shape_count': len(slide.shapes) if slide else 0, + } + + try: + if not slide: + validation['valid'] = False + validation['issues'].append('Slide is 
def batch_copy_slides(
    self,
    source_slides: List[Slide],
    copy_relationships: bool = True,
    fail_fast: bool = False
) -> List[Optional[Slide]]:
    """
    Copy several slides one after another via ``copy_slide``.

    Args:
        source_slides: Slides to copy, processed in the given order
        copy_relationships: Forwarded to ``copy_slide`` for every slide
        fail_fast: Stop at the first slide that fails to copy

    Returns:
        One entry per processed slide: the copied slide, or None where the
        copy failed. With ``fail_fast`` the list ends at the first failure.
    """
    copied_slides = []

    for position, slide in enumerate(source_slides):
        try:
            logger.info(f"Batch copying slide {position + 1}/{len(source_slides)}")

            copied = self.copy_slide(
                slide,
                copy_relationships=copy_relationships
            )
            copied_slides.append(copied)

            # Keep going unless this copy failed and the caller asked to stop.
            if copied or not fail_fast:
                continue

            logger.error(f"Batch copy failed at slide {position}, stopping due to fail_fast")
            break

        except Exception as e:
            logger.error(f"Error in batch copy for slide {position}: {e}")
            copied_slides.append(None)

            if fail_fast:
                break

    return copied_slides
class SlideDeletionResult:
    """Outcome record for a single slide-deletion (or validation) attempt.

    Public attributes:
        success: False once the attempt is known to have failed.
        error_message: Human-readable failure reason; empty on success.
        slide_index: Index the attempt referred to (-1 when unspecified).
        warnings: Non-fatal remarks collected through ``add_warning``.
    """

    def __init__(self, success: bool = True, error_message: str = "", slide_index: int = -1):
        self.slide_index = slide_index
        self.error_message = error_message
        self.success = success
        # A fresh list per instance, so results never share warnings.
        self.warnings: List[str] = []

    def add_warning(self, warning: str):
        """Record a non-fatal warning and echo it to the module logger."""
        self.warnings += [warning]
        logger.warning(f"Slide deletion warning: {warning}")
def validate_deletion(self, slide_index: int) -> SlideDeletionResult:
    """
    Check if a slide can be safely deleted, without modifying the presentation.

    The checks, in order: the index is in range; the internal slide ID list
    (``_sldIdLst``) is reachable; the index exists in that list; and the
    matching list entry exposes a relationship ID (``rId``). Deleting the
    only slide is allowed but recorded as a warning.

    Args:
        slide_index: Index of the slide to validate

    Returns:
        SlideDeletionResult with validation status; ``success`` is False and
        ``error_message`` is set when a check fails or raises.
    """
    result = SlideDeletionResult(slide_index=slide_index)

    try:
        # Check if index is valid
        slide_count = len(self.presentation.slides)
        if slide_index < 0 or slide_index >= slide_count:
            result.success = False
            result.error_message = f"Invalid slide index {slide_index}. Presentation has {slide_count} slides."
            return result

        # Check if this is the only slide
        if slide_count == 1:
            result.add_warning("Deleting the only slide in the presentation")

        # Note whether notes will go away with the slide. Use the
        # side-effect-free ``has_notes_slide`` query: reading
        # ``slide.notes_slide`` (even through ``hasattr``) makes python-pptx
        # create a notes slide when none exists, which would mutate the
        # presentation during what is meant to be a read-only check.
        slide = self.presentation.slides[slide_index]
        if getattr(slide, 'has_notes_slide', False):
            logger.debug("Slide has notes that will be deleted")

        # Validate that we can access the slide ID list
        if not hasattr(self.presentation.slides, '_sldIdLst'):
            result.success = False
            result.error_message = "Cannot access internal slide ID list (_sldIdLst)"
            return result

        # Check that the slide has a relationship ID
        xml_slides = self.presentation.slides._sldIdLst
        slides = list(xml_slides)
        if slide_index >= len(slides):
            result.success = False
            result.error_message = f"Slide index {slide_index} not found in XML slide list"
            return result

        slide_element = slides[slide_index]
        if not hasattr(slide_element, 'rId'):
            result.success = False
            result.error_message = "Slide element missing relationship ID (rId)"
            return result

        logger.debug(f"Slide {slide_index} validation passed")

    except Exception as e:
        result.success = False
        result.error_message = f"Validation error: {str(e)}"
        logger.error(f"Slide validation failed: {e}")

    return result
def delete_slides(self, slide_indices: List[int]) -> List[SlideDeletionResult]:
    """
    Delete multiple slides, highest index first.

    Deleting in descending order keeps the remaining indices valid: removing
    a slide only shifts the slides after it. Duplicate indices are collapsed
    before deleting. Otherwise a repeated index would be applied a second
    time after the shift and remove a slide the caller never asked for.

    Args:
        slide_indices: Slide indices to delete (any order, duplicates ignored)

    Returns:
        List of SlideDeletionResult objects, one per distinct index, in the
        order the deletions were attempted (descending index). A failed
        deletion is logged and does not stop the remaining ones.
    """
    descending_unique = sorted(set(slide_indices), reverse=True)

    duplicates_dropped = len(slide_indices) - len(descending_unique)
    if duplicates_dropped:
        logger.debug(f"Ignoring {duplicates_dropped} duplicate slide index entries")

    logger.info(f"Attempting to delete {len(descending_unique)} slides")

    results = []
    for slide_index in descending_unique:
        result = self.delete_slide(slide_index)
        results.append(result)

        # If a deletion fails, log it but continue with others
        if not result.success:
            logger.warning(f"Slide deletion failed for index {slide_index}: {result.error_message}")

    successful_deletions = sum(1 for r in results if r.success)
    logger.info(
        f"Completed batch deletion: {successful_deletions}/{len(descending_unique)} "
        f"slides deleted successfully"
    )

    return results
+ PowerPoint's auto-repair is more comprehensive. + + Returns: + Number of orphaned parts found (may not be cleanable) + """ + logger.info("Attempting to identify orphaned parts") + + # This is a placeholder for future implementation + # Full orphaned part cleanup is complex and beyond the scope + # of the basic slide deletion functionality + + orphaned_count = 0 + + try: + # Future: implement orphaned part detection + # This would involve checking for unreferenced relationships + # and parts that are no longer needed + pass + + except Exception as e: + logger.warning(f"Orphaned part cleanup failed: {e}") + + logger.info(f"Orphaned part cleanup completed, found {orphaned_count} parts") + return orphaned_count \ No newline at end of file diff --git a/gslides_api/pptx/xml_utils.py b/gslides_api/pptx/xml_utils.py new file mode 100644 index 0000000..d2adf39 --- /dev/null +++ b/gslides_api/pptx/xml_utils.py @@ -0,0 +1,373 @@ +""" +XML utilities for safe PowerPoint element manipulation. + +Provides safe alternatives to copy.deepcopy() for XML elements to prevent +corruption and ensure proper namespace handling. +""" + +import logging +from typing import Optional, Dict, Any +from lxml import etree +from pptx.shapes.base import BaseShape + +logger = logging.getLogger(__name__) + + +class XmlUtils: + """ + Utilities for safe XML manipulation in PowerPoint documents. + + Provides methods to safely copy XML elements without using deepcopy, + which can cause corruption in python-pptx. 
@classmethod
def safe_copy_element(cls, source_element, new_id: Optional[int] = None) -> etree.Element:
    """
    Safely copy an XML element without using deepcopy.

    Builds a new element tree with the same tags, namespace maps, attributes,
    text and tail as the source. No IDs are generated here. When ``new_id``
    is given it is applied to:

    * the root element's own ``id`` attribute, if it has one; and
    * the copied shape's primary ``p:cNvPr`` (the first one in document
      order), plus any other ``p:cNvPr`` carrying that same original ID.

    Other ``p:cNvPr`` descendants, such as the members of a group shape,
    keep their own IDs. Stamping them all with ``new_id`` would create
    duplicate shape IDs inside the copy.

    Args:
        source_element: The source XML element to copy
        new_id: Optional new ID to assign to the copied shape

    Returns:
        A new XML element that is a copy of the source

    Raises:
        ValueError: If ``source_element`` is None
        Exception: Any error raised while copying is logged and re-raised
    """
    if source_element is None:
        raise ValueError("Source element cannot be None")

    try:
        # Create new element with same tag
        new_element = etree.Element(source_element.tag, nsmap=source_element.nsmap)

        # Copy attributes, updating ID if provided
        for key, value in source_element.attrib.items():
            if key == 'id' and new_id is not None:
                new_element.set(key, str(new_id))
            else:
                new_element.set(key, value)

        # Copy text content
        if source_element.text:
            new_element.text = source_element.text
        if source_element.tail:
            new_element.tail = source_element.tail

        # Children are copied verbatim (no new_id), so only the outermost
        # call rewrites shape IDs below.
        for child in source_element:
            new_element.append(cls.safe_copy_element(child))

        if new_id is not None:
            cnv_pr_elements = new_element.xpath('.//p:cNvPr', namespaces=cls.NAMESPACES)
            if cnv_pr_elements:
                # The first cNvPr in document order belongs to the shape
                # itself; nested shapes come later with different IDs.
                primary_id = cnv_pr_elements[0].get('id')
                for cnv_pr in cnv_pr_elements:
                    if cnv_pr.get('id') == primary_id:
                        cnv_pr.set('id', str(new_id))

        return new_element

    except Exception as e:
        logger.error(f"Error copying XML element: {e}")
        raise
@classmethod
def clean_element_relationships(cls, element) -> bool:
    """
    Strip relationship references from an XML element and its descendants.

    Removes the ``r:id``, ``r:embed`` and ``r:link`` attributes, the same
    three that ``remap_element_relationships`` rewrites. After copying, none
    of them is left pointing at a relationship ID that belongs to the source
    slide. All other attributes are left untouched.

    Args:
        element: The XML element to clean (modified in place)

    Returns:
        True if the element was processed, False if it was None or an error
        occurred (errors are logged as warnings)
    """
    try:
        if element is None:
            return False

        r_namespace = cls.NAMESPACES['r']
        relationship_attrs = tuple(
            f"{{{r_namespace}}}{local_name}" for local_name in ('id', 'embed', 'link')
        )

        for node in element.iter():
            for attr_name in relationship_attrs:
                # get() is checked first so nodes without the attribute
                # (including comments) never have their attrib touched.
                if node.get(attr_name) is not None:
                    del node.attrib[attr_name]

        return True

    except Exception as e:
        logger.warning(f"Failed to clean element relationships: {e}")
        return False
@classmethod
def copy_shape_element(
    cls,
    source_shape: BaseShape,
    new_shape_id: int,
    new_creation_id: str,
    relationship_mapping: Optional[Dict[str, str]] = None
) -> Optional[etree.Element]:
    """
    Produce a copy of a shape's XML element carrying new IDs.

    Steps: copy the shape's ``_element`` with ``safe_copy_element``, apply
    the creation ID, then either remap relationship references (when a
    non-empty mapping is supplied) or clean them, and finally run
    ``validate_element`` on the result.

    Args:
        source_shape: The source shape to copy
        new_shape_id: New unique shape ID
        new_creation_id: New unique creation ID
        relationship_mapping: Optional mapping of old relationship IDs to
            new ones. A missing or empty mapping selects the clean path.

    Returns:
        The copied XML element with updated IDs, or None if the shape has no
        usable ``_element``, validation fails, or any step raises
    """
    try:
        if not hasattr(source_shape, '_element'):
            logger.error("Source shape has no _element attribute")
            return None

        original_xml = source_shape._element
        if original_xml is None:
            logger.error("Source shape element is None")
            return None

        # Copy first, then stamp the creation ID onto the copy.
        duplicate = cls.safe_copy_element(original_xml, new_shape_id)
        cls.update_creation_id(duplicate, new_creation_id)

        if not relationship_mapping:
            # Clean relationship references (will be rebuilt later - legacy behavior)
            cls.clean_element_relationships(duplicate)
        else:
            # Point relationship references at their new IDs
            remapped = cls.remap_element_relationships(duplicate, relationship_mapping)
            logger.debug(f"Remapped {remapped} relationship references in shape element")

        report = cls.validate_element(duplicate)
        if not report['valid']:
            logger.error(f"Copied element validation failed: {report['issues']}")
            return None

        if report['warnings']:
            logger.warning(f"Copied element has warnings: {report['warnings']}")

        return duplicate

    except Exception as e:
        logger.error(f"Failed to copy shape element: {e}")
        return None
a/poetry.lock b/poetry.lock index 24457ae..3bbac8e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,17 @@ # This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +[[package]] +name = "annotated-doc" +version = "0.0.4" +description = "Document parameters, class attributes, return types, and variables inline, with Annotated." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"}, + {file = "annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4"}, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -43,6 +55,29 @@ files = [ {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +groups = ["main"] +files = [ + {file = "beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"}, + {file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"}, +] + +[package.dependencies] +soupsieve = ">=1.6.1" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "cachetools" version = "5.5.2" @@ -373,6 +408,30 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==46.0.3)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "fastapi" +version = "0.135.1" +description = "FastAPI framework, high 
performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e"}, + {file = "fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd"}, +] + +[package.dependencies] +annotated-doc = ">=0.0.2" +pydantic = ">=2.7.0" +starlette = ">=0.46.0" +typing-extensions = ">=4.8.0" +typing-inspection = ">=0.4.2" + +[package.extras] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] +standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] + [[package]] name = "google-api-core" version = "2.25.1" @@ -551,14 +610,14 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 [[package]] name = "httpx" -version = "0.28.1" +version = "0.27.2" description = "The next generation HTTP client." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, - {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, ] [package.dependencies] @@ -566,6 +625,7 @@ anyio = "*" certifi = "*" httpcore = "==1.*" idna = "*" +sniffio = "*" [package.extras] brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] @@ -650,6 +710,155 @@ files = [ [package.dependencies] referencing = ">=0.31.0" +[[package]] +name = "lxml" +version = "5.4.0" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:696ea9e87442467819ac22394ca36cb3d01848dad1be6fac3fb612d3bd5a12cf"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef80aeac414f33c24b3815ecd560cee272786c3adfa5f31316d8b349bfade28"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b9c2754cef6963f3408ab381ea55f47dabc6f78f4b8ebb0f0b25cf1ac1f7609"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a62cc23d754bb449d63ff35334acc9f5c02e6dae830d78dab4dd12b78a524f4"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f82125bc7203c5ae8633a7d5d20bcfdff0ba33e436e4ab0abc026a53a8960b7"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b67319b4aef1a6c56576ff544b67a2a6fbd7eaee485b241cabf53115e8908b8f"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:a8ef956fce64c8551221f395ba21d0724fed6b9b6242ca4f2f7beb4ce2f41997"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:0a01ce7d8479dce84fc03324e3b0c9c90b1ece9a9bb6a1b6c9025e7e4520e78c"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91505d3ddebf268bb1588eb0f63821f738d20e1e7f05d3c647a5ca900288760b"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3bcdde35d82ff385f4ede021df801b5c4a5bcdfb61ea87caabcebfc4945dc1b"}, + {file = 
"lxml-5.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aea7c06667b987787c7d1f5e1dfcd70419b711cdb47d6b4bb4ad4b76777a0563"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:a7fb111eef4d05909b82152721a59c1b14d0f365e2be4c742a473c5d7372f4f5"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43d549b876ce64aa18b2328faff70f5877f8c6dede415f80a2f799d31644d776"}, + {file = "lxml-5.4.0-cp310-cp310-win32.whl", hash = "sha256:75133890e40d229d6c5837b0312abbe5bac1c342452cf0e12523477cd3aa21e7"}, + {file = "lxml-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:de5b4e1088523e2b6f730d0509a9a813355b7f5659d70eb4f319c76beea2e250"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba"}, + {file = 
"lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751"}, + {file = "lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4"}, + {file = "lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e"}, + {file = 
"lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc"}, + {file = "lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f"}, + {file = "lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54"}, + {file = 
"lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a"}, + {file = "lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82"}, + {file = "lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f"}, + {file = "lxml-5.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7be701c24e7f843e6788353c055d806e8bd8466b52907bafe5d13ec6a6dbaecd"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb54f7c6bafaa808f27166569b1511fc42701a7713858dddc08afdde9746849e"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97dac543661e84a284502e0cf8a67b5c711b0ad5fb661d1bd505c02f8cf716d7"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:c70e93fba207106cb16bf852e421c37bbded92acd5964390aad07cb50d60f5cf"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9c886b481aefdf818ad44846145f6eaf373a20d200b5ce1a5c8e1bc2d8745410"}, + {file = "lxml-5.4.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:fa0e294046de09acd6146be0ed6727d1f42ded4ce3ea1e9a19c11b6774eea27c"}, + {file = "lxml-5.4.0-cp36-cp36m-win32.whl", hash = "sha256:61c7bbf432f09ee44b1ccaa24896d21075e533cd01477966a5ff5a71d88b2f56"}, + {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, + {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, + {file = 
"lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, + {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, + {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, + {file = "lxml-5.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eaf24066ad0b30917186420d51e2e3edf4b0e2ea68d8cd885b14dc8afdcf6556"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b31a3a77501d86d8ade128abb01082724c0dfd9524f542f2f07d693c9f1175f"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e108352e203c7afd0eb91d782582f00a0b16a948d204d4dec8565024fafeea5"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a11a96c3b3f7551c8a8109aa65e8594e551d5a84c76bf950da33d0fb6dfafab7"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:ca755eebf0d9e62d6cb013f1261e510317a41bf4650f22963474a663fdfe02aa"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4cd915c0fb1bed47b5e6d6edd424ac25856252f09120e3e8ba5154b6b921860e"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:226046e386556a45ebc787871d6d2467b32c37ce76c2680f5c608e25823ffc84"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b108134b9667bcd71236c5a02aad5ddd073e372fb5d48ea74853e009fe38acb6"}, + {file = "lxml-5.4.0-cp38-cp38-win32.whl", hash = "sha256:1320091caa89805df7dcb9e908add28166113dcd062590668514dbd510798c88"}, + {file = "lxml-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b"}, + {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bda3ea44c39eb74e2488297bb39d47186ed01342f0022c8ff407c250ac3f498e"}, + {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ceaf423b50ecfc23ca00b7f50b64baba85fb3fb91c53e2c9d00bc86150c7e40"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:664cdc733bc87449fe781dbb1f309090966c11cc0c0cd7b84af956a02a8a4729"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67ed8a40665b84d161bae3181aa2763beea3747f748bca5874b4af4d75998f87"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4a3bd174cc9cdaa1afbc4620c049038b441d6ba07629d89a83b408e54c35cd"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b0989737a3ba6cf2a16efb857fb0dfa20bc5c542737fddb6d893fde48be45433"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:dc0af80267edc68adf85f2a5d9be1cdf062f973db6790c1d065e45025fa26140"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:639978bccb04c42677db43c79bdaa23785dc7f9b83bfd87570da8207872f1ce5"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a99d86351f9c15e4a901fc56404b485b1462039db59288b203f8c629260a142"}, + {file = "lxml-5.4.0-cp39-cp39-win32.whl", hash = "sha256:3e6d5557989cdc3ebb5302bbdc42b439733a841891762ded9514e74f60319ad6"}, + {file = "lxml-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8c9b7f16b63e65bbba889acb436a1034a82d34fa09752d754f88d708eca80e1"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1b717b00a71b901b4667226bba282dd462c42ccf618ade12f9ba3674e1fabc55"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27a9ded0f0b52098ff89dd4c418325b987feed2ea5cc86e8860b0f844285d740"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7ce10634113651d6f383aa712a194179dcd496bd8c41e191cec2099fa09de5"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53370c26500d22b45182f98847243efb518d268374a9570409d2e2276232fd37"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6364038c519dffdbe07e3cf42e6a7f8b90c275d4d1617a69bb59734c1a2d571"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b12cb6527599808ada9eb2cd6e0e7d3d8f13fe7bbb01c6311255a15ded4c7ab4"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5f11a1526ebd0dee85e7b1e39e39a0cc0d9d03fb527f56d8457f6df48a10dc0c"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b4afaf38bf79109bb060d9016fad014a9a48fb244e11b94f74ae366a64d252"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:de6f6bb8a7840c7bf216fb83eec4e2f79f7325eca8858167b68708b929ab2172"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5cca36a194a4eb4e2ed6be36923d3cffd03dcdf477515dea687185506583d4c9"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b7c86884ad23d61b025989d99bfdd92a7351de956e01c61307cb87035960bcb1"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:53d9469ab5460402c19553b56c3648746774ecd0681b1b27ea74d5d8a3ef5590"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:56dbdbab0551532bb26c19c914848d7251d73edb507c3079d6805fa8bba5b706"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14479c2ad1cb08b62bb941ba8e0e05938524ee3c3114644df905d2331c76cd57"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697d2ea994e0db19c1df9e40275ffe84973e4232b5c274f47e7c1ec9763cdd"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:24f6df5f24fc3385f622c0c9d63fe34604893bc1a5bdbb2dbf5870f85f9a404a"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:151d6c40bc9db11e960619d2bf2ec5829f0aaffb10b41dcf6ad2ce0f3c0b2325"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4025bf2884ac4370a3243c5aa8d66d3cb9e15d3ddd0af2d796eccc5f0244390e"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9459e6892f59ecea2e2584ee1058f5d8f629446eab52ba2305ae13a32a059530"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47fb24cc0f052f0576ea382872b3fc7e1f7e3028e53299ea751839418ade92a6"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50441c9de951a153c698b9b99992e806b71c1f36d14b154592580ff4a9d0d877"}, + {file = 
"lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ab339536aa798b1e17750733663d272038bf28069761d5be57cb4a9b0137b4f8"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9776af1aad5a4b4a1317242ee2bea51da54b2a7b7b48674be736d463c999f37d"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:63e7968ff83da2eb6fdda967483a7a023aa497d85ad8f05c3ad9b1f2e8c84987"}, + {file = "lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html-clean = ["lxml_html_clean"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=3.0.11,<3.1.0)"] + [[package]] name = "marko" version = "2.1.4" @@ -905,10 +1114,9 @@ xml = ["lxml (>=4.9.2)"] name = "pillow" version = "10.4.0" description = "Python Imaging Library (Fork)" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"image\"" files = [ {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, @@ -1367,6 +1575,24 @@ files = [ {file = "python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92"}, ] +[[package]] +name = "python-pptx" +version = "1.0.2" +description = "Create, read, and update PowerPoint 2007+ (.pptx) files." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba"}, + {file = "python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2" +typing-extensions = ">=4.9.0" +XlsxWriter = ">=0.5.7" + [[package]] name = "pytz" version = "2025.2" @@ -1622,6 +1848,30 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "soupsieve" +version = "2.8.3" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95"}, + {file = "soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349"}, +] + [[package]] name = "sse-starlette" version = "3.2.0" @@ -1692,14 +1942,14 @@ files = [ [[package]] name = "typing-inspection" -version = "0.4.1" +version = "0.4.2" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"}, - {file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"}, + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, ] [package.dependencies] @@ -1768,6 +2018,18 @@ h11 = ">=0.8" [package.extras] standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "xlsxwriter" +version = "3.2.9" +description = "A Python module for creating Excel XLSX files." 
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3"},
+    {file = "xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c"},
+]
+
 [extras]
 image = ["pillow"]
 tables = ["pandas"]
@@ -1775,4 +2037,4 @@ tables = ["pandas"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "0238b68ea61296234b0d7e29331387ee7256f8d4ec149c545ad611b522db2863"
+content-hash = "6f4fc5af8bfe7cdb9b65b803e6269b188032dc2f5745a0bb4d44b7d3a382e02d"
diff --git a/pyproject.toml b/pyproject.toml
index 4d4b615..1c5eed1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gslides-api"
-version = "0.3.5"
+version = "0.4.1"
 description = "A Python library for working with Google Slides API using Pydantic domain objects"
 authors = ["motley.ai <info@motley.ai>"]
 license = "MIT"
@@ -17,6 +17,11 @@ marko = "^2.1.4"
 protobuf = "^6.31.1"
 requests = "^2.32.4"
 typeguard = "^4.4.4"
+httpx = "^0.27.0"
+python-pptx = "^1.0.0"
+lxml = "^5.0.0"
+beautifulsoup4 = "^4.12.0"
+fastapi = ">=0.100.0"
 pillow = {version = "^10.0.0", optional = true}
 pandas = {version = "^2.0.0", optional = true}
 mcp = {version = "^1.0.0"}
diff --git a/tests/mcp_tests/test_add_element_names.py b/tests/mcp_tests/test_add_element_names.py
new file mode 100644
index 0000000..c350434
--- /dev/null
+++ b/tests/mcp_tests/test_add_element_names.py
@@ -0,0 +1,142 @@
+"""Tests for the add_element_names MCP tool."""
+
+import json
+from unittest.mock import Mock, patch
+
+import pytest
+
+from gslides_api.adapters.add_names import SlideElementNames
+from gslides_api.mcp.server import add_element_names
+
+
+class TestAddElementNames:
+    """Tests for the ``add_element_names`` MCP tool with its collaborators patched.
+
+    The patched tests replace ``get_api_client``, ``GSlidesAPIClient`` and
+    ``name_slides`` in ``gslides_api.mcp.server``; every test decodes the
+    tool's JSON string result before asserting on it.
+    """
+
+    @staticmethod
+    def _wire_clients(get_client_mock, client_class_mock):
+        """Give both patched client factories a fresh ``Mock`` return value.
+
+        Returns the ``(api_client, gslides_client)`` pair for later assertions.
+        """
+        api_client = Mock()
+        gslides_client = Mock()
+        get_client_mock.return_value = api_client
+        client_class_mock.return_value = gslides_client
+        return api_client, gslides_client
+
+    @patch("gslides_api.mcp.server.name_slides")
+    @patch("gslides_api.mcp.server.GSlidesAPIClient")
+    @patch("gslides_api.mcp.server.get_api_client")
+    def test_successful_call(
+        self, mock_get_client, mock_gslides_class, mock_name_slides,
+    ):
+        """A default call succeeds, reports each slide's names, and flushes once."""
+        api_client, gslides_client = self._wire_clients(
+            mock_get_client, mock_gslides_class
+        )
+        intro_names = SlideElementNames(
+            image_names=["Image_1"],
+            text_names=["Title", "Text_1"],
+            chart_names=["Chart_1"],
+            table_names=[],
+        )
+        summary_names = SlideElementNames(
+            image_names=[],
+            text_names=["Title"],
+            chart_names=[],
+            table_names=["Table_1"],
+        )
+        mock_name_slides.return_value = {
+            "Intro": intro_names,
+            "Summary": summary_names,
+        }
+
+        payload = json.loads(add_element_names("pres_123"))
+
+        assert payload["success"] is True
+        assert "Successfully named 2 slides" in payload["message"]
+        named = payload["details"]["slide_element_names"]
+        intro = named["Intro"]
+        assert intro["text_names"] == ["Title", "Text_1"]
+        assert intro["image_names"] == ["Image_1"]
+        assert intro["chart_names"] == ["Chart_1"]
+        assert intro["table_names"] == []
+        assert named["Summary"]["table_names"] == ["Table_1"]
+
+        # Keyword arguments expected when only the presentation id is passed.
+        expected_kwargs = {
+            "name_elements": True,
+            "api_client": gslides_client,
+            "skip_empty_text_boxes": False,
+            "min_image_size_cm": 4.0,
+        }
+        mock_name_slides.assert_called_once_with("pres_123", **expected_kwargs)
+        api_client.flush_batch_update.assert_called_once()
+
+    @patch("gslides_api.mcp.server.name_slides")
+    @patch("gslides_api.mcp.server.GSlidesAPIClient")
+    @patch("gslides_api.mcp.server.get_api_client")
+    def test_with_custom_parameters(
+        self, mock_get_client, mock_gslides_class, mock_name_slides,
+    ):
+        """Caller-supplied options are passed to ``name_slides`` unchanged."""
+        _, gslides_client = self._wire_clients(mock_get_client, mock_gslides_class)
+        mock_name_slides.return_value = {}
+        overrides = {"skip_empty_text_boxes": True, "min_image_size_cm": 2.0}
+
+        payload = json.loads(add_element_names("pres_123", **overrides))
+
+        assert payload["success"] is True
+        mock_name_slides.assert_called_once_with(
+            "pres_123",
+            name_elements=True,
+            api_client=gslides_client,
+            **overrides,
+        )
+
+    def test_invalid_presentation_url(self):
+        """An invalid presentation URL is reported as a ``ValidationError``."""
+        bad_url = "https://example.com/bad-url"
+
+        payload = json.loads(add_element_names(bad_url))
+
+        assert payload["error"] is True
+        assert payload["error_type"] == "ValidationError"
+
+    @patch("gslides_api.mcp.server.name_slides")
+    @patch("gslides_api.mcp.server.GSlidesAPIClient")
+    @patch("gslides_api.mcp.server.get_api_client")
+    def test_exception_handling(
+        self, mock_get_client, mock_gslides_class, mock_name_slides,
+    ):
+        """An exception raised by ``name_slides`` comes back as an error response."""
+        self._wire_clients(mock_get_client, mock_gslides_class)
+        mock_name_slides.side_effect = RuntimeError("API connection failed")
+
+        payload = json.loads(add_element_names("pres_123"))
+
+        assert payload["error"] is True
+        assert "API connection failed" in payload["message"]
+
+    @patch("gslides_api.mcp.server.name_slides")
+    @patch("gslides_api.mcp.server.GSlidesAPIClient")
+    @patch("gslides_api.mcp.server.get_api_client")
+    def test_google_slides_url_parsed(
+        self, mock_get_client, mock_gslides_class, mock_name_slides,
+    ):
+        """A full Google Slides URL is reduced to its presentation id."""
+        self._wire_clients(mock_get_client, mock_gslides_class)
+        mock_name_slides.return_value = {}
+        slides_url = "https://docs.google.com/presentation/d/abc123xyz/edit"
+
+        payload = json.loads(add_element_names(slides_url))
+
+        assert payload["success"] is True
+        mock_name_slides.assert_called_once()
+        first_positional = mock_name_slides.call_args[0][0]
+        assert first_positional == "abc123xyz"
diff --git a/tests/mcp_tests/test_copy_presentation.py
b/tests/mcp_tests/test_copy_presentation.py new file mode 100644 index 0000000..6e1e228 --- /dev/null +++ b/tests/mcp_tests/test_copy_presentation.py @@ -0,0 +1,136 @@ +"""Tests for the copy_presentation MCP tool.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from gslides_api.mcp.server import copy_presentation + + +@pytest.fixture +def mock_api_client(): + """Create a mock API client.""" + client = Mock() + client.copy_presentation.return_value = {"id": "new_pres_id_123"} + client.flush_batch_update.return_value = None + return client + + +@pytest.fixture +def mock_presentation(): + """Create a mock Presentation object.""" + pres = Mock() + pres.title = "My Presentation" + pres.presentationId = "original_pres_id" + return pres + + +class TestCopyPresentation: + """Tests for the copy_presentation tool.""" + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_default_title(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation with default title.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id")) + + assert result["success"] is True + assert result["message"] == "Successfully copied presentation 'My Presentation'" + assert result["details"]["original_presentation_id"] == "original_pres_id" + assert result["details"]["new_presentation_id"] == "new_pres_id_123" + assert result["details"]["new_title"] == "Copy of My Presentation" + assert "docs.google.com/presentation/d/new_pres_id_123/edit" in result["details"]["new_presentation_url"] + + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "Copy of My Presentation", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_custom_title(self, 
mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation with a custom title.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id", copy_title="Custom Title")) + + assert result["success"] is True + assert result["details"]["new_title"] == "Custom Title" + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "Custom Title", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_folder_id(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation into a specific folder.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id", copy_title="In Folder", folder_id="folder_abc")) + + assert result["success"] is True + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "In Folder", "folder_abc") + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_url_input(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation using a Google Slides URL.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + url = "https://docs.google.com/presentation/d/abc123_xyz/edit" + result = json.loads(copy_presentation(url, copy_title="From URL")) + + assert result["success"] is True + mock_pres_class.from_id.assert_called_once_with("abc123_xyz", api_client=mock_api_client) + mock_api_client.copy_presentation.assert_called_once_with("abc123_xyz", "From URL", None) + + def test_copy_with_invalid_url(self): + """Test copying with an invalid Google Slides URL.""" + result = 
json.loads(copy_presentation("https://example.com/not-a-slides-url")) + + assert result["error"] is True + assert result["error_type"] == "ValidationError" + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_untitled_presentation(self, mock_get_client, mock_pres_class, mock_api_client): + """Test copying a presentation with no title defaults correctly.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.title = None + mock_pres.presentationId = "pres_no_title" + mock_pres_class.from_id.return_value = mock_pres + + result = json.loads(copy_presentation("pres_no_title")) + + assert result["success"] is True + assert result["details"]["new_title"] == "Copy of Untitled" + mock_api_client.copy_presentation.assert_called_once_with("pres_no_title", "Copy of Untitled", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_presentation_api_error(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test error handling when the API call fails.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + mock_api_client.copy_presentation.side_effect = Exception("Drive API quota exceeded") + + result = json.loads(copy_presentation("original_pres_id", copy_title="Will Fail")) + + assert result["error"] is True + assert "PresentationError" in result["error_type"] + assert "Drive API quota exceeded" in result["message"] + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_presentation_load_error(self, mock_get_client, mock_pres_class, mock_api_client): + """Test error handling when loading the original presentation fails.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.side_effect = Exception("Presentation not found") + + result = 
json.loads(copy_presentation("nonexistent_id")) + + assert result["error"] is True + assert "Presentation not found" in result["message"] diff --git a/tests/mcp_tests/test_models.py b/tests/mcp_tests/test_models.py index 47794c1..de4ad8c 100644 --- a/tests/mcp_tests/test_models.py +++ b/tests/mcp_tests/test_models.py @@ -3,11 +3,8 @@ import pytest from gslides_api.mcp.models import ( - ElementOutline, ErrorResponse, OutputFormat, - PresentationOutline, - SlideOutline, SuccessResponse, ThumbnailSizeOption, ) @@ -26,16 +23,21 @@ def test_domain_format(self): assert OutputFormat.DOMAIN.value == "domain" assert OutputFormat("domain") == OutputFormat.DOMAIN - def test_outline_format(self): - """Test OUTLINE format value.""" - assert OutputFormat.OUTLINE.value == "outline" - assert OutputFormat("outline") == OutputFormat.OUTLINE + def test_markdown_format(self): + """Test MARKDOWN format value.""" + assert OutputFormat.MARKDOWN.value == "markdown" + assert OutputFormat("markdown") == OutputFormat.MARKDOWN def test_invalid_format(self): """Test that invalid format raises ValueError.""" with pytest.raises(ValueError): OutputFormat("invalid") + def test_outline_format_removed(self): + """Test that OUTLINE format no longer exists.""" + with pytest.raises(ValueError): + OutputFormat("outline") + class TestThumbnailSizeOption: """Tests for ThumbnailSizeOption enum.""" @@ -109,118 +111,3 @@ def test_success_response_with_details(self): details={"new_slide_id": "abc123", "position": 5}, ) assert response.details == {"new_slide_id": "abc123", "position": 5} - - -class TestElementOutline: - """Tests for ElementOutline model.""" - - def test_minimal_element_outline(self): - """Test creating a minimal element outline.""" - outline = ElementOutline( - element_id="elem123", - type="shape", - ) - assert outline.element_id == "elem123" - assert outline.type == "shape" - assert outline.element_name is None - assert outline.alt_description is None - assert outline.content_markdown is None 
- - def test_full_element_outline(self): - """Test creating a full element outline.""" - outline = ElementOutline( - element_name="Title", - element_id="elem123", - type="shape", - alt_description="Main title text box", - content_markdown="# Welcome", - ) - assert outline.element_name == "Title" - assert outline.element_id == "elem123" - assert outline.type == "shape" - assert outline.alt_description == "Main title text box" - assert outline.content_markdown == "# Welcome" - - -class TestSlideOutline: - """Tests for SlideOutline model.""" - - def test_minimal_slide_outline(self): - """Test creating a minimal slide outline.""" - outline = SlideOutline( - slide_id="slide123", - ) - assert outline.slide_id == "slide123" - assert outline.slide_name is None - assert outline.elements == [] - - def test_slide_outline_with_elements(self): - """Test creating a slide outline with elements.""" - elements = [ - ElementOutline(element_id="e1", type="shape"), - ElementOutline(element_id="e2", type="image"), - ] - outline = SlideOutline( - slide_name="Introduction", - slide_id="slide123", - elements=elements, - ) - assert outline.slide_name == "Introduction" - assert len(outline.elements) == 2 - assert outline.elements[0].element_id == "e1" - - -class TestPresentationOutline: - """Tests for PresentationOutline model.""" - - def test_minimal_presentation_outline(self): - """Test creating a minimal presentation outline.""" - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - ) - assert outline.presentation_id == "pres123" - assert outline.title == "My Presentation" - assert outline.slides == [] - - def test_presentation_outline_with_slides(self): - """Test creating a presentation outline with slides.""" - slides = [ - SlideOutline(slide_id="s1", slide_name="Cover"), - SlideOutline(slide_id="s2", slide_name="Content"), - ] - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - slides=slides, - ) - assert 
len(outline.slides) == 2 - assert outline.slides[0].slide_name == "Cover" - assert outline.slides[1].slide_name == "Content" - - def test_presentation_outline_model_dump(self): - """Test that presentation outline can be serialized.""" - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - slides=[ - SlideOutline( - slide_id="s1", - slide_name="Cover", - elements=[ - ElementOutline( - element_name="Title", - element_id="e1", - type="shape", - content_markdown="# Welcome", - ) - ], - ) - ], - ) - data = outline.model_dump() - assert data["presentation_id"] == "pres123" - assert data["title"] == "My Presentation" - assert len(data["slides"]) == 1 - assert data["slides"][0]["slide_name"] == "Cover" - assert data["slides"][0]["elements"][0]["element_name"] == "Title" diff --git a/tests/mcp_tests/test_new_tools.py b/tests/mcp_tests/test_new_tools.py new file mode 100644 index 0000000..1981a70 --- /dev/null +++ b/tests/mcp_tests/test_new_tools.py @@ -0,0 +1,572 @@ +"""Tests for new MCP tools: replace_element_image (file paths), write_table_markdown, bulk_write_element_markdown.""" + +import json +from unittest.mock import Mock, patch, call + +import pytest + +from gslides_api.element.base import ElementKind +from gslides_api.mcp.server import ( + bulk_write_element_markdown, + replace_element_image, + write_table_markdown, +) + + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def mock_api_client(): + """Create a mock API client.""" + client = Mock() + client.flush_batch_update.return_value = None + client.batch_update.return_value = None + return client + + +@pytest.fixture +def mock_slide(): + """Create a mock slide with page_elements_flat.""" + slide = Mock() + slide.objectId = "slide_001" + return slide + + +@pytest.fixture +def mock_image_element(): + """Create a mock ImageElement.""" + 
from gslides_api.element.element import ImageElement + + element = Mock(spec=ImageElement) + element.objectId = "img_001" + element.type = ElementKind.IMAGE + element.replace_image = Mock() + return element + + +@pytest.fixture +def mock_shape_element(): + """Create a mock ShapeElement.""" + from gslides_api.element.shape import ShapeElement + + element = Mock(spec=ShapeElement) + element.objectId = "shape_001" + element.type = ElementKind.SHAPE + element.write_text = Mock() + return element + + +@pytest.fixture +def mock_table_element(): + """Create a mock TableElement.""" + from gslides_api.element.table import TableElement + + element = Mock(spec=TableElement) + element.objectId = "table_001" + element.type = ElementKind.TABLE + element.table = Mock() + element.table.rows = 3 + element.table.columns = 2 + element.resize = Mock(return_value=1.0) + element.content_update_requests = Mock(return_value=[]) + return element + + +@pytest.fixture +def mock_presentation(mock_slide): + """Create a mock presentation with one slide.""" + pres = Mock() + pres.slides = [mock_slide] + pres.presentationId = "pres_123" + return pres + + +# ============================================================================= +# Tests for replace_element_image (URL vs file path routing) +# ============================================================================= + + +class TestReplaceElementImageRouting: + """Test that replace_element_image routes URL vs file path correctly.""" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_url_routed_to_url_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that http/https URLs are passed as url= parameter.""" + mock_get_client.return_value = mock_api_client + 
mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "https://example.com/image.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + url="https://example.com/image.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_http_url_routed_to_url_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that http:// URLs are also routed to url= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "http://example.com/image.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + url="http://example.com/image.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_local_file_routed_to_file_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that local file paths are passed as file= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + 
mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "/tmp/chart.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + file="/tmp/chart.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_relative_file_routed_to_file_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that relative file paths are passed as file= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "images/chart.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + file="images/chart.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_response_contains_image_source( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that the response includes image_source field.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + 
"pres_123", "slide1", "my_image", "/tmp/chart.png" + )) + + assert result["details"]["image_source"] == "/tmp/chart.png" + + +# ============================================================================= +# Tests for write_table_markdown +# ============================================================================= + + +class TestWriteTableMarkdown: + """Tests for the write_table_markdown tool.""" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_write_table_same_shape( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_table_element, + ): + """Test writing a table that matches the existing table shape (no resize).""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_table_element + + # Table has 3 rows, 2 columns - markdown matches + md_table = "| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |" + + with patch("gslides_api.mcp.server.MarkdownTableElement") as mock_mte: + mock_md_elem = Mock() + mock_md_elem.shape = (3, 2) + mock_mte.from_markdown.return_value = mock_md_elem + + result = json.loads(write_table_markdown("pres_123", "slide1", "my_table", md_table)) + + assert result["success"] is True + assert result["details"]["table_shape"] == [3, 2] + assert result["details"]["resized"] is False + mock_table_element.resize.assert_not_called() + mock_table_element.content_update_requests.assert_called_once_with( + mock_md_elem, check_shape=False, font_scale_factor=1.0 + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def 
test_write_table_with_resize( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_table_element, + ): + """Test writing a table that requires resizing.""" + mock_get_client.return_value = mock_api_client + + # After resize, re-fetch returns updated element + resized_table_element = Mock() + resized_table_element.objectId = "table_001" + resized_table_element.content_update_requests = Mock(return_value=[]) + + # First call returns original, second call returns resized + mock_pres_class.from_id.side_effect = [ + Mock(slides=[mock_slide]), + Mock(slides=[mock_slide]), + ] + mock_find_slide.return_value = mock_slide + mock_find_element.side_effect = [mock_table_element, resized_table_element] + + # Table has 3 rows, 2 cols but markdown has 4 rows, 3 cols + md_table = "| A | B | C |\n|---|---|---|\n| 1 | 2 | 3 |\n| 4 | 5 | 6 |\n| 7 | 8 | 9 |" + mock_table_element.resize.return_value = 0.8 + + with patch("gslides_api.mcp.server.MarkdownTableElement") as mock_mte: + mock_md_elem = Mock() + mock_md_elem.shape = (4, 3) + mock_mte.from_markdown.return_value = mock_md_elem + + result = json.loads(write_table_markdown("pres_123", "slide1", "my_table", md_table)) + + assert result["success"] is True + assert result["details"]["resized"] is True + assert result["details"]["table_shape"] == [4, 3] + mock_table_element.resize.assert_called_once_with(4, 3, api_client=mock_api_client) + resized_table_element.content_update_requests.assert_called_once_with( + mock_md_elem, check_shape=False, font_scale_factor=0.8 + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_write_table_not_a_table( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test error when element is 
not a table.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_shape_element + + result = json.loads(write_table_markdown("pres_123", "slide1", "not_table", "| A |\n|---|\n| 1 |")) + + assert result["error"] is True + assert "not a table element" in result["message"] + + def test_write_table_invalid_presentation_url(self): + """Test error with invalid presentation URL.""" + result = json.loads(write_table_markdown( + "https://example.com/bad-url", "slide1", "table1", "| A |\n|---|\n| 1 |" + )) + assert result["error"] is True + assert result["error_type"] == "ValidationError" + + +# ============================================================================= +# Tests for bulk_write_element_markdown +# ============================================================================= + + +class TestBulkWriteElementMarkdown: + """Tests for the bulk_write_element_markdown tool.""" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_successful_bulk_write( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test successful bulk write to multiple elements.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + mock_find_element.return_value = mock_shape_element + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "title", "markdown": "# Hello"}, + {"slide_name": "slide1", "element_name": "body", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert 
result["success"] is True + assert result["details"]["succeeded"] == 2 + assert result["details"]["failed"] == 0 + assert mock_shape_element.write_text.call_count == 2 + + def test_invalid_json(self): + """Test error with invalid JSON input.""" + result = json.loads(bulk_write_element_markdown("pres_123", "not valid json{")) + + assert result["error"] is True + assert result["error_type"] == "ValidationError" + assert "Invalid JSON" in result["message"] + + def test_json_not_array(self): + """Test error when JSON is not an array.""" + result = json.loads(bulk_write_element_markdown("pres_123", '{"key": "value"}')) + + assert result["error"] is True + assert "Expected a JSON array" in result["message"] + + def test_missing_keys(self): + """Test error when entries are missing required keys.""" + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "title"}, # missing "markdown" + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["error"] is True + assert "missing keys" in result["message"] + + def test_entry_not_object(self): + """Test error when an entry is not an object.""" + writes = json.dumps(["not an object"]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["error"] is True + assert "Entry 0 is not an object" in result["message"] + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_partial_failure_slide_not_found( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test that missing slides are reported as failures without blocking others.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + 
mock_get_slide_name.return_value = "slide1" + mock_find_element.return_value = mock_shape_element + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "title", "markdown": "# Hello"}, + {"slide_name": "nonexistent", "element_name": "body", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "not found" in result["details"]["failures"][0]["error"] + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_partial_failure_element_not_found( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test that missing elements are reported as failures without blocking others.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + + # First call returns shape, second returns None (not found) + mock_find_element.side_effect = [mock_shape_element, None] + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "title", "markdown": "# Hello"}, + {"slide_name": "slide1", "element_name": "missing_elem", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "not found" in result["details"]["failures"][0]["error"] + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + 
@patch("gslides_api.mcp.server.get_api_client") + def test_partial_failure_wrong_element_type( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_table_element, mock_shape_element, + ): + """Test that non-shape elements are reported as failures.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + + # First returns table (wrong type), second returns shape (correct) + mock_find_element.side_effect = [mock_table_element, mock_shape_element] + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "my_table", "markdown": "# Hello"}, + {"slide_name": "slide1", "element_name": "title", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "not a text element" in result["details"]["failures"][0]["error"] + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_write_text_exception_captured( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test that exceptions during write_text are captured per-element.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + + # Create two separate shape elements + from gslides_api.element.shape import ShapeElement + good_element = Mock(spec=ShapeElement) + good_element.objectId = "shape_good" + good_element.type = ElementKind.SHAPE + 
good_element.write_text = Mock() + + bad_element = Mock(spec=ShapeElement) + bad_element.objectId = "shape_bad" + bad_element.type = ElementKind.SHAPE + bad_element.write_text = Mock(side_effect=RuntimeError("API error")) + + mock_find_element.side_effect = [bad_element, good_element] + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "bad", "markdown": "fail"}, + {"slide_name": "slide1", "element_name": "good", "markdown": "succeed"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "API error" in result["details"]["failures"][0]["error"] + + def test_invalid_presentation_url(self): + """Test error with invalid presentation URL.""" + writes = json.dumps([{"slide_name": "s", "element_name": "e", "markdown": "m"}]) + result = json.loads(bulk_write_element_markdown("https://example.com/bad", writes)) + + assert result["error"] is True + assert result["error_type"] == "ValidationError" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_empty_writes_list( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, + ): + """Test with an empty writes list.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [] + mock_pres_class.from_id.return_value = mock_pres + + result = json.loads(bulk_write_element_markdown("pres_123", "[]")) + + assert result["success"] is True + assert result["details"]["succeeded"] == 0 + assert result["details"]["failed"] == 0 + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + 
@patch("gslides_api.mcp.server.get_api_client") + def test_markdown_with_escaped_newlines( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_shape_element, + ): + """Test that JSON-escaped newlines in markdown are handled correctly.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + mock_find_element.return_value = mock_shape_element + + # JSON with escaped newlines + writes = '[{"slide_name": "slide1", "element_name": "title", "markdown": "line1\\nline2\\nline3"}]' + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + # Verify the markdown was passed with actual newlines + mock_shape_element.write_text.assert_called_once_with( + "line1\nline2\nline3", as_markdown=True, api_client=mock_api_client + ) diff --git a/tests/test_absolute_size_position.py b/tests/test_absolute_size_position.py new file mode 100644 index 0000000..e7e309f --- /dev/null +++ b/tests/test_absolute_size_position.py @@ -0,0 +1,70 @@ +"""Tests for absolute_size() and absolute_position() returning None when size/transform is missing.""" + +import pytest + +from gslides_api.agnostic.units import OutputUnit +from gslides_api.domain.domain import PageElementProperties, Size, Transform + + +class TestAbsoluteSizeNone: + """Test that absolute_size returns None when size or transform is missing.""" + + def test_returns_none_when_size_is_none(self): + props = PageElementProperties( + size=None, + transform=Transform(translateX=0, translateY=0, scaleX=1, scaleY=1), + ) + result = props.absolute_size(units=OutputUnit.CM) + assert result is None + + def test_returns_none_when_transform_is_none(self): + props = PageElementProperties( + size=Size(width=914400, height=914400), + transform=None, + ) + result = 
props.absolute_size(units=OutputUnit.CM) + assert result is None + + def test_returns_none_when_both_missing(self): + props = PageElementProperties( + size=None, + transform=None, + ) + result = props.absolute_size(units=OutputUnit.CM) + assert result is None + + def test_returns_tuple_when_both_present(self): + props = PageElementProperties( + size=Size(width=914400, height=914400), + transform=Transform(translateX=0, translateY=0, scaleX=1, scaleY=1), + ) + result = props.absolute_size(units=OutputUnit.IN) + assert result is not None + assert isinstance(result, tuple) + assert len(result) == 2 + assert result[0] == pytest.approx(1.0, abs=0.01) + assert result[1] == pytest.approx(1.0, abs=0.01) + + +class TestAbsolutePositionNone: + """Test that absolute_position returns None when transform is missing.""" + + def test_returns_none_when_transform_is_none(self): + props = PageElementProperties( + size=Size(width=914400, height=914400), + transform=None, + ) + result = props.absolute_position(units=OutputUnit.CM) + assert result is None + + def test_returns_tuple_when_transform_present(self): + props = PageElementProperties( + size=Size(width=914400, height=914400), + transform=Transform(translateX=914400, translateY=457200, scaleX=1, scaleY=1), + ) + result = props.absolute_position(units=OutputUnit.IN) + assert result is not None + assert isinstance(result, tuple) + assert len(result) == 2 + assert result[0] == pytest.approx(1.0, abs=0.01) + assert result[1] == pytest.approx(0.5, abs=0.01) diff --git a/tests/test_adapters/__init__.py b/tests/test_adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_adapters/test_abstract_slide_markdown.py b/tests/test_adapters/test_abstract_slide_markdown.py new file mode 100644 index 0000000..34ee88c --- /dev/null +++ b/tests/test_adapters/test_abstract_slide_markdown.py @@ -0,0 +1,365 @@ +"""Tests for AbstractSlide.markdown() method.""" + +from unittest.mock import MagicMock, PropertyMock + 
+import pytest + +from gslides_api.agnostic.element import MarkdownTableElement, TableData + +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractElement, + AbstractImageElement, + AbstractShapeElement, + AbstractSlide, + AbstractTableElement, + _extract_font_size_from_table, + _extract_font_size_pt, +) + + +def _make_shape_element( + object_id="shape1", + title=None, + description=None, + text="Hello World", + x=0.5, + y=0.3, + w=9.0, + h=1.2, + font_size_pt=18.0, +): + """Create a mock AbstractShapeElement.""" + elem = MagicMock(spec=AbstractShapeElement) + elem.objectId = object_id + elem.alt_text = AbstractAltText(title=title, description=description) + elem.type = "SHAPE" + type(elem).has_text = PropertyMock(return_value=True) + elem.read_text.return_value = text + elem.absolute_position.return_value = (x, y) + elem.absolute_size.return_value = (w, h) + + # Create a mock style with font_size_pt attribute + mock_style = MagicMock() + mock_style.font_size_pt = font_size_pt + elem.styles.return_value = [mock_style] + + return elem + + +def _make_image_element( + object_id="img1", + title="Chart", + description=None, + x=1.0, + y=3.0, + w=8.0, + h=4.0, +): + """Create a mock AbstractImageElement.""" + elem = MagicMock(spec=AbstractImageElement) + elem.objectId = object_id + elem.alt_text = AbstractAltText(title=title, description=description) + elem.type = "IMAGE" + elem.absolute_position.return_value = (x, y) + elem.absolute_size.return_value = (w, h) + return elem + + +def _make_table_element( + object_id="table1", + title="Data", + description=None, + x=0.5, + y=7.5, + w=9.0, + h=2.0, + headers=None, + rows=None, +): + """Create a mock AbstractTableElement.""" + if headers is None: + headers = ["Metric", "Q3", "Q4"] + if rows is None: + rows = [["Revenue", "$1.2M", "$1.5M"], ["Growth", "8%", "12%"]] + + table_data = TableData(headers=headers, rows=rows) + md_elem = MarkdownTableElement(name=title, content=table_data) + + elem = 
MagicMock(spec=AbstractTableElement) + elem.objectId = object_id + elem.alt_text = AbstractAltText(title=title, description=description) + elem.type = "TABLE" + elem.absolute_position.return_value = (x, y) + elem.absolute_size.return_value = (w, h) + elem.to_markdown_element.return_value = md_elem + return elem + + +def _make_slide(elements): + """Create a mock AbstractSlide with given elements.""" + slide = MagicMock(spec=AbstractSlide) + slide.page_elements_flat = elements + # Use the real markdown() method + slide.markdown = AbstractSlide.markdown.__get__(slide, type(slide)) + return slide + + +class TestExtractFontSizePt: + def test_none_styles(self): + assert _extract_font_size_pt(None) == 12.0 + + def test_empty_styles(self): + assert _extract_font_size_pt([]) == 12.0 + + def test_gslides_style(self): + style = MagicMock() + style.font_size_pt = 24.0 + assert _extract_font_size_pt([style]) == 24.0 + + def test_pptx_style(self): + fs = MagicMock() + fs.pt = 18.0 + style = {"font_size": fs} + assert _extract_font_size_pt([style]) == 18.0 + + def test_multiple_styles_returns_max(self): + s1 = MagicMock() + s1.font_size_pt = 14.0 + s2 = MagicMock() + s2.font_size_pt = 24.0 + assert _extract_font_size_pt([s1, s2]) == 24.0 + + def test_gslides_style_none_font_size(self): + style = MagicMock() + style.font_size_pt = None + assert _extract_font_size_pt([style]) == 12.0 + + +class TestExtractFontSizeFromTable: + def test_fallback_no_adapter_attributes(self): + elem = MagicMock(spec=AbstractTableElement) + elem.pptx_element = None + elem.gslides_element = None + assert _extract_font_size_from_table(elem) == 10.0 + + def test_no_attributes_at_all(self): + elem = MagicMock(spec=[]) + result = _extract_font_size_from_table(elem) + assert result == 10.0 + + +class TestSlideMarkdown: + def test_text_element(self): + shape = _make_shape_element( + title="Title", + text="# Quarterly Report", + x=0.5, + y=0.3, + w=9.0, + h=1.2, + font_size_pt=18.0, + ) + slide = 
_make_slide([shape]) + md = slide.markdown() + + assert "<!-- text: Title |" in md + assert "pos=(0.5,0.3)" in md + assert "size=(9.0,1.2)" in md + assert "chars -->" in md + assert "# Quarterly Report" in md + + def test_image_element(self): + img = _make_image_element( + title="Chart", + x=1.0, + y=3.0, + w=8.0, + h=4.0, + ) + slide = _make_slide([img]) + md = slide.markdown() + + assert "<!-- image: Chart |" in md + assert "pos=(1.0,3.0)" in md + assert "size=(8.0,4.0)" in md + + def test_table_element(self): + table = _make_table_element( + title="Data", + x=0.5, + y=7.5, + w=9.0, + h=2.0, + ) + slide = _make_slide([table]) + md = slide.markdown() + + assert "<!-- table: Data |" in md + assert "pos=(0.5,7.5)" in md + assert "size=(9.0,2.0)" in md + assert "chars/col -->" in md + assert "Metric" in md + assert "Revenue" in md + + def test_mixed_elements(self): + shape = _make_shape_element( + title="Title", + text="Hello", + x=0.5, + y=0.3, + w=9.0, + h=1.0, + ) + img = _make_image_element( + title="Chart", + x=1.0, + y=2.0, + w=8.0, + h=3.0, + ) + table = _make_table_element( + title="Data", + x=0.5, + y=6.0, + w=9.0, + h=2.0, + ) + slide = _make_slide([shape, img, table]) + md = slide.markdown() + + # Check all three elements are present, separated by double newlines + parts = md.split("\n\n") + assert len(parts) == 3 + assert "text: Title" in parts[0] + assert "image: Chart" in parts[1] + assert "table: Data" in parts[2] + + def test_unnamed_element_is_skipped(self): + shape = _make_shape_element(object_id="abc123", title=None) + slide = _make_slide([shape]) + md = slide.markdown() + + assert md == "" + + def test_empty_slide(self): + slide = _make_slide([]) + md = slide.markdown() + assert md == "" + + def test_unknown_element_type(self): + elem = MagicMock(spec=AbstractElement) + elem.objectId = "group1" + elem.alt_text = AbstractAltText(title="MyGroup") + elem.type = "GROUP" + elem.absolute_position.return_value = (0.0, 0.0) + 
elem.absolute_size.return_value = (10.0, 7.5) + + slide = _make_slide([elem]) + md = slide.markdown() + + assert "<!-- GROUP: MyGroup |" in md + assert "pos=(0.0,0.0)" in md + + def test_char_capacity_calculation(self): + """Verify that char capacity is included and reasonable.""" + shape = _make_shape_element( + title="Body", + text="Some text", + w=9.0, + h=5.0, + font_size_pt=12.0, + ) + slide = _make_slide([shape]) + md = slide.markdown() + + # Extract the char count from the markdown + import re + match = re.search(r"~(\d+) chars", md) + assert match is not None + chars = int(match.group(1)) + # For a 9x5 inch box with 12pt font, we expect a reasonable number + assert chars > 100 + assert chars < 10000 + + def test_table_col_chars_calculation(self): + """Verify per-column char capacity for tables.""" + table = _make_table_element( + title="BigTable", + w=9.0, + h=3.0, + headers=["A", "B", "C"], + rows=[["1", "2", "3"]], + ) + slide = _make_slide([table]) + md = slide.markdown() + + import re + match = re.search(r"~(\d+) chars/col", md) + assert match is not None + chars_per_col = int(match.group(1)) + # 9 inches / 3 cols = 3 inches per col, with 10pt font + assert chars_per_col > 10 + assert chars_per_col < 1000 + + def test_shape_without_text(self): + """Shape element without text should not appear as text type.""" + elem = MagicMock(spec=AbstractShapeElement) + elem.objectId = "empty_shape" + elem.alt_text = AbstractAltText(title="EmptyBox") + elem.type = "SHAPE" + type(elem).has_text = PropertyMock(return_value=False) + elem.absolute_position.return_value = (1.0, 1.0) + elem.absolute_size.return_value = (3.0, 2.0) + + slide = _make_slide([elem]) + md = slide.markdown() + + # Should fall through to the generic case since has_text is False + assert "<!-- SHAPE: EmptyBox |" in md + assert "text:" not in md + + def test_unnamed_elements_skipped_named_kept(self): + """Unnamed elements should be skipped, named ones kept.""" + named = 
_make_shape_element(title="Title", text="Hello") + unnamed = _make_shape_element(object_id="no_name", title=None, text="Hidden") + slide = _make_slide([named, unnamed]) + md = slide.markdown() + + assert "text: Title" in md + assert "no_name" not in md + assert "Hidden" not in md + + def test_alt_description_in_text_comment(self): + """Alt description should appear in text element comment.""" + shape = _make_shape_element( + title="Title", description="Main heading", text="Hello" + ) + slide = _make_slide([shape]) + md = slide.markdown() + + assert 'desc="Main heading"' in md + + def test_alt_description_in_image_comment(self): + """Alt description should appear in image element comment.""" + img = _make_image_element(title="Chart", description="Revenue chart") + slide = _make_slide([img]) + md = slide.markdown() + + assert 'desc="Revenue chart"' in md + + def test_alt_description_in_table_comment(self): + """Alt description should appear in table element comment.""" + table = _make_table_element(title="Data", description="Quarterly data") + slide = _make_slide([table]) + md = slide.markdown() + + assert 'desc="Quarterly data"' in md + + def test_no_description_no_desc_field(self): + """When no description, desc field should not appear.""" + shape = _make_shape_element(title="Title", description=None, text="Hello") + slide = _make_slide([shape]) + md = slide.markdown() + + assert "desc=" not in md diff --git a/tests/test_adapters/test_gslides_adapter_discriminated_union.py b/tests/test_adapters/test_gslides_adapter_discriminated_union.py new file mode 100644 index 0000000..10b59d3 --- /dev/null +++ b/tests/test_adapters/test_gslides_adapter_discriminated_union.py @@ -0,0 +1,410 @@ +"""Test discriminated union functionality for ConcreteElement in gslides_adapter.""" + +from unittest.mock import MagicMock, Mock + +import pytest +from pydantic import TypeAdapter, ValidationError + +from gslides_api.element.base import ElementKind +from gslides_api.element.element 
import PageElement +from gslides_api.element.image import ImageElement +from gslides_api.element.shape import ShapeElement +from gslides_api.element.table import TableElement + +from gslides_api.adapters.gslides_adapter import ( + GSlidesElement, + GSlidesElementParent, + GSlidesImageElement, + GSlidesShapeElement, + GSlidesTableElement, + concrete_element_discriminator, +) + + +class TestConcreteElementDiscriminator: + """Test the discriminator function for ConcreteElement.""" + + def test_discriminator_with_shape_element(self): + """Test discriminator correctly identifies shape elements.""" + mock_element = Mock() + mock_element.type = ElementKind.SHAPE + + result = concrete_element_discriminator(mock_element) + assert result == "shape" + + def test_discriminator_with_image_element(self): + """Test discriminator correctly identifies image elements.""" + mock_element = Mock() + mock_element.type = ElementKind.IMAGE + + result = concrete_element_discriminator(mock_element) + assert result == "image" + + def test_discriminator_with_table_element(self): + """Test discriminator correctly identifies table elements.""" + mock_element = Mock() + mock_element.type = ElementKind.TABLE + + result = concrete_element_discriminator(mock_element) + assert result == "table" + + def test_discriminator_with_enum_value(self): + """Test discriminator works with enum values that have .value attribute.""" + mock_element = Mock() + mock_enum = Mock() + mock_enum.value = "SHAPE" + mock_element.type = mock_enum + + result = concrete_element_discriminator(mock_element) + assert result == "shape" + + def test_discriminator_with_string_types(self): + """Test discriminator works with string type values.""" + mock_element = Mock() + mock_element.type = "shape" + + result = concrete_element_discriminator(mock_element) + assert result == "shape" + + mock_element.type = "IMAGE" + result = concrete_element_discriminator(mock_element) + assert result == "image" + + mock_element.type = "table" + 
result = concrete_element_discriminator(mock_element) + assert result == "table" + + def test_discriminator_with_invalid_type(self): + """Test discriminator returns 'generic' for unsupported types.""" + mock_element = Mock() + mock_element.type = "unsupported_type" + + result = concrete_element_discriminator(mock_element) + assert result == "generic" + + def test_discriminator_without_type_attribute(self): + """Test discriminator raises error when element has no type attribute.""" + mock_element = Mock(spec=[]) # Mock without type attribute + + with pytest.raises(ValueError, match="Cannot determine element type"): + concrete_element_discriminator(mock_element) + + +class TestConcreteElementValidation: + """Test ConcreteElement discriminated union validation.""" + + def setup_method(self): + """Set up TypeAdapter for ConcreteElement.""" + self.adapter = TypeAdapter(GSlidesElement) + + def create_mock_shape_element(self): + """Create a mock ShapeElement for testing.""" + mock_element = Mock(spec=ShapeElement) + mock_element.type = ElementKind.SHAPE + mock_element.objectId = "shape_123" + mock_element.presentation_id = "pres_123" + mock_element.slide_id = "slide_123" + # Create alt_text with string title and description, not Mock + alt_text_mock = Mock() + alt_text_mock.title = "Test Shape" + alt_text_mock.description = None + mock_element.alt_text = alt_text_mock + return mock_element + + def create_mock_image_element(self): + """Create a mock ImageElement for testing.""" + mock_element = Mock(spec=ImageElement) + mock_element.type = ElementKind.IMAGE + mock_element.objectId = "image_123" + mock_element.presentation_id = "pres_123" + mock_element.slide_id = "slide_123" + # Create alt_text with string title and description, not Mock + alt_text_mock = Mock() + alt_text_mock.title = "Test Image" + alt_text_mock.description = None + mock_element.alt_text = alt_text_mock + return mock_element + + def create_mock_table_element(self): + """Create a mock TableElement for 
testing.""" + mock_element = Mock(spec=TableElement) + mock_element.type = ElementKind.TABLE + mock_element.objectId = "table_123" + mock_element.presentation_id = "pres_123" + mock_element.slide_id = "slide_123" + # Create alt_text with string title and description, not Mock + alt_text_mock = Mock() + alt_text_mock.title = "Test Table" + alt_text_mock.description = None + mock_element.alt_text = alt_text_mock + return mock_element + + def test_concrete_element_validates_shape(self): + """Test that ConcreteElement validates and creates ConcreteShapeElement for shape elements.""" + mock_shape = self.create_mock_shape_element() + + result = self.adapter.validate_python(mock_shape) + + assert isinstance(result, GSlidesShapeElement) + assert result.objectId == "shape_123" + assert result.presentation_id == "pres_123" + assert result.slide_id == "slide_123" + assert result.alt_text.title == "Test Shape" + + def test_concrete_element_validates_image(self): + """Test that ConcreteElement validates and creates ConcreteImageElement for image elements.""" + mock_image = self.create_mock_image_element() + + result = self.adapter.validate_python(mock_image) + + assert isinstance(result, GSlidesImageElement) + assert result.objectId == "image_123" + assert result.presentation_id == "pres_123" + assert result.slide_id == "slide_123" + assert result.alt_text.title == "Test Image" + + def test_concrete_element_validates_table(self): + """Test that ConcreteElement validates and creates ConcreteTableElement for table elements.""" + mock_table = self.create_mock_table_element() + + result = self.adapter.validate_python(mock_table) + + assert isinstance(result, GSlidesTableElement) + assert result.objectId == "table_123" + assert result.presentation_id == "pres_123" + assert result.slide_id == "slide_123" + assert result.alt_text.title == "Test Table" + + def test_concrete_element_validation_unsupported_type(self): + """Test that ConcreteElement validation creates 
ConcreteGenericElement for unsupported element types.""" + mock_element = Mock() + mock_element.type = "unsupported_type" + mock_element.objectId = "unsupported_123" + mock_element.presentation_id = "pres_123" + mock_element.slide_id = "slide_123" + alt_text_mock = Mock() + alt_text_mock.title = "Unsupported Element" + alt_text_mock.description = None + mock_element.alt_text = alt_text_mock + + result = self.adapter.validate_python(mock_element) + + assert isinstance(result, GSlidesElementParent) + assert result.objectId == "unsupported_123" + assert result.presentation_id == "pres_123" + assert result.slide_id == "slide_123" + assert result.alt_text.title == "Unsupported Element" + + def test_concrete_element_validation_error_no_type(self): + """Test that ConcreteElement validation raises error when element has no type.""" + mock_element = Mock(spec=[]) # Mock without type attribute + + with pytest.raises(ValueError) as exc_info: + self.adapter.validate_python(mock_element) + + # Verify the error is related to discriminator + error_str = str(exc_info.value).lower() + assert "cannot determine element type" in error_str + + +class TestConcreteShapeElementValidator: + """Test ConcreteShapeElement validator functionality.""" + + def test_validator_with_shape_element(self): + """Test validator works correctly with ShapeElement.""" + mock_shape = Mock(spec=ShapeElement) + mock_shape.objectId = "shape_123" + mock_shape.presentation_id = "pres_123" + mock_shape.slide_id = "slide_123" + alt_text_mock = Mock() + alt_text_mock.title = "Test Shape" + alt_text_mock.description = None + mock_shape.alt_text = alt_text_mock + + # Test the validator method directly + result_data = GSlidesShapeElement.convert_from_page_element(mock_shape) + + assert result_data["objectId"] == "shape_123" + assert result_data["presentation_id"] == "pres_123" + assert result_data["slide_id"] == "slide_123" + assert result_data["gslides_element"] == mock_shape + + def 
test_validator_with_page_element_having_shape(self): + """Test validator works with PageElement that has shape attribute.""" + mock_page_element = Mock() + mock_page_element.shape = Mock() # Has shape attribute + mock_page_element.objectId = "shape_456" + mock_page_element.presentation_id = "pres_456" + mock_page_element.slide_id = "slide_456" + alt_text_mock = Mock() + alt_text_mock.title = "Page Element Shape" + alt_text_mock.description = None + mock_page_element.alt_text = alt_text_mock + + result_data = GSlidesShapeElement.convert_from_page_element(mock_page_element) + + assert result_data["objectId"] == "shape_456" + assert result_data["gslides_element"] == mock_page_element + + def test_validator_error_invalid_element(self): + """Test validator raises error for invalid element types.""" + # Create a mock without the 'shape' attribute using spec + mock_invalid = Mock(spec=["objectId", "presentation_id", "slide_id", "alt_text"]) + alt_text_mock = Mock() + alt_text_mock.title = "Invalid Element" + alt_text_mock.description = None + mock_invalid.alt_text = alt_text_mock + mock_invalid.objectId = "invalid_123" + mock_invalid.presentation_id = "pres_123" + mock_invalid.slide_id = "slide_123" + + with pytest.raises(ValueError, match="Expected ShapeElement or PageElement with shape"): + GSlidesShapeElement.convert_from_page_element(mock_invalid) + + +class TestConcreteImageElementValidator: + """Test ConcreteImageElement validator functionality.""" + + def test_validator_with_image_element(self): + """Test validator works correctly with ImageElement.""" + mock_image = Mock(spec=ImageElement) + mock_image.objectId = "image_123" + mock_image.presentation_id = "pres_123" + mock_image.slide_id = "slide_123" + alt_text_mock = Mock() + alt_text_mock.title = "Test Image" + alt_text_mock.description = None + mock_image.alt_text = alt_text_mock + + result_data = GSlidesImageElement.convert_from_page_element(mock_image) + + assert result_data["objectId"] == "image_123" + 
assert result_data["presentation_id"] == "pres_123" + assert result_data["slide_id"] == "slide_123" + assert result_data["gslides_element"] == mock_image + + def test_validator_with_page_element_having_image(self): + """Test validator works with PageElement that has image attribute.""" + mock_page_element = Mock() + mock_page_element.image = Mock() # Has image attribute + mock_page_element.objectId = "image_456" + mock_page_element.presentation_id = "pres_456" + mock_page_element.slide_id = "slide_456" + alt_text_mock = Mock() + alt_text_mock.title = "Page Element Image" + alt_text_mock.description = None + mock_page_element.alt_text = alt_text_mock + + result_data = GSlidesImageElement.convert_from_page_element(mock_page_element) + + assert result_data["objectId"] == "image_456" + assert result_data["gslides_element"] == mock_page_element + + +class TestConcreteTableElementValidator: + """Test ConcreteTableElement validator functionality.""" + + def test_validator_with_table_element(self): + """Test validator works correctly with TableElement.""" + mock_table = Mock(spec=TableElement) + mock_table.objectId = "table_123" + mock_table.presentation_id = "pres_123" + mock_table.slide_id = "slide_123" + alt_text_mock = Mock() + alt_text_mock.title = "Test Table" + alt_text_mock.description = None + mock_table.alt_text = alt_text_mock + + result_data = GSlidesTableElement.convert_from_page_element(mock_table) + + assert result_data["objectId"] == "table_123" + assert result_data["presentation_id"] == "pres_123" + assert result_data["slide_id"] == "slide_123" + assert result_data["gslides_element"] == mock_table + + def test_validator_with_page_element_having_table(self): + """Test validator works with PageElement that has table attribute.""" + mock_page_element = Mock() + mock_page_element.table = Mock() # Has table attribute + mock_page_element.objectId = "table_456" + mock_page_element.presentation_id = "pres_456" + mock_page_element.slide_id = "slide_456" + 
alt_text_mock = Mock() + alt_text_mock.title = "Page Element Table" + alt_text_mock.description = None + mock_page_element.alt_text = alt_text_mock + + result_data = GSlidesTableElement.convert_from_page_element(mock_page_element) + + assert result_data["objectId"] == "table_456" + assert result_data["gslides_element"] == mock_page_element + + +class TestConcreteElementIntegration: + """Integration tests for ConcreteElement discriminated union.""" + + def setup_method(self): + """Set up TypeAdapter for ConcreteElement.""" + self.adapter = TypeAdapter(GSlidesElement) + + def test_end_to_end_shape_validation(self): + """Test complete validation flow for shape elements.""" + mock_shape = Mock(spec=ShapeElement) + mock_shape.type = ElementKind.SHAPE + mock_shape.objectId = "shape_end_to_end" + mock_shape.presentation_id = "pres_end_to_end" + mock_shape.slide_id = "slide_end_to_end" + alt_text_mock = Mock() + alt_text_mock.title = "End to End Shape" + alt_text_mock.description = None + mock_shape.alt_text = alt_text_mock + + # This tests the full discriminated union pipeline + concrete_element = self.adapter.validate_python(mock_shape) + + # Verify it's the right type + assert isinstance(concrete_element, GSlidesShapeElement) + + # Verify properties are correctly set + assert concrete_element.objectId == "shape_end_to_end" + assert concrete_element.presentation_id == "pres_end_to_end" + assert concrete_element.slide_id == "slide_end_to_end" + assert concrete_element.alt_text.title == "End to End Shape" + + # Verify the internal gslides element is preserved + assert concrete_element.gslides_element == mock_shape + + def test_type_preservation_across_validation(self): + """Test that the discriminated union preserves type information correctly.""" + # Create elements of different types + mock_shape = Mock(spec=ShapeElement) + mock_shape.type = ElementKind.SHAPE + mock_shape.objectId = "shape_type_test" + mock_shape.presentation_id = "pres_type_test" + mock_shape.slide_id = 
"slide_type_test" + alt_text_mock_shape = Mock() + alt_text_mock_shape.title = "Shape Type Test" + alt_text_mock_shape.description = None + mock_shape.alt_text = alt_text_mock_shape + + mock_image = Mock(spec=ImageElement) + mock_image.type = ElementKind.IMAGE + mock_image.objectId = "image_type_test" + mock_image.presentation_id = "pres_type_test" + mock_image.slide_id = "slide_type_test" + alt_text_mock_image = Mock() + alt_text_mock_image.title = "Image Type Test" + alt_text_mock_image.description = None + mock_image.alt_text = alt_text_mock_image + + # Validate both + shape_result = self.adapter.validate_python(mock_shape) + image_result = self.adapter.validate_python(mock_image) + + # Verify correct types were created + assert isinstance(shape_result, GSlidesShapeElement) + assert isinstance(image_result, GSlidesImageElement) + + # Verify they maintain their type information + assert shape_result.type == "SHAPE" # From AbstractShapeElement + assert image_result.type == "IMAGE" # From AbstractImageElement diff --git a/tests/test_adapters/test_gslides_adapter_write_text.py b/tests/test_adapters/test_gslides_adapter_write_text.py new file mode 100644 index 0000000..ee33788 --- /dev/null +++ b/tests/test_adapters/test_gslides_adapter_write_text.py @@ -0,0 +1,93 @@ +"""Tests for GSlidesShapeElement.write_text style extraction behavior.""" + +from unittest.mock import Mock, call + +import pytest + +from gslides_api.domain.text import TextStyle +from gslides_api.element.shape import ShapeElement + +from gslides_api.adapters.gslides_adapter import GSlidesShapeElement + + +def _make_shape_element(styles_return=None): + """Create a mock ShapeElement with configurable styles return value.""" + mock_shape = Mock(spec=ShapeElement) + mock_shape.objectId = "shape_123" + mock_shape.presentation_id = "pres_123" + mock_shape.slide_id = "slide_123" + alt_text_mock = Mock() + alt_text_mock.title = "Title" + alt_text_mock.description = None + mock_shape.alt_text = alt_text_mock + 
mock_shape.styles.return_value = styles_return + mock_shape.write_text.return_value = None + return mock_shape + + +def _make_api_client(): + """Create a mock GSlidesAPIClient.""" + mock_api_client = Mock() + mock_api_client.gslides_client = Mock() + return mock_api_client + + +class TestWriteTextSkipsWhitespaceStyles: + """Test that write_text uses skip_whitespace=True to avoid invisible spacer styles.""" + + def test_write_text_calls_styles_with_skip_whitespace_true(self): + """write_text should call styles(skip_whitespace=True) to avoid picking up + invisible spacer styles like white theme colors from whitespace-only runs.""" + mock_shape = _make_shape_element(styles_return=[Mock(spec=TextStyle)]) + element = GSlidesShapeElement.model_validate(mock_shape) + api_client = _make_api_client() + + element.write_text(api_client=api_client, content="Hello") + + mock_shape.styles.assert_called_once_with(skip_whitespace=True) + + def test_write_text_passes_extracted_styles_to_underlying_write(self): + """write_text should pass the extracted styles to the underlying gslides_element.write_text.""" + mock_style = Mock(spec=TextStyle) + mock_shape = _make_shape_element(styles_return=[mock_style]) + element = GSlidesShapeElement.model_validate(mock_shape) + api_client = _make_api_client() + + element.write_text(api_client=api_client, content="Hello", autoscale=True) + + mock_shape.write_text.assert_called_once_with( + "Hello", + autoscale=True, + styles=[mock_style], + api_client=api_client.gslides_client, + ) + + def test_write_text_handles_none_styles(self): + """write_text should handle styles() returning None gracefully.""" + mock_shape = _make_shape_element(styles_return=None) + element = GSlidesShapeElement.model_validate(mock_shape) + api_client = _make_api_client() + + element.write_text(api_client=api_client, content="Hello") + + mock_shape.write_text.assert_called_once_with( + "Hello", + autoscale=False, + styles=None, + api_client=api_client.gslides_client, + ) + + 
def test_write_text_handles_empty_styles_list(self): + """write_text should handle styles() returning an empty list.""" + mock_shape = _make_shape_element(styles_return=[]) + element = GSlidesShapeElement.model_validate(mock_shape) + api_client = _make_api_client() + + element.write_text(api_client=api_client, content="Hello") + + mock_shape.write_text.assert_called_once_with( + "Hello", + autoscale=False, + styles=[], + api_client=api_client.gslides_client, + ) diff --git a/tests/test_page_element.py b/tests/test_page_element.py index 1806bf7..4daed24 100644 --- a/tests/test_page_element.py +++ b/tests/test_page_element.py @@ -280,7 +280,7 @@ def test_absolute_size_invalid_units(): def test_absolute_size_no_size(): - """Test absolute_size method when size is None.""" + """Test absolute_size method when size is None returns None.""" element = ShapeElement( objectId="test_id", size=None, @@ -288,8 +288,7 @@ def test_absolute_size_no_size(): shape=Shape(shapeType=Type.RECTANGLE, shapeProperties=ShapeProperties()), ) - with pytest.raises(ValueError, match="Element size is not available"): - element.absolute_size(OutputUnit.CM) + assert element.absolute_size(OutputUnit.CM) is None def test_alt_text_property(): diff --git a/tests/test_pptx/Samplead Master Deck Template.pptx b/tests/test_pptx/Samplead Master Deck Template.pptx new file mode 100644 index 0000000..39bbcda Binary files /dev/null and b/tests/test_pptx/Samplead Master Deck Template.pptx differ diff --git a/tests/test_pptx/__init__.py b/tests/test_pptx/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pptx/test_markdown_to_pptx.py b/tests/test_pptx/test_markdown_to_pptx.py new file mode 100644 index 0000000..d4a7783 --- /dev/null +++ b/tests/test_pptx/test_markdown_to_pptx.py @@ -0,0 +1,641 @@ +""" +Test suite for markdown_to_pptx module. 
+ +This module tests the conversion of markdown to PowerPoint text frames, +specifically focusing on the _apply_style_to_run function and font size handling. +""" + +import pytest +from unittest.mock import Mock, MagicMock + +from gslides_api.agnostic.text import ( + FullTextStyle, + MarkdownRenderableStyle, + RichStyle, + AbstractColor, +) + +from gslides_api.pptx.markdown_to_pptx import _apply_style_to_run +from gslides_api.adapters.pptx_adapter import _extract_base_style_from_textframe + + +class TestApplyStyleToRunFontSize: + """Test font size handling in _apply_style_to_run.""" + + def create_mock_run(self): + """Create a mock run with font properties.""" + mock_run = Mock() + mock_font = Mock() + mock_font.bold = None + mock_font.italic = None + mock_font.underline = None + mock_font.strikethrough = None + mock_font.name = None + mock_font.size = None + mock_hyperlink = Mock() + mock_hyperlink.address = None + mock_run.font = mock_font + mock_run.hyperlink = mock_hyperlink + return mock_run + + def test_font_size_applied_when_set(self): + """Test that font size is applied when font_size_pt is set.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + rich=RichStyle(font_size_pt=12.0) + ) + + _apply_style_to_run(mock_run, style) + + # Font size should be set + assert mock_run.font.size is not None + # The size should be set to Pt(12.0) + assert mock_run.font.size.pt == 12.0 + + def test_font_size_not_applied_when_not_set(self): + """Test that no font size is applied when font_size_pt is None.""" + mock_run = self.create_mock_run() + + style = FullTextStyle() + + _apply_style_to_run(mock_run, style) + + # Font size should NOT be set + assert mock_run.font.size is None + + def test_font_size_with_different_pt_values(self): + """Test various PT font size values are correctly applied.""" + test_sizes = [8.0, 10.0, 12.0, 14.0, 18.0, 24.0, 36.0, 48.0] + + for size in test_sizes: + mock_run = self.create_mock_run() + style = FullTextStyle( + 
rich=RichStyle(font_size_pt=size) + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.size is not None + assert mock_run.font.size.pt == size + + +class TestApplyStyleToRunOtherProperties: + """Test other style properties in _apply_style_to_run.""" + + def create_mock_run(self): + """Create a mock run with font properties.""" + mock_run = Mock() + mock_font = Mock() + mock_font.bold = None + mock_font.italic = None + mock_font.underline = None + mock_font.strikethrough = None + mock_font.name = None + mock_font.size = None + mock_color = Mock() + mock_color.rgb = None + mock_font.color = mock_color + mock_hyperlink = Mock() + mock_hyperlink.address = None + mock_run.font = mock_font + mock_run.hyperlink = mock_hyperlink + return mock_run + + def test_bold_applied(self): + """Test that bold is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=True) + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.bold is True + + def test_italic_applied(self): + """Test that italic is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + markdown=MarkdownRenderableStyle(italic=True) + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.italic is True + + def test_underline_applied(self): + """Test that underline is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + rich=RichStyle(underline=True) + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.underline is True + + def test_strikethrough_applied(self): + """Test that strikethrough is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + markdown=MarkdownRenderableStyle(strikethrough=True) + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.strikethrough is True + + def test_font_family_applied(self): + """Test that font family is applied correctly.""" + mock_run = 
self.create_mock_run() + + style = FullTextStyle( + rich=RichStyle(font_family="Arial") + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.font.name == "Arial" + + def test_hyperlink_applied(self): + """Test that hyperlink is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + markdown=MarkdownRenderableStyle(hyperlink="https://example.com") + ) + + _apply_style_to_run(mock_run, style) + + assert mock_run.hyperlink.address == "https://example.com" + + def test_foreground_color_applied(self): + """Test that foreground color is applied correctly.""" + mock_run = self.create_mock_run() + + style = FullTextStyle( + rich=RichStyle( + foreground_color=AbstractColor(red=1.0, green=0.0, blue=0.0) + ) + ) + + _apply_style_to_run(mock_run, style) + + # RGBColor should have been called with the converted tuple + assert mock_run.font.color.rgb is not None + + def test_none_style_is_handled(self): + """Test that None style doesn't raise an error.""" + mock_run = self.create_mock_run() + + # This should not raise an error + _apply_style_to_run(mock_run, None) + + # No properties should be modified + assert mock_run.font.bold is None + assert mock_run.font.italic is None + + def test_empty_style_is_handled(self): + """Test that empty style explicitly sets bold/italic to False. + + Bold and italic are always explicitly set (True or False) to prevent + inheritance from defRPr (default run properties). If defRPr has bold=True, + text would appear bold unless we explicitly set bold=False. 
+ """ + mock_run = self.create_mock_run() + + style = FullTextStyle() + + _apply_style_to_run(mock_run, style) + + # Bold and italic should be explicitly False (not None) to prevent inheritance + assert mock_run.font.bold is False + assert mock_run.font.italic is False + # Other properties should remain unmodified + assert mock_run.font.size is None + + +class TestExtractBaseStyleFromTextframe: + """Test _extract_base_style_from_textframe function.""" + + def create_mock_text_frame( + self, + text: str = "Hello", + bold: bool = False, + italic: bool = False, + font_name: str = None, + font_size_pt: float = None, + color_rgb: tuple = None, + underline: bool = False, + ): + """Create a mock text frame with configurable properties.""" + mock_text_frame = Mock() + mock_paragraph = Mock() + mock_run = Mock() + + # Set run text + mock_run.text = text + + # Set font properties + mock_font = Mock() + mock_font.bold = bold + mock_font.italic = italic + mock_font.name = font_name + mock_font.underline = underline + + # Set font size + if font_size_pt is not None: + mock_size = Mock() + mock_size.pt = font_size_pt + mock_font.size = mock_size + else: + mock_font.size = None + + # Set color + from pptx.enum.dml import MSO_COLOR_TYPE + + mock_color = Mock() + if color_rgb is not None: + mock_color.type = MSO_COLOR_TYPE.RGB + mock_rgb = Mock() + mock_rgb.__getitem__ = lambda self, idx: color_rgb[idx] + mock_color.rgb = mock_rgb + else: + mock_color.type = None + mock_color.rgb = None + mock_font.color = mock_color + + mock_run.font = mock_font + mock_paragraph.runs = [mock_run] + mock_text_frame.paragraphs = [mock_paragraph] + + return mock_text_frame + + def test_extract_basic_style(self): + """Test extracting basic style from text frame. + + Note: bold/italic are NOT extracted from base_style. They are always + False because markdown content should control bold/italic formatting. + This prevents issues like a bold header making ALL text bold. 
+ """ + text_frame = self.create_mock_text_frame( + text="Hello", + font_size_pt=24.0, + font_name="Arial", + bold=True, # This is intentionally ignored + italic=False, + ) + + style = _extract_base_style_from_textframe(text_frame) + + assert style is not None + assert style.rich.font_size_pt == 24.0 + assert style.rich.font_family == "Arial" + # Bold/italic are always False in base_style - markdown controls these + assert style.markdown.bold is False + assert style.markdown.italic is False + + def test_extract_color(self): + """Test extracting color from text frame.""" + text_frame = self.create_mock_text_frame( + text="Colored text", + color_rgb=(128, 0, 255), # Purple + ) + + style = _extract_base_style_from_textframe(text_frame) + + assert style is not None + assert style.rich.foreground_color is not None + # AbstractColor uses 0.0-1.0 scale + assert abs(style.rich.foreground_color.red - 128 / 255) < 0.01 + assert style.rich.foreground_color.green == 0.0 + assert abs(style.rich.foreground_color.blue - 1.0) < 0.01 + + def test_extract_underline(self): + """Test extracting underline from text frame.""" + text_frame = self.create_mock_text_frame( + text="Underlined", + underline=True, + ) + + style = _extract_base_style_from_textframe(text_frame) + + assert style is not None + assert style.rich.underline is True + + def test_empty_text_frame_returns_none(self): + """Test that empty text frame returns None.""" + mock_text_frame = Mock() + mock_paragraph = Mock() + mock_run = Mock() + mock_run.text = " " # Whitespace only + mock_paragraph.runs = [mock_run] + mock_text_frame.paragraphs = [mock_paragraph] + + style = _extract_base_style_from_textframe(mock_text_frame) + + assert style is None + + def test_no_runs_returns_none(self): + """Test that text frame with no runs returns None.""" + mock_text_frame = Mock() + mock_paragraph = Mock() + mock_paragraph.runs = [] + mock_text_frame.paragraphs = [mock_paragraph] + + style = 
_extract_base_style_from_textframe(mock_text_frame) + + assert style is None + + def test_extracts_from_first_non_empty_run(self): + """Test that style is extracted from first non-whitespace run. + + Note: bold/italic are NOT extracted from base_style - they are always + False because markdown content should control bold/italic formatting. + Only RichStyle properties (font, size, color) are extracted. + """ + mock_text_frame = Mock() + mock_paragraph = Mock() + + # First run is whitespace + mock_run1 = Mock() + mock_run1.text = " " + + # Second run has content and style + mock_run2 = Mock() + mock_run2.text = "Content" + mock_font2 = Mock() + mock_font2.bold = True # Intentionally ignored + mock_font2.italic = True # Intentionally ignored + mock_font2.name = "Times" + mock_font2.underline = False + mock_size2 = Mock() + mock_size2.pt = 18.0 + mock_font2.size = mock_size2 + mock_color2 = Mock() + mock_color2.type = None + mock_font2.color = mock_color2 + mock_run2.font = mock_font2 + + mock_paragraph.runs = [mock_run1, mock_run2] + mock_text_frame.paragraphs = [mock_paragraph] + + style = _extract_base_style_from_textframe(mock_text_frame) + + assert style is not None + # Bold/italic are always False in base_style - markdown controls these + assert style.markdown.bold is False + assert style.markdown.italic is False + # RichStyle properties ARE extracted + assert style.rich.font_family == "Times" + assert style.rich.font_size_pt == 18.0 + + +class TestApplyMarkdownToTextframeWithAutoscale: + """Test apply_markdown_to_textframe with autoscale=True. + + These tests ensure that bold/italic formatting set by markdown parsing + is preserved after fit_text() is called. The fit_text() API in python-pptx + sets bold/italic on ALL runs, which would break mixed formatting like + "**bold** and regular text". We save and restore per-run styles to fix this. 
+ """ + + def test_autoscale_preserves_mixed_bold_italic(self, tmp_path): + """Test that autoscale preserves mixed bold/italic formatting. + + Markdown: **Gabrielle** is bold, rest is not. + After autoscale with fit_text(), bold should still be preserved per-run. + """ + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + # Create a test presentation with a text box + prs = Presentation() + blank_layout = prs.slide_layouts[6] # blank layout + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + + # Set initial text (will be replaced) + tf.text = "Placeholder" + + # Apply markdown with mixed bold - WITH autoscale + markdown = "**Gabrielle Aura**\n- Approval rate 28%\n- **17 Booked meetings**\n- Other text" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=15.0, font_family="Arial"), + ) + apply_markdown_to_textframe(markdown, tf, base_style=base_style, autoscale=True) + + # Verify bold is preserved per-run + # Para 0: "Gabrielle Aura" should be bold + assert len(tf.paragraphs) >= 4 + para0 = tf.paragraphs[0] + assert len(para0.runs) >= 1 + assert para0.runs[0].font.bold is True, "Gabrielle Aura should be bold" + + # Para 1: "Approval rate 28%" should NOT be bold + para1 = tf.paragraphs[1] + assert len(para1.runs) >= 1 + assert para1.runs[0].font.bold is False, "Approval rate should not be bold" + + # Para 2: "17 Booked meetings" should be bold + para2 = tf.paragraphs[2] + assert len(para2.runs) >= 1 + assert para2.runs[0].font.bold is True, "17 Booked meetings should be bold" + + # Para 3: "Other text" should NOT be bold + para3 = tf.paragraphs[3] + assert len(para3.runs) >= 1 + assert para3.runs[0].font.bold is False, "Other text should not be bold" + + def test_autoscale_preserves_italic(self, tmp_path): + 
"""Test that autoscale preserves italic formatting.""" + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "Placeholder" + + # Apply markdown with italic - double newline creates separate paragraphs + # Note: marko parser creates 3 paragraphs: italic, empty, regular + markdown = "*italic text*\n\nregular text" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=12.0), + ) + apply_markdown_to_textframe(markdown, tf, base_style=base_style, autoscale=True) + + # Find paragraphs with content + paras_with_content = [p for p in tf.paragraphs if p.runs] + assert len(paras_with_content) >= 2 + + # First content para: "italic text" should be italic + assert paras_with_content[0].runs[0].font.italic is True, "italic text should be italic" + + # Second content para: "regular text" should NOT be italic + assert paras_with_content[1].runs[0].font.italic is False, "regular text should not be italic" + + def test_autoscale_without_autoscale_flag(self, tmp_path): + """Test that without autoscale=True, bold/italic are also correct. + + This is a baseline test - formatting should work correctly regardless. 
+ """ + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "Placeholder" + + # Apply markdown WITHOUT autoscale - double newline for separate paragraphs + # Note: marko parser may create 3 paragraphs: bold, empty, regular + markdown = "**Bold**\n\nRegular" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + ) + apply_markdown_to_textframe(markdown, tf, base_style=base_style, autoscale=False) + + # Find paragraphs with content + paras_with_content = [p for p in tf.paragraphs if p.runs] + assert len(paras_with_content) >= 2 + + # First content para: "Bold" should be bold + assert paras_with_content[0].runs[0].font.bold is True + + # Second content para: "Regular" should NOT be bold + assert paras_with_content[1].runs[0].font.bold is False + + +class TestSoftLineBreakHandling: + """Test handling of soft line breaks (vertical tab) in text.""" + + def test_soft_line_break_creates_proper_xml_break(self, tmp_path): + """Test that \\x0b (vertical tab) creates proper <a:br> element.""" + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "" + + # Apply text with soft line break (vertical tab) + text_with_break = "Your results\x0b(Samplead)" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=12.0), + ) + apply_markdown_to_textframe(text_with_break, 
tf, base_style=base_style) + + # Verify the result - should have two runs with a break between + para = tf.paragraphs[0] + assert len(para.runs) == 2, "Should have two runs separated by line break" + assert para.runs[0].text == "Your results" + assert para.runs[1].text == "(Samplead)" + + # Verify paragraph text reads back with \x0b (not _x000B_) + assert para.text == "Your results\x0b(Samplead)", "Paragraph text should contain soft line break" + + def test_escaped_x000b_is_converted_to_break(self, tmp_path): + """Test that literal '_x000B_' string is converted to proper break.""" + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "" + + # Apply text with escaped soft line break (this is what happens if + # text was previously corrupted by python-pptx) + text_with_escaped_break = "Your results_x000B_(Samplead)" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=12.0), + ) + apply_markdown_to_textframe(text_with_escaped_break, tf, base_style=base_style) + + # Verify the result - should NOT contain literal _x000B_ + para = tf.paragraphs[0] + full_text = "".join(run.text for run in para.runs) + assert "_x000B_" not in full_text, "Literal _x000B_ should be converted to break" + + # Should have proper line break + assert para.text == "Your results\x0b(Samplead)", "Should have proper soft line break" + + def test_multiple_soft_line_breaks(self, tmp_path): + """Test handling of multiple soft line breaks in text.""" + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = 
prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "" + + # Apply text with multiple soft line breaks + text_with_breaks = "Line 1\x0bLine 2\x0bLine 3" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=12.0), + ) + apply_markdown_to_textframe(text_with_breaks, tf, base_style=base_style) + + # Verify the result - should have three runs + para = tf.paragraphs[0] + assert len(para.runs) == 3, "Should have three runs separated by line breaks" + assert para.runs[0].text == "Line 1" + assert para.runs[1].text == "Line 2" + assert para.runs[2].text == "Line 3" + + def test_soft_line_break_with_bold_formatting(self, tmp_path): + """Test that formatting is preserved across soft line breaks.""" + from pptx import Presentation + from pptx.util import Inches + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(3), Inches(2)) + tf = textbox.text_frame + tf.text = "" + + # Apply bold text with soft line break + text_with_break = "**Line 1\x0bLine 2**" + base_style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=False, italic=False), + rich=RichStyle(font_size_pt=12.0), + ) + apply_markdown_to_textframe(text_with_break, tf, base_style=base_style) + + # Verify all runs are bold + para = tf.paragraphs[0] + for run in para.runs: + if run.text.strip(): # Skip empty runs + assert run.font.bold is True, f"Run '{run.text}' should be bold" diff --git a/tests/test_pptx/test_pptx_adapter.py b/tests/test_pptx/test_pptx_adapter.py new file mode 100644 index 0000000..cbef3fd --- /dev/null +++ b/tests/test_pptx/test_pptx_adapter.py @@ -0,0 +1,656 @@ +"""Test PowerPoint adapter 
implementation.""" + +import os +import tempfile +from unittest.mock import MagicMock, Mock, patch + +import pytest +from pptx import Presentation +from pptx.enum.shapes import MSO_SHAPE_TYPE +from pptx.shapes.autoshape import Shape +from pptx.shapes.graphfrm import GraphicFrame +from pptx.shapes.picture import Picture +from pptx.slide import Slide + +from gslides_api.agnostic.units import OutputUnit + +from gslides_api.adapters.abstract_slides import AbstractAltText, AbstractThumbnail +from gslides_api.adapters.pptx_adapter import ( + PowerPointAPIClient, + PowerPointElementParent, + PowerPointImageElement, + PowerPointPresentation, + PowerPointShapeElement, + PowerPointSlide, + PowerPointSpeakerNotes, + PowerPointTableElement, + pptx_element_discriminator, + validate_pptx_element, +) + + +class TestPowerPointAPIClient: + """Test PowerPoint API client implementation.""" + + def test_init(self): + """Test API client initialization.""" + client = PowerPointAPIClient() + assert client.auto_flush is True + + def test_auto_flush_property(self): + """Test auto_flush property getter and setter.""" + client = PowerPointAPIClient() + assert client.auto_flush is True + + client.auto_flush = False + assert client.auto_flush is False + + def test_flush_batch_update(self): + """Test flush_batch_update does nothing (no-op for filesystem).""" + client = PowerPointAPIClient() + # Should not raise any exception + client.flush_batch_update() + + def test_copy_presentation(self): + """Test presentation copying.""" + client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + # Create a dummy presentation file + source_path = os.path.join(temp_dir, "source.pptx") + with open(source_path, "w") as f: + f.write("dummy content") + + # Test copying to same folder + result = client.copy_presentation(source_path, "copy_title") + + assert result["name"] == "copy_title" + assert result["id"].endswith("copy_title.pptx") + assert os.path.exists(result["id"]) + + def 
test_copy_presentation_with_folder(self): + """Test presentation copying to specific folder.""" + client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + source_path = os.path.join(temp_dir, "source.pptx") + dest_folder = os.path.join(temp_dir, "dest") + os.makedirs(dest_folder) + + with open(source_path, "w") as f: + f.write("dummy content") + + result = client.copy_presentation(source_path, "copy_title", dest_folder) + + assert result["name"] == "copy_title" + assert result["parents"] == [dest_folder] + assert os.path.exists(result["id"]) + + def test_copy_presentation_file_not_found(self): + """Test copying non-existent presentation raises error.""" + client = PowerPointAPIClient() + + with pytest.raises(FileNotFoundError): + client.copy_presentation("non_existent.pptx", "copy_title") + + def test_create_folder(self): + """Test folder creation.""" + client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + folder_name = "test_folder" + result = client.create_folder(folder_name, parent_folder_id=temp_dir) + + expected_path = os.path.join(temp_dir, folder_name) + assert result["id"] == expected_path + assert result["name"] == folder_name + assert result["parents"] == [temp_dir] + assert os.path.exists(expected_path) + + def test_create_folder_ignore_existing(self): + """Test folder creation with ignore_existing=True.""" + client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + folder_name = "test_folder" + folder_path = os.path.join(temp_dir, folder_name) + os.makedirs(folder_path) # Create folder first + + # Should not raise error + result = client.create_folder(folder_name, parent_folder_id=temp_dir) + assert result["id"] == folder_path + + def test_delete_file(self): + """Test file deletion.""" + client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + test_file = os.path.join(temp_dir, "test.pptx") + with open(test_file, "w") as f: + f.write("test 
content") + + assert os.path.exists(test_file) + client.delete_file(test_file) + assert not os.path.exists(test_file) + + def test_delete_nonexistent_file(self): + """Test deleting non-existent file doesn't raise error.""" + client = PowerPointAPIClient() + # Should not raise any exception + client.delete_file("non_existent_file.pptx") + + def test_credentials_methods(self): + """Test credential methods (no-op for filesystem).""" + client = PowerPointAPIClient() + + # Should not raise errors + client.set_credentials(None) + assert client.get_credentials() is None + + @patch("gslides_api.adapters.pptx_adapter.Presentation") + def test_replace_text(self, mock_presentation_class): + """Test text replacement in slides.""" + client = PowerPointAPIClient() + + # Mock presentation and slides + mock_prs = Mock() + mock_slide = Mock() + mock_shape = Mock() + mock_text_frame = Mock() + mock_paragraph = Mock() + mock_run = Mock() + + mock_run.text = "Hello World" + mock_paragraph.runs = [mock_run] + mock_text_frame.paragraphs = [mock_paragraph] + mock_shape.text_frame = mock_text_frame + mock_slide.shapes = [mock_shape] + mock_prs.slides = [mock_slide] + + mock_presentation_class.return_value = mock_prs + + with tempfile.NamedTemporaryFile(suffix=".pptx") as temp_file: + client.replace_text(["0"], "Hello", "Hi", temp_file.name) + + # Verify text was replaced + assert mock_run.text == "Hi World" + mock_prs.save.assert_called_once_with(temp_file.name) + + def test_get_default_api_client(self): + """Test getting default API client.""" + client = PowerPointAPIClient.get_default_api_client() + assert isinstance(client, PowerPointAPIClient) + + +class TestPowerPointSpeakerNotes: + """Test PowerPoint speaker notes implementation.""" + + def test_init(self): + """Test speaker notes initialization.""" + mock_notes_slide = Mock() + notes = PowerPointSpeakerNotes(mock_notes_slide) + assert notes.notes_slide == mock_notes_slide + + def test_read_text_empty(self): + """Test reading text 
from empty notes.""" + mock_notes_slide = Mock() + mock_notes_slide.notes_text_frame = None + + notes = PowerPointSpeakerNotes(mock_notes_slide) + assert notes.read_text() == "" + + def test_read_text_with_content(self): + """Test reading text from notes with content.""" + mock_notes_slide = Mock() + mock_text_frame = Mock() + mock_text_frame.text = "Speaker notes content" + mock_notes_slide.notes_text_frame = mock_text_frame + + notes = PowerPointSpeakerNotes(mock_notes_slide) + assert notes.read_text(as_markdown=False) == "Speaker notes content" + + def test_write_text(self): + """Test writing text to speaker notes.""" + mock_notes_slide = Mock() + mock_text_frame = Mock() + mock_notes_slide.notes_text_frame = mock_text_frame + + notes = PowerPointSpeakerNotes(mock_notes_slide) + api_client = PowerPointAPIClient() + + notes.write_text(api_client, "New content") + + mock_text_frame.clear.assert_called_once() + assert mock_text_frame.text == "New content" + + +class TestElementDiscriminator: + """Test element discriminator functionality.""" + + def test_discriminator_shape_element(self): + """Test discriminator with shape element.""" + mock_element = Mock() + mock_element.shape_type = MSO_SHAPE_TYPE.AUTO_SHAPE + + result = pptx_element_discriminator(mock_element) + assert result == "shape" + + def test_discriminator_image_element(self): + """Test discriminator with image element.""" + mock_element = Mock() + mock_element.shape_type = MSO_SHAPE_TYPE.PICTURE + # Picture elements don't have text_frame attribute + del mock_element.text_frame + + result = pptx_element_discriminator(mock_element) + assert result == "image" + + def test_discriminator_table_element(self): + """Test discriminator with table element.""" + mock_element = Mock() + mock_element.shape_type = MSO_SHAPE_TYPE.TABLE + # Table elements don't have text_frame attribute + del mock_element.text_frame + + result = pptx_element_discriminator(mock_element) + assert result == "table" + + def 
test_discriminator_placeholder_with_text(self): + """Test discriminator with placeholder element that has text.""" + mock_element = Mock() + mock_element.shape_type = MSO_SHAPE_TYPE.PLACEHOLDER + mock_element.text_frame = Mock() + + result = pptx_element_discriminator(mock_element) + assert result == "shape" + + def test_discriminator_generic_element(self): + """Test discriminator with generic element.""" + mock_element = Mock() + mock_element.shape_type = MSO_SHAPE_TYPE.LINE + # Generic LINE elements don't have text_frame attribute + del mock_element.text_frame + + result = pptx_element_discriminator(mock_element) + assert result == "generic" + + +class TestPowerPointElementParent: + """Test PowerPoint element parent class.""" + + def test_convert_from_pptx_element(self): + """Test converting from pptx element.""" + # Test with a dictionary input (already converted case) + test_data = { + "objectId": "123", + "alt_text": {"title": "Test Shape"}, + "type": "generic", + "pptx_element": "mock_element", + } + + result = PowerPointElementParent.convert_from_pptx_element(test_data) + + assert result["objectId"] == "123" + assert result["alt_text"]["title"] == "Test Shape" + assert result["type"] == "generic" + assert result["pptx_element"] == "mock_element" + + def test_absolute_size(self): + """Test getting absolute size.""" + mock_shape = Mock() + mock_shape.width = 914400 # 1 inch in EMU + mock_shape.height = 914400 + + element = PowerPointElementParent( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + width, height = element.absolute_size(units=OutputUnit.IN) + assert abs(width - 1.0) < 0.01 # Close to 1 inch + assert abs(height - 1.0) < 0.01 + + def test_absolute_position(self): + """Test getting absolute position.""" + mock_shape = Mock() + mock_shape.left = 914400 # 1 inch in EMU + mock_shape.top = 914400 + + element = PowerPointElementParent( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + left, top = 
element.absolute_position(units=OutputUnit.IN) + assert abs(left - 1.0) < 0.01 # Close to 1 inch + assert abs(top - 1.0) < 0.01 + + def test_set_alt_text(self): + """Test setting alt text.""" + mock_shape = Mock() + mock_shape.name = "Original Name" + + element = PowerPointElementParent( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + api_client = PowerPointAPIClient() + element.set_alt_text(api_client, title="New Title") + + assert mock_shape.name == "New Title" + assert element.alt_text.title == "New Title" + + +class TestPowerPointShapeElement: + """Test PowerPoint shape element implementation.""" + + def test_has_text_with_text_frame(self): + """Test has_text property with text frame.""" + mock_shape = Mock() + mock_shape.text_frame = Mock() + mock_shape.text_frame.text = "Some text content" + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + assert element.has_text + + def test_has_text_without_text_frame(self): + """Test has_text property without text frame.""" + mock_shape = Mock() + mock_shape.text_frame = None + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + assert element.has_text is False + + def test_read_text_markdown(self): + """Test reading text as markdown with bold formatting.""" + mock_shape = Mock() + mock_text_frame = Mock() + mock_paragraph = Mock() + mock_run = Mock() + + # Configure the mock run + mock_run.text = "Hello World" + mock_run.font.bold = True + mock_run.font.italic = False + mock_run.hyperlink.address = None + + # Configure paragraph with iterable runs + mock_paragraph.runs = [mock_run] + mock_paragraph.text.strip.return_value = "Hello World" + mock_paragraph.level = 0 + + mock_text_frame.paragraphs = [mock_paragraph] + mock_shape.text_frame = mock_text_frame + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + result 
= element.read_text(as_markdown=True) + # Bold text should be formatted with markdown + assert "**Hello World**" in result + + def test_has_text_frame_with_empty_text(self): + """Test has_text_frame returns True for empty text boxes.""" + mock_shape = Mock() + mock_shape.text_frame = Mock() + mock_shape.text_frame.text = "" # Empty text + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + assert element.has_text_frame is True # Can contain text + assert element.has_text is False # But has no content + + def test_has_text_frame_without_text_frame(self): + """Test has_text_frame returns False when no text_frame exists.""" + mock_shape = Mock() + mock_shape.text_frame = None + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + assert element.has_text_frame is False + + @patch("gslides_api.pptx.markdown_to_pptx.apply_markdown_to_textframe") + def test_write_text_to_empty_text_box(self, mock_apply_md): + """Test write_text works on empty text boxes (the bug fix).""" + mock_shape = Mock() + mock_text_frame = Mock() + mock_text_frame.text = "" # Empty text box + # Mock paragraphs for _extract_base_style_from_textframe (empty paragraphs = no style) + mock_text_frame.paragraphs = [] + mock_shape.text_frame = mock_text_frame + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + api_client = PowerPointAPIClient() + element.write_text(api_client, content="New content", autoscale=False) + + # Should have called apply_markdown_to_textframe, not skipped + # base_style will be None for empty text box (no existing runs to extract from) + mock_apply_md.assert_called_once_with( + markdown_text="New content", + text_frame=mock_text_frame, + base_style=None, + autoscale=False, + ) + + @patch("gslides_api.pptx.markdown_to_pptx.apply_markdown_to_textframe") + def 
test_write_text_skipped_without_text_frame(self, mock_apply_md): + """Test write_text is skipped when shape has no text_frame.""" + mock_shape = Mock() + mock_shape.text_frame = None + + element = PowerPointShapeElement( + objectId="1", pptx_element=mock_shape, alt_text=AbstractAltText() + ) + + api_client = PowerPointAPIClient() + element.write_text(api_client, content="New content", autoscale=False) + + # Should NOT have called apply_markdown_to_textframe + mock_apply_md.assert_not_called() + + +class TestValidatePptxElement: + """Test element validation functionality.""" + + def test_validate_shape_element(self): + """Test validating shape element.""" + mock_shape = Mock() + mock_shape.shape_type = MSO_SHAPE_TYPE.AUTO_SHAPE + mock_shape.shape_id = 123 + mock_shape.name = "Test Shape" + mock_shape.element.attrib = {"id": "123"} + mock_shape._element.getparent.return_value = None + + result = validate_pptx_element(mock_shape) + assert isinstance(result, PowerPointShapeElement) + + def test_validate_image_element(self): + """Test validating image element.""" + mock_shape = Mock() + mock_shape.shape_type = MSO_SHAPE_TYPE.PICTURE + mock_shape.shape_id = 123 + mock_shape.name = "Test Image" + mock_shape.element.attrib = {"id": "123"} + mock_shape._element.getparent.return_value = None + # Picture elements don't have text_frame attribute + del mock_shape.text_frame + + result = validate_pptx_element(mock_shape) + assert isinstance(result, PowerPointImageElement) + + def test_validate_table_element(self): + """Test validating table element.""" + mock_shape = Mock() + mock_shape.shape_type = MSO_SHAPE_TYPE.TABLE + mock_shape.shape_id = 123 + mock_shape.name = "Test Table" + mock_shape.element.attrib = {"id": "123"} + mock_shape._element.getparent.return_value = None + # Table elements don't have text_frame attribute + del mock_shape.text_frame + + result = validate_pptx_element(mock_shape) + assert isinstance(result, PowerPointTableElement) + + +class 
TestAbstractThumbnail: + """Test AbstractThumbnail model.""" + + def test_thumbnail_without_content(self): + """Test creating thumbnail without content.""" + thumbnail = AbstractThumbnail( + contentUrl="https://example.com/thumbnail.png", + width=800, + height=600, + mime_type="image/png", + ) + assert thumbnail.contentUrl == "https://example.com/thumbnail.png" + assert thumbnail.width == 800 + assert thumbnail.height == 600 + assert thumbnail.mime_type == "image/png" + assert thumbnail.content is None + + def test_thumbnail_with_content(self): + """Test creating thumbnail with content.""" + png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + thumbnail = AbstractThumbnail( + contentUrl="https://example.com/thumbnail.png", + width=800, + height=600, + mime_type="image/png", + content=png_bytes, + ) + assert thumbnail.content == png_bytes + assert isinstance(thumbnail.content, bytes) + + +class TestPowerPointSlideThumbnail: + """Test PowerPoint slide thumbnail functionality.""" + + def _create_mock_pptx_slide(self): + """Create a properly mocked python-pptx slide.""" + mock_slide = Mock() + mock_slide.slide_id = 256 + mock_slide.slide_layout = Mock() + mock_slide.shapes = [] + mock_slide.has_notes_slide = True + mock_notes = Mock() + mock_notes.notes_text_frame = Mock() + mock_notes.notes_text_frame.text = "Test Slide" + mock_slide.notes_slide = mock_notes + return mock_slide + + @patch("gslides_api.adapters.pptx_adapter.render_slide_to_image") + def test_thumbnail_without_include_data(self, mock_render): + """Test thumbnail with include_data=False returns content=None.""" + # Create a minimal valid PNG bytes + png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + + mock_render.return_value = png_bytes + + # Create mock slide and presentation + mock_slide = self._create_mock_pptx_slide() + + mock_prs = Mock() + mock_prs.slides = [mock_slide] + + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file: + temp_path = temp_file.name + + try: + # Create 
PowerPointSlide - the __init__ doesn't pass kwargs to parent + # So we need to set presentation_id and pptx_presentation manually + element = PowerPointSlide(pptx_slide=mock_slide) + element.presentation_id = temp_path + element.pptx_presentation = mock_prs + + api_client = PowerPointAPIClient() + thumbnail = element.thumbnail( + api_client=api_client, size="MEDIUM", include_data=False + ) + + assert thumbnail.content is None + assert thumbnail.mime_type == "image/png" + assert thumbnail.width > 0 + assert thumbnail.height > 0 + finally: + if os.path.exists(temp_path): + os.unlink(temp_path) + + @patch("gslides_api.adapters.pptx_adapter.render_slide_to_image") + def test_thumbnail_with_include_data(self, mock_render): + """Test thumbnail with include_data=True returns content with bytes.""" + # Create a minimal valid PNG bytes + png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + + mock_render.return_value = png_bytes + + # Create mock slide and presentation + mock_slide = self._create_mock_pptx_slide() + + # Create a list-like mock for slides that properly iterates + mock_prs = Mock() + mock_prs.slides = [mock_slide] # Same slide object so slide_id matches + + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file: + temp_path = temp_file.name + + try: + # Create PowerPointSlide - the __init__ doesn't pass kwargs to parent + # So we need to set presentation_id and pptx_presentation manually + element = PowerPointSlide(pptx_slide=mock_slide) + element.presentation_id = temp_path + element.pptx_presentation = mock_prs + + # Verify the slide index lookup works + assert element._get_slide_index() == 0 + + api_client = PowerPointAPIClient() + thumbnail = element.thumbnail( + api_client=api_client, size="MEDIUM", include_data=True + ) + + assert thumbnail.content is not None + assert isinstance(thumbnail.content, bytes) + assert thumbnail.content == png_bytes + assert thumbnail.mime_type == "image/png" + finally: + if os.path.exists(temp_path): + 
os.unlink(temp_path) + + def test_thumbnail_placeholder_on_missing_file(self): + """Test thumbnail returns placeholder when presentation file doesn't exist.""" + mock_slide = self._create_mock_pptx_slide() + + mock_prs = Mock() + mock_prs.slides = [mock_slide] + + # Create PowerPointSlide with non-existent file path + element = PowerPointSlide( + pptx_slide=mock_slide, + presentation_id="/nonexistent/path.pptx", + pptx_presentation=mock_prs, + ) + + api_client = PowerPointAPIClient() + thumbnail = element.thumbnail( + api_client=api_client, size="MEDIUM", include_data=True + ) + + # Should return placeholder without content + assert thumbnail.contentUrl == "placeholder_thumbnail.png" + assert thumbnail.width == 320 + assert thumbnail.height == 240 + assert thumbnail.content is None diff --git a/tests/test_pptx/test_pptx_adapter_slides.py b/tests/test_pptx/test_pptx_adapter_slides.py new file mode 100644 index 0000000..ddc14f2 --- /dev/null +++ b/tests/test_pptx/test_pptx_adapter_slides.py @@ -0,0 +1,315 @@ +""" +Tests for PowerPoint adapter alt text functionality. 
+""" + +import pytest +from pptx import Presentation +from pptx.util import Inches + +from gslides_api.adapters.pptx_adapter import PowerPointAPIClient, validate_pptx_element + + +class TestPowerPointAltText: + """Test alt text functionality in PowerPoint adapter.""" + + def test_set_alt_text_sets_xml_attribute(self): + """Test that set_alt_text sets the XML p:cNvPr/@title attribute.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Set alt text + element.set_alt_text(api_client, title="TestTitle") + + # Verify XML attribute is set + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert cnvpr.attrib.get("title") == "TestTitle" + + def test_alt_text_reading_matches_setting(self): + """Test that reading alt text returns what was set.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + + # Set via set_alt_text + element = validate_pptx_element(textbox) + element.set_alt_text(api_client, title="TestAltText") + + # Re-read as new element (simulates reading from saved file) + element2 = validate_pptx_element(textbox) + assert element2.alt_text.title == "TestAltText" + + def test_set_alt_text_with_special_characters(self): + """Test that alt text with special characters is handled correctly.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Test with special characters + special_title = "Test & Title <with> \"special\" 'chars'" + element.set_alt_text(api_client, title=special_title) + + # Verify it's preserved + 
cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert cnvpr.attrib.get("title") == special_title + + def test_set_alt_text_multiple_times(self): + """Test that alt text can be updated multiple times.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Set first time + element.set_alt_text(api_client, title="FirstTitle") + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert cnvpr.attrib.get("title") == "FirstTitle" + + # Update + element.set_alt_text(api_client, title="SecondTitle") + assert cnvpr.attrib.get("title") == "SecondTitle" + + def test_set_alt_text_empty_string(self): + """Test that empty string alt text is handled.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Set to empty string (should still set the attribute) + element.set_alt_text(api_client, title="") + + # Verify empty string is set (not None) + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + title = cnvpr.attrib.get("title") + assert title == "", f"Expected empty string, got {repr(title)}" + + def test_set_alt_text_description_sets_xml_attribute(self): + """Test that set_alt_text sets the XML p:cNvPr/@descr attribute for description.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Set alt text description + element.set_alt_text(api_client, description="This is the chart description") + + # Verify XML descr attribute is set + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert 
cnvpr.attrib.get("descr") == "This is the chart description" + + def test_alt_text_description_reading_matches_setting(self): + """Test that reading alt text description returns what was set.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + + # Set via set_alt_text + element = validate_pptx_element(textbox) + element.set_alt_text(api_client, description="Test Description") + + # Re-read as new element (simulates reading from saved file) + element2 = validate_pptx_element(textbox) + assert element2.alt_text.description == "Test Description" + + def test_set_alt_text_both_title_and_description(self): + """Test that both title and description can be set together.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = validate_pptx_element(textbox) + + # Set both title and description + element.set_alt_text( + api_client, + title="Chart Title", + description="Chart showing weekly sales by region", + ) + + # Verify both XML attributes are set + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert cnvpr.attrib.get("title") == "Chart Title" + assert cnvpr.attrib.get("descr") == "Chart showing weekly sales by region" + + # Verify reading them back works + element2 = validate_pptx_element(textbox) + assert element2.alt_text.title == "Chart Title" + assert element2.alt_text.description == "Chart showing weekly sales by region" + + def test_alt_text_description_only_without_title(self): + """Test that description can be set without title.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + api_client = PowerPointAPIClient() + element = 
validate_pptx_element(textbox) + + # Initially no alt text + assert element.alt_text.title is None + assert element.alt_text.description is None + + # Set only description + element.set_alt_text(api_client, description="Only description set") + + # Verify only descr is set, title remains unset + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + assert cnvpr.attrib.get("title") is None + assert cnvpr.attrib.get("descr") == "Only description set" + + # Verify alt_text model is updated + assert element.alt_text.description == "Only description set" + + def test_read_pre_existing_description_from_xml(self): + """Test reading description that was already set in XML (simulating loading from file).""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox( + Inches(1), Inches(1), Inches(5), Inches(1) + ) + + # Manually set the descr attribute in XML (as if loaded from file) + cnvpr = textbox._element.xpath(".//p:cNvPr")[0] + cnvpr.attrib["title"] = "Pre-existing Title" + cnvpr.attrib["descr"] = "Pre-existing Description from PowerPoint" + + # Now read via validate_pptx_element + element = validate_pptx_element(textbox) + + # Both should be read correctly + assert element.alt_text.title == "Pre-existing Title" + assert element.alt_text.description == "Pre-existing Description from PowerPoint" + + +class TestPresentationIdPropagation: + """Test that presentation_id is properly propagated to slides and elements.""" + + def test_presentation_id_propagates_to_slides_on_init(self): + """Test that creating PowerPointPresentation propagates presentation_id to slides.""" + from gslides_api.adapters.pptx_adapter import PowerPointPresentation, PowerPointSlide + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + # Add speaker notes (required by PowerPointSlide) + slide.notes_slide.notes_text_frame.text = "Test Slide" + + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + 
textbox.text_frame.text = "Test Text" + + # Create PowerPointPresentation with a file path + presentation = PowerPointPresentation(prs, file_path="/test/path/presentation.pptx") + + # Verify presentation_id is set on all slides + for ppt_slide in presentation.slides: + assert ppt_slide.presentation_id == "/test/path/presentation.pptx" + # Verify presentation_id is propagated to all elements + for element in ppt_slide.elements: + assert element.presentation_id == "/test/path/presentation.pptx" + + def test_setting_slide_presentation_id_propagates_to_elements(self): + """Test that setting presentation_id on a slide propagates to its elements.""" + from gslides_api.adapters.pptx_adapter import PowerPointSlide + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + slide.notes_slide.notes_text_frame.text = "Test Slide" + + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + textbox.text_frame.text = "Test Text" + + ppt_slide = PowerPointSlide(slide) + + # Elements should start with empty presentation_id + for element in ppt_slide.elements: + assert element.presentation_id == "" + + # Set presentation_id on slide + ppt_slide.presentation_id = "/new/path/test.pptx" + + # Verify all elements now have the presentation_id + for element in ppt_slide.elements: + assert element.presentation_id == "/new/path/test.pptx" + + def test_insert_copy_sets_presentation_id(self, tmp_path): + """Test that insert_copy sets presentation_id on the copied slide.""" + from gslides_api.adapters.pptx_adapter import ( + PowerPointAPIClient, + PowerPointPresentation, + PowerPointSlide, + ) + + # Create source presentation + src_prs = Presentation() + src_slide = src_prs.slides.add_slide(src_prs.slide_layouts[0]) + src_slide.notes_slide.notes_text_frame.text = "Source Slide" + src_textbox = src_slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + src_textbox.text_frame.text = "Source Text" + + # Save source presentation + 
src_path = str(tmp_path / "source.pptx") + src_prs.save(src_path) + + # Create target presentation + tgt_prs = Presentation() + tgt_slide = tgt_prs.slides.add_slide(tgt_prs.slide_layouts[0]) + tgt_slide.notes_slide.notes_text_frame.text = "Target Slide" + + tgt_path = str(tmp_path / "target.pptx") + tgt_prs.save(tgt_path) + + # Load presentations + api_client = PowerPointAPIClient() + source = PowerPointPresentation.from_id(api_client, src_path) + target = PowerPointPresentation.from_id(api_client, tgt_path) + + # Insert a copy + new_slide = target.insert_copy( + source_slide=source.slides[0], + api_client=api_client, + ) + + # Verify the new slide has the target presentation_id + assert new_slide.presentation_id == tgt_path + # Verify elements also have the correct presentation_id + for element in new_slide.elements: + assert element.presentation_id == tgt_path + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_pptx/test_pptx_autoscale.py b/tests/test_pptx/test_pptx_autoscale.py new file mode 100644 index 0000000..b5a909c --- /dev/null +++ b/tests/test_pptx/test_pptx_autoscale.py @@ -0,0 +1,246 @@ +""" +Tests for PowerPoint autoscaling font size cap behavior. + +Autoscaling should only decrease font size to fit content, never increase it. 
+""" + +import pytest +from pptx import Presentation +from pptx.util import Inches, Pt + +from gslides_api.agnostic.text import FullTextStyle, RichStyle +from gslides_api.pptx.markdown_to_pptx import ( + _get_max_font_size_from_textframe, + apply_markdown_to_textframe, +) + + +class TestGetMaxFontSizeFromTextframe: + """Tests for _get_max_font_size_from_textframe helper function.""" + + def test_returns_none_for_empty_textframe(self): + """Empty text frame should return None.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + text_frame = textbox.text_frame + + # Clear to ensure empty + text_frame.clear() + + result = _get_max_font_size_from_textframe(text_frame) + assert result is None + + def test_returns_single_font_size(self): + """Single font size should be returned.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + text_frame = textbox.text_frame + + # Set text with specific font size + p = text_frame.paragraphs[0] + run = p.add_run() + run.text = "Test text" + run.font.size = Pt(14) + + result = _get_max_font_size_from_textframe(text_frame) + assert result == 14.0 + + def test_returns_max_of_multiple_font_sizes(self): + """When multiple font sizes exist, return the maximum.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + text_frame = textbox.text_frame + + # First paragraph with 12pt font + p1 = text_frame.paragraphs[0] + run1 = p1.add_run() + run1.text = "Small text" + run1.font.size = Pt(12) + + # Second paragraph with 18pt font + p2 = text_frame.add_paragraph() + run2 = p2.add_run() + run2.text = "Large text" + run2.font.size = Pt(18) + + # Third paragraph with 10pt font + p3 = text_frame.add_paragraph() + run3 = 
p3.add_run() + run3.text = "Tiny text" + run3.font.size = Pt(10) + + result = _get_max_font_size_from_textframe(text_frame) + assert result == 18.0 + + def test_ignores_runs_without_font_size(self): + """Runs without explicit font size should be ignored.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + text_frame = textbox.text_frame + + p = text_frame.paragraphs[0] + + # First run with explicit size + run1 = p.add_run() + run1.text = "Sized text" + run1.font.size = Pt(14) + + # Second run without explicit size + run2 = p.add_run() + run2.text = " unsized text" + # run2.font.size is None (inherited) + + result = _get_max_font_size_from_textframe(text_frame) + assert result == 14.0 + + +class TestAutoscaleFontSizeCap: + """Tests for autoscaling font size cap behavior.""" + + def test_autoscale_does_not_increase_beyond_original(self): + """Autoscaling should not increase font size beyond original.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + # Create a large textbox + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(3)) + text_frame = textbox.text_frame + + # Set original text with 10pt font + p = text_frame.paragraphs[0] + run = p.add_run() + run.text = "Original" + run.font.size = Pt(10) + run.font.name = "Arial" + + # Apply very short text with autoscale - without cap, fit_text would increase to 18pt + apply_markdown_to_textframe("Hi", text_frame, autoscale=True) + + # Check that font size did not increase beyond original 10pt + actual_font_size = None + for para in text_frame.paragraphs: + for r in para.runs: + if r.font.size is not None: + actual_font_size = r.font.size.pt + break + + # Font size should be at most 10pt (the original), not 18pt + if actual_font_size is not None: + assert actual_font_size <= 10.0, ( + f"Font size {actual_font_size}pt exceeds original 10pt" + ) + + def 
test_autoscale_still_decreases_for_long_text(self): + """Autoscaling should still decrease font size when text is too long.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + # Create a small textbox + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(0.5)) + text_frame = textbox.text_frame + + # Set original text with 14pt font + p = text_frame.paragraphs[0] + run = p.add_run() + run.text = "Short" + run.font.size = Pt(14) + run.font.name = "Arial" + + # Apply very long text that won't fit at 14pt + long_text = "This is a very long text that definitely will not fit in the small text box and needs to be scaled down significantly to fit properly." + apply_markdown_to_textframe(long_text, text_frame, autoscale=True) + + # The text should have been written (we can't easily verify the font size + # was decreased since fit_text behavior depends on font availability, + # but we verify no exception was raised and text was applied) + assert text_frame.text.strip() == long_text + + def test_autoscale_uses_base_style_when_textframe_empty(self): + """When text frame is empty, base_style font size should be used as cap.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + # Create a large textbox + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(3)) + text_frame = textbox.text_frame + + # Clear text frame so no original font size can be found + text_frame.clear() + + # Create base_style with 12pt font + base_style = FullTextStyle( + rich=RichStyle(font_size_pt=12) + ) + + # Apply short text with autoscale and base_style + apply_markdown_to_textframe("Hi", text_frame, base_style=base_style, autoscale=True) + + # The function should have used 12pt as the cap from base_style + # We just verify no exception was raised and text was applied + assert text_frame.text.strip() == "Hi" + + def test_autoscale_false_does_not_modify_font_size(self): + """When autoscale=False, font 
size should not be modified.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(3)) + text_frame = textbox.text_frame + + # Set original text + p = text_frame.paragraphs[0] + run = p.add_run() + run.text = "Original" + run.font.size = Pt(14) + + # Apply new text without autoscale + apply_markdown_to_textframe("New text", text_frame, autoscale=False) + + # Text should be updated but no fit_text was called + assert text_frame.text.strip() == "New text" + + +class TestAutoscaleDefaultMaxSize: + """Test that default max_size of 18pt is still applied when appropriate.""" + + def test_uses_original_when_smaller_than_default(self): + """Original font size is used when smaller than default 18pt.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(3)) + text_frame = textbox.text_frame + + # Set original text with 10pt font (smaller than default 18pt) + p = text_frame.paragraphs[0] + run = p.add_run() + run.text = "Original" + run.font.size = Pt(10) + run.font.name = "Arial" + + # Apply short text with autoscale + apply_markdown_to_textframe("Hi", text_frame, autoscale=True) + + # Should use 10pt cap (original), not 18pt + # This is tested indirectly - we just verify no exception + assert text_frame.text.strip() == "Hi" + + def test_uses_default_when_no_original_available(self): + """Default 18pt is used when no original font size is available.""" + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[0]) + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(3)) + text_frame = textbox.text_frame + + # Clear the text frame completely (no original font size) + text_frame.clear() + + # Apply text with autoscale but no base_style + apply_markdown_to_textframe("Test text", text_frame, autoscale=True) + + # Should fall back to default 
18pt cap + # This is tested indirectly - we just verify no exception and text was applied + assert text_frame.text.strip() == "Test text" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_pptx/test_pptx_bullets.py b/tests/test_pptx/test_pptx_bullets.py new file mode 100644 index 0000000..a57df79 --- /dev/null +++ b/tests/test_pptx/test_pptx_bullets.py @@ -0,0 +1,1183 @@ +""" +Test suite for PPTX bullet point functionality. + +Tests bullet detection (reading) and bullet creation (writing) via XML manipulation. +""" + +import os +import tempfile + +import pytest +from lxml import etree +from pptx import Presentation +from pptx.util import Inches + +from gslides_api.pptx.markdown_to_pptx import ( + _enable_paragraph_bullets, + apply_markdown_to_textframe, +) +from gslides_api.pptx.converters import ( + _DRAWINGML_NS, + _paragraph_has_bullet, + pptx_paragraph_to_markdown, + pptx_text_frame_to_markdown, +) + + +class TestParagraphHasBullet: + """Test the _paragraph_has_bullet() function for detecting bullets in XML.""" + + def create_test_presentation_with_bullets(self): + """Create a test presentation with bullet-formatted text.""" + prs = Presentation() + # Use bullet slide layout (layout 1 typically has bullets) + bullet_slide_layout = prs.slide_layouts[1] + slide = prs.slides.add_slide(bullet_slide_layout) + body_shape = slide.shapes.placeholders[1] + tf = body_shape.text_frame + + # First paragraph is typically bullet-formatted in placeholder + tf.paragraphs[0].text = "First bullet item" + + # Add more bullet items + p = tf.add_paragraph() + p.text = "Second bullet item" + p.level = 0 + + p = tf.add_paragraph() + p.text = "Nested bullet item" + p.level = 1 + + return prs, tf + + def create_test_textbox_no_bullets(self): + """Create a text box without bullet formatting.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] # Blank layout + slide = prs.slides.add_slide(blank_layout) + + # Add a plain text box (no 
bullets) + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Plain text without bullets" + + p = tf.add_paragraph() + p.text = "Another plain paragraph" + + return prs, tf + + def test_detect_bullet_in_placeholder(self): + """Test that bullets are detected in bullet placeholder.""" + prs, tf = self.create_test_presentation_with_bullets() + + # At least some paragraphs should have bullets + has_any_bullet = False + for para in tf.paragraphs: + if _paragraph_has_bullet(para): + has_any_bullet = True + break + + # Note: Bullet detection depends on whether the placeholder has + # bullet XML elements. Some placeholders inherit from master. + # This test verifies the function doesn't crash and handles the case. + assert isinstance(has_any_bullet, bool) + + def test_no_bullet_in_plain_textbox(self): + """Test that no bullets are detected in a plain text box.""" + prs, tf = self.create_test_textbox_no_bullets() + + for para in tf.paragraphs: + # Plain text boxes should not have bullet formatting + assert not _paragraph_has_bullet(para) + + def test_detect_bullet_after_enabling(self): + """Test that bullets are detected after enabling via XML.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Text that will get bullets" + + # Before enabling, should have no bullet + assert not _paragraph_has_bullet(para) + + # Enable bullets via XML + _enable_paragraph_bullets(para) + + # After enabling, should have bullet + assert _paragraph_has_bullet(para) + + +class TestEnableParagraphBullets: + """Test the _enable_paragraph_bullets() function for adding bullets via XML.""" + + def test_enable_bullets_adds_buchar_element(self): + """Test that enabling bullets 
adds buChar XML element.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Test paragraph" + + # Enable bullets + _enable_paragraph_bullets(para) + + # Check that buChar element exists + pPr = para._element.get_or_add_pPr() + buChar = pPr.find(f"{{{_DRAWINGML_NS}}}buChar") + assert buChar is not None + assert buChar.get("char") == "•" + + def test_enable_bullets_custom_character(self): + """Test enabling bullets with a custom character.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Test paragraph" + + # Enable bullets with custom character + _enable_paragraph_bullets(para, char="★") + + # Check that buChar element has custom character + pPr = para._element.get_or_add_pPr() + buChar = pPr.find(f"{{{_DRAWINGML_NS}}}buChar") + assert buChar is not None + assert buChar.get("char") == "★" + + def test_enable_bullets_removes_bunone(self): + """Test that enabling bullets removes buNone element if present.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Test paragraph" + + # Manually add buNone to simulate disabled bullets + pPr = para._element.get_or_add_pPr() + buNone = etree.Element(f"{{{_DRAWINGML_NS}}}buNone") + pPr.insert(0, buNone) + + # Verify buNone exists + assert pPr.find(f"{{{_DRAWINGML_NS}}}buNone") is not None + + # Enable bullets + 
_enable_paragraph_bullets(para) + + # Verify buNone was removed + assert pPr.find(f"{{{_DRAWINGML_NS}}}buNone") is None + + # Verify buChar was added + assert pPr.find(f"{{{_DRAWINGML_NS}}}buChar") is not None + + +class TestBulletRoundtrip: + """Test roundtrip: write markdown with bullets, read back as markdown.""" + + def test_bullet_list_roundtrip(self): + """Test writing and reading back a bullet list.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Write markdown with bullets + markdown_input = """- First bullet point +- Second bullet point +- Third bullet point""" + + apply_markdown_to_textframe(markdown_input, tf) + + # Read back as markdown + markdown_output = pptx_text_frame_to_markdown(tf) + + # Verify bullets are preserved + assert "- First bullet point" in markdown_output + assert "- Second bullet point" in markdown_output + assert "- Third bullet point" in markdown_output + + def test_nested_bullet_list_roundtrip(self): + """Test writing and reading back nested bullet lists.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Write markdown with nested bullets + markdown_input = """- Top level item + - Nested item one + - Nested item two +- Another top level""" + + apply_markdown_to_textframe(markdown_input, tf) + + # Read back as markdown + markdown_output = pptx_text_frame_to_markdown(tf) + + # Verify structure is preserved (at least bullet markers should exist) + assert "Top level item" in markdown_output + assert "Nested item" in markdown_output + + def test_mixed_content_with_bullets(self): + """Test markdown with mixed paragraphs and bullets.""" + 
prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Write markdown with mixed content + markdown_input = """Regular paragraph text + +- Bullet one +- Bullet two + +Another paragraph""" + + apply_markdown_to_textframe(markdown_input, tf) + + # Read back as markdown + markdown_output = pptx_text_frame_to_markdown(tf) + + # Verify both regular text and bullets exist + assert "Regular paragraph" in markdown_output or "paragraph" in markdown_output.lower() + assert "Bullet" in markdown_output or "-" in markdown_output + + def test_bullet_with_bold_text(self): + """Test bullets with bold formatting.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Write markdown with bold text in bullets + markdown_input = """- **Bold item** with more text +- Regular item +- Item with **bold** in middle""" + + apply_markdown_to_textframe(markdown_input, tf) + + # Read back as markdown + markdown_output = pptx_text_frame_to_markdown(tf) + + # Verify bold is preserved + assert "**Bold item**" in markdown_output or "Bold item" in markdown_output + assert "-" in markdown_output # Bullet markers should exist + + +class TestPptxParagraphToMarkdown: + """Test the pptx_paragraph_to_markdown() function.""" + + def test_paragraph_without_bullet_no_marker(self): + """Test that non-bullet paragraphs don't get bullet markers.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + run = 
para.runs[0] if para.runs else para.add_run() + run.text = "Plain text" + + # Convert to markdown + md = pptx_paragraph_to_markdown(para) + + # Should not have bullet marker + assert not md.startswith("-") + assert "Plain text" in md + + def test_paragraph_with_bullet_gets_marker(self): + """Test that bullet paragraphs get bullet markers.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + run = para.runs[0] if para.runs else para.add_run() + run.text = "Bullet text" + + # Enable bullets + _enable_paragraph_bullets(para) + + # Convert to markdown + md = pptx_paragraph_to_markdown(para) + + # Should have bullet marker + assert md.startswith("-") or "- " in md + assert "Bullet text" in md + + +class TestBulletSaveAndReload: + """Test that bullets persist after saving and reloading the PPTX file.""" + + def test_bullets_persist_after_save(self): + """Test that bullet formatting persists after saving and reloading.""" + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp: + tmp_path = tmp.name + + try: + # Create presentation with bullets + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Write markdown with bullets + markdown_input = """- Bullet one +- Bullet two +- Bullet three""" + + apply_markdown_to_textframe(markdown_input, tf) + + # Save presentation + prs.save(tmp_path) + + # Reload presentation + prs2 = Presentation(tmp_path) + slide2 = prs2.slides[0] + + # Find the textbox + textbox2 = None + for shape in slide2.shapes: + if hasattr(shape, "text_frame"): + textbox2 = shape + break + + assert textbox2 is not None + tf2 = 
textbox2.text_frame + + # Read back as markdown + markdown_output = pptx_text_frame_to_markdown(tf2) + + # Verify bullets are preserved + assert "-" in markdown_output + assert "Bullet one" in markdown_output + + finally: + os.unlink(tmp_path) + + +class TestBulletSpacing: + """Test bullet spacing and indentation via XML attributes.""" + + def test_bullet_has_margin_and_indent(self): + """Test that enabling bullets sets marL and indent attributes.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Bullet with spacing" + + # Enable bullets with level 0 + _enable_paragraph_bullets(para, level=0) + + # Check that marL and indent are set + pPr = para._element.get_or_add_pPr() + marL = pPr.get("marL") + indent = pPr.get("indent") + + # marL should be set (level 0 = 342900) + assert marL is not None + assert int(marL) > 0 + + # indent should be negative (hanging indent) + assert indent is not None + assert int(indent) < 0 + + def test_bullet_level_increases_margin(self): + """Test that higher bullet levels have larger left margins.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + + # Level 0 bullet + para0 = tf.paragraphs[0] + para0.text = "Level 0" + _enable_paragraph_bullets(para0, level=0) + + # Level 1 bullet + para1 = tf.add_paragraph() + para1.text = "Level 1" + _enable_paragraph_bullets(para1, level=1) + + # Get marL values + pPr0 = para0._element.get_or_add_pPr() + pPr1 = para1._element.get_or_add_pPr() + + marL0 = int(pPr0.get("marL")) + marL1 = int(pPr1.get("marL")) + + # Level 1 should have larger margin than level 0 + assert marL1 > 
marL0 + + def test_bullet_indent_is_consistent(self): + """Test that indent value is the same across levels (negative for hanging).""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + + # Create bullets at different levels + para0 = tf.paragraphs[0] + para0.text = "Level 0" + _enable_paragraph_bullets(para0, level=0) + + para1 = tf.add_paragraph() + para1.text = "Level 1" + _enable_paragraph_bullets(para1, level=1) + + # Get indent values + pPr0 = para0._element.get_or_add_pPr() + pPr1 = para1._element.get_or_add_pPr() + + indent0 = int(pPr0.get("indent")) + indent1 = int(pPr1.get("indent")) + + # Indent should be the same (same hanging amount at all levels) + assert indent0 == indent1 + # And should be negative + assert indent0 < 0 + + +class TestBodyPrInsetPreservation: + """Test that bodyPr insets are preserved when applying markdown.""" + + def test_insets_preserved_after_apply_markdown(self): + """Test that bodyPr insets are preserved after apply_markdown_to_textframe. + + When text_frame.clear() is called, bodyPr insets may be reset. + Our implementation should preserve and restore them. 
+ """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Initial text" + + # Set custom insets on the text frame + bodyPr = tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert bodyPr is not None + + # Set custom inset values + bodyPr.set("lIns", "182880") # ~0.2 inches + bodyPr.set("rIns", "182880") + bodyPr.set("tIns", "91440") # ~0.1 inches + bodyPr.set("bIns", "91440") + + # Verify insets are set + assert bodyPr.get("lIns") == "182880" + assert bodyPr.get("tIns") == "91440" + + # Apply markdown (this calls text_frame.clear() internally) + apply_markdown_to_textframe("New markdown text", tf) + + # Verify insets are preserved after apply + bodyPr_after = tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert bodyPr_after is not None + assert bodyPr_after.get("lIns") == "182880", ( + f"lIns should be preserved, got {bodyPr_after.get('lIns')}" + ) + assert bodyPr_after.get("rIns") == "182880" + assert bodyPr_after.get("tIns") == "91440", ( + f"tIns should be preserved, got {bodyPr_after.get('tIns')}" + ) + assert bodyPr_after.get("bIns") == "91440" + + def test_insets_preserved_with_bullets(self): + """Test that insets are preserved when applying markdown with bullets.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Initial text" + + # Set custom insets + bodyPr = tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + bodyPr.set("lIns", "274320") # ~0.3 inches + bodyPr.set("tIns", "137160") # ~0.15 inches + + # Apply markdown with bullets + markdown_with_bullets = """- First bullet +- Second bullet +- Third bullet""" + 
apply_markdown_to_textframe(markdown_with_bullets, tf) + + # Verify insets are preserved + bodyPr_after = tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert bodyPr_after.get("lIns") == "274320" + assert bodyPr_after.get("tIns") == "137160" + + # Verify bullets were also applied + first_para = tf.paragraphs[0] + assert _paragraph_has_bullet(first_para) + + +class TestParagraphSpacingPreservation: + """Test that line spacing is preserved for regular (non-bullet) paragraphs.""" + + def test_line_spacing_preserved_for_regular_paragraphs(self): + """Test that line spacing is preserved after apply_markdown_to_textframe. + + When text_frame.clear() is called, line spacing may be reset. + Our implementation should preserve and restore it for regular paragraphs. + """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Initial text" + + # Set custom line spacing on the first paragraph + para = tf.paragraphs[0] + pPr = para._element.get_or_add_pPr() + + # Set line spacing to 150% (150000 in PPTX units = 1500 * 100) + lnSpc = etree.SubElement(pPr, f"{{{_DRAWINGML_NS}}}lnSpc") + spcPct = etree.SubElement(lnSpc, f"{{{_DRAWINGML_NS}}}spcPct") + spcPct.set("val", "150000") + + # Verify line spacing is set + lnSpc_before = pPr.find(f"{{{_DRAWINGML_NS}}}lnSpc") + assert lnSpc_before is not None + spcPct_before = lnSpc_before.find(f"{{{_DRAWINGML_NS}}}spcPct") + assert spcPct_before is not None + assert spcPct_before.get("val") == "150000" + + # Apply plain text markdown (no bullets) + apply_markdown_to_textframe("New paragraph text", tf) + + # Verify line spacing is preserved after apply + para_after = tf.paragraphs[0] + pPr_after = para_after._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr_after is not None, "pPr should exist after apply" + + lnSpc_after = 
pPr_after.find(f"{{{_DRAWINGML_NS}}}lnSpc") + assert lnSpc_after is not None, "lnSpc should be preserved" + + spcPct_after = lnSpc_after.find(f"{{{_DRAWINGML_NS}}}spcPct") + assert spcPct_after is not None, "spcPct should be preserved" + assert spcPct_after.get("val") == "150000", ( + f"Line spacing should be 150%, got {spcPct_after.get('val')}" + ) + + def test_space_before_preserved_for_regular_paragraphs(self): + """Test that space-before is preserved after apply_markdown_to_textframe.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Initial text" + + # Set custom space-before on the first paragraph + para = tf.paragraphs[0] + pPr = para._element.get_or_add_pPr() + + # Set space-before to 9pt (900 in PPTX units = 100ths of a point) + spcBef = etree.SubElement(pPr, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts = etree.SubElement(spcBef, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", "900") + + # Verify space-before is set + spcBef_before = pPr.find(f"{{{_DRAWINGML_NS}}}spcBef") + assert spcBef_before is not None + + # Apply plain text markdown (no bullets) + apply_markdown_to_textframe("New paragraph text", tf) + + # Verify space-before is preserved after apply + para_after = tf.paragraphs[0] + pPr_after = para_after._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr_after is not None + + spcBef_after = pPr_after.find(f"{{{_DRAWINGML_NS}}}spcBef") + assert spcBef_after is not None, "spcBef should be preserved" + + spcPts_after = spcBef_after.find(f"{{{_DRAWINGML_NS}}}spcPts") + assert spcPts_after is not None + assert spcPts_after.get("val") == "900", ( + f"Space-before should be 9pt (900), got {spcPts_after.get('val')}" + ) + + def test_space_after_preserved_for_regular_paragraphs(self): + """Test that space-after is preserved after 
apply_markdown_to_textframe.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + tf.text = "Initial text" + + # Set custom space-after on the first paragraph + para = tf.paragraphs[0] + pPr = para._element.get_or_add_pPr() + + # Set space-after to 0pt (common in templates to control spacing explicitly) + spcAft = etree.SubElement(pPr, f"{{{_DRAWINGML_NS}}}spcAft") + spcPts = etree.SubElement(spcAft, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", "0") + + # Verify space-after is set + spcAft_before = pPr.find(f"{{{_DRAWINGML_NS}}}spcAft") + assert spcAft_before is not None + + # Apply plain text markdown (no bullets) + apply_markdown_to_textframe("New paragraph text", tf) + + # Verify space-after is preserved after apply + para_after = tf.paragraphs[0] + pPr_after = para_after._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr_after is not None + + spcAft_after = pPr_after.find(f"{{{_DRAWINGML_NS}}}spcAft") + assert spcAft_after is not None, "spcAft should be preserved" + + spcPts_after = spcAft_after.find(f"{{{_DRAWINGML_NS}}}spcPts") + assert spcPts_after is not None + assert spcPts_after.get("val") == "0", ( + f"Space-after should be 0pt (0), got {spcPts_after.get('val')}" + ) + + def test_first_paragraph_and_bullet_spacing_preserved_separately(self): + """Test that first paragraph gets different spacing than bullet paragraphs. + + Template pattern: title (para 0) has spcBef=0, bullets have spcBef=900. + The implementation should preserve and apply these separately. 
+ """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(3) + ) + tf = textbox.text_frame + + # Create template pattern: title with spcBef=0, then bullets with spcBef=900 + # First paragraph (title) - spcBef=0 + tf.text = "Title" + para0 = tf.paragraphs[0] + pPr0 = para0._element.get_or_add_pPr() + spcBef0 = etree.SubElement(pPr0, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts0 = etree.SubElement(spcBef0, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts0.set("val", "0") # Title has 0pt space before + + # Second paragraph (bullet) - spcBef=900 + para1 = tf.add_paragraph() + para1.text = "Bullet item" + pPr1 = para1._element.get_or_add_pPr() + # Add bullet marker to mark as bullet paragraph + buChar = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}buChar") + buChar.set("char", "•") + # Add bullet spacing + spcBef1 = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts1 = etree.SubElement(spcBef1, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts1.set("val", "900") # Bullet has 9pt space before + # Add margin and indent for bullet + pPr1.set("marL", "520700") + pPr1.set("indent", "-209550") + + # Apply markdown with title and bullets (use markdown bullet syntax) + markdown = "New Title\n- First bullet\n- Second bullet" + apply_markdown_to_textframe(markdown, tf) + + # Verify first paragraph (title) has spcBef=0 + output_para0 = tf.paragraphs[0] + pPr_out0 = output_para0._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr_out0 is not None, "First paragraph should have pPr" + + spcBef_out0 = pPr_out0.find(f"{{{_DRAWINGML_NS}}}spcBef") + if spcBef_out0 is not None: + spcPts_out0 = spcBef_out0.find(f"{{{_DRAWINGML_NS}}}spcPts") + if spcPts_out0 is not None: + assert spcPts_out0.get("val") == "0", ( + f"Title should have spcBef=0, got {spcPts_out0.get('val')}" + ) + + # Verify bullet paragraphs have spcBef=900 + assert len(tf.paragraphs) 
>= 2, "Should have at least 2 paragraphs" + output_para1 = tf.paragraphs[1] + pPr_out1 = output_para1._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr_out1 is not None, "Bullet paragraph should have pPr" + + spcBef_out1 = pPr_out1.find(f"{{{_DRAWINGML_NS}}}spcBef") + assert spcBef_out1 is not None, "Bullet should have spcBef" + spcPts_out1 = spcBef_out1.find(f"{{{_DRAWINGML_NS}}}spcPts") + assert spcPts_out1 is not None + assert spcPts_out1.get("val") == "900", ( + f"Bullet should have spcBef=900, got {spcPts_out1.get('val')}" + ) + + +class TestPPrElementOrder: + """Test that pPr child elements are in the correct order for PowerPoint rendering. + + XML element order in <a:pPr> affects how PowerPoint renders spacing. + The correct order is: lnSpc, spcBef, spcAft, buClr, buSzPts, buFont, buChar. + """ + + def test_spacing_elements_come_before_bullet_elements(self): + """Test that spacing elements (lnSpc, spcBef, spcAft) come before buChar. + + PowerPoint requires this order for correct visual rendering of line spacing. 
+ """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(3) + ) + tf = textbox.text_frame + + # Create template pattern with spacing values + tf.text = "Title" + para0 = tf.paragraphs[0] + pPr0 = para0._element.get_or_add_pPr() + + # Add line spacing (110%) + lnSpc = etree.SubElement(pPr0, f"{{{_DRAWINGML_NS}}}lnSpc") + spcPct = etree.SubElement(lnSpc, f"{{{_DRAWINGML_NS}}}spcPct") + spcPct.set("val", "110000") + + # Add space-before (0pt for title) + spcBef = etree.SubElement(pPr0, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts = etree.SubElement(spcBef, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts.set("val", "0") + + # Add space-after + spcAft = etree.SubElement(pPr0, f"{{{_DRAWINGML_NS}}}spcAft") + spcPts2 = etree.SubElement(spcAft, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts2.set("val", "0") + + # Second paragraph (bullet) with spacing + para1 = tf.add_paragraph() + para1.text = "Bullet item" + pPr1 = para1._element.get_or_add_pPr() + + # Add line spacing + lnSpc1 = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}lnSpc") + spcPct1 = etree.SubElement(lnSpc1, f"{{{_DRAWINGML_NS}}}spcPct") + spcPct1.set("val", "110000") + + # Add space-before (9pt for bullets) + spcBef1 = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}spcBef") + spcPts1 = etree.SubElement(spcBef1, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts1.set("val", "900") + + # Add space-after + spcAft1 = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}spcAft") + spcPts1_aft = etree.SubElement(spcAft1, f"{{{_DRAWINGML_NS}}}spcPts") + spcPts1_aft.set("val", "0") + + # Add bullet marker + buChar = etree.SubElement(pPr1, f"{{{_DRAWINGML_NS}}}buChar") + buChar.set("char", "•") + pPr1.set("marL", "520700") + pPr1.set("indent", "-209550") + + # Apply markdown with title and bullets + markdown = "New Title\n- First bullet\n- Second bullet" + apply_markdown_to_textframe(markdown, tf) + + # Verify 
element order in bullet paragraphs + for i, para in enumerate(tf.paragraphs): + if i == 0: + continue # Skip title paragraph + + pPr = para._element.find(f"{{{_DRAWINGML_NS}}}pPr") + if pPr is None: + continue + + # Get indices of spacing and bullet elements + element_names = [ + etree.QName(child).localname for child in pPr + ] + + lnSpc_idx = element_names.index("lnSpc") if "lnSpc" in element_names else -1 + spcBef_idx = element_names.index("spcBef") if "spcBef" in element_names else -1 + spcAft_idx = element_names.index("spcAft") if "spcAft" in element_names else -1 + buChar_idx = element_names.index("buChar") if "buChar" in element_names else -1 + + # If buChar exists, it must come AFTER spacing elements + if buChar_idx >= 0: + if lnSpc_idx >= 0: + assert lnSpc_idx < buChar_idx, ( + f"Para {i}: lnSpc (idx={lnSpc_idx}) must come before " + f"buChar (idx={buChar_idx}). Order: {element_names}" + ) + if spcBef_idx >= 0: + assert spcBef_idx < buChar_idx, ( + f"Para {i}: spcBef (idx={spcBef_idx}) must come before " + f"buChar (idx={buChar_idx}). Order: {element_names}" + ) + if spcAft_idx >= 0: + assert spcAft_idx < buChar_idx, ( + f"Para {i}: spcAft (idx={spcAft_idx}) must come before " + f"buChar (idx={buChar_idx}). Order: {element_names}" + ) + + # Verify correct order: lnSpc < spcBef < spcAft + if lnSpc_idx >= 0 and spcBef_idx >= 0: + assert lnSpc_idx < spcBef_idx, ( + f"Para {i}: lnSpc must come before spcBef. Order: {element_names}" + ) + if spcBef_idx >= 0 and spcAft_idx >= 0: + assert spcBef_idx < spcAft_idx, ( + f"Para {i}: spcBef must come before spcAft. 
Order: {element_names}" + ) + + def test_enable_bullets_preserves_correct_element_order(self): + """Test that _enable_paragraph_bullets() creates elements in correct order.""" + from gslides_api.agnostic.text import ParagraphStyle, SpacingValue + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + para = tf.paragraphs[0] + para.text = "Bullet with spacing" + + # Create preserved props with spacing values + preserved_props = ParagraphStyle( + line_spacing=SpacingValue(percentage=110), + space_before=SpacingValue(points=9), + space_after=SpacingValue(points=0), + margin_left=520700, + indent=-209550, + ) + + # Enable bullets with preserved props + _enable_paragraph_bullets(para, level=0, preserved_props=preserved_props) + + # Check element order + pPr = para._element.find(f"{{{_DRAWINGML_NS}}}pPr") + assert pPr is not None + + element_names = [etree.QName(child).localname for child in pPr] + + # All spacing elements should exist + assert "lnSpc" in element_names, f"lnSpc missing. Elements: {element_names}" + assert "spcBef" in element_names, f"spcBef missing. Elements: {element_names}" + assert "spcAft" in element_names, f"spcAft missing. Elements: {element_names}" + assert "buChar" in element_names, f"buChar missing. Elements: {element_names}" + + lnSpc_idx = element_names.index("lnSpc") + spcBef_idx = element_names.index("spcBef") + spcAft_idx = element_names.index("spcAft") + buChar_idx = element_names.index("buChar") + + # Verify order: lnSpc, spcBef, spcAft, buChar + assert lnSpc_idx < spcBef_idx < spcAft_idx < buChar_idx, ( + f"Incorrect element order. Expected lnSpc < spcBef < spcAft < buChar, " + f"got indices: lnSpc={lnSpc_idx}, spcBef={spcBef_idx}, " + f"spcAft={spcAft_idx}, buChar={buChar_idx}. 
Elements: {element_names}" + ) + + +class TestSoftLineBreaksInBullets: + """Test soft line breaks (line breaks within a single bullet point). + + PowerPoint supports line breaks inside list items via <a:br/> XML elements. + This is different from Google Slides which does NOT support this. + The soft line break character is \\x0b (vertical tab). + """ + + def test_soft_line_break_creates_br_element(self): + """Test that soft line breaks within bullet content create <a:br/> elements. + + When a bullet point contains \\x0b, it should create a single paragraph + with <a:br/> XML elements, NOT multiple paragraphs. + """ + from gslides_api.agnostic.ir import ( + FormattedDocument, + FormattedList, + FormattedListItem, + FormattedParagraph, + FormattedTextRun, + ) + from gslides_api.agnostic.text import FullTextStyle + from gslides_api.pptx.markdown_to_pptx import _apply_ir_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + # Create IR with a bullet item containing soft line break + soft_break = "\x0b" # Vertical tab - PowerPoint's soft line break + doc_ir = FormattedDocument( + elements=[ + FormattedList( + ordered=False, + items=[ + FormattedListItem( + paragraphs=[ + FormattedParagraph( + runs=[ + FormattedTextRun( + content=f"First line{soft_break}Second line", + style=FullTextStyle(), + ) + ] + ) + ], + nesting_level=0, + ) + ], + ) + ] + ) + + # Apply IR to text frame + _apply_ir_to_textframe(doc_ir, tf) + + # Verify there's only ONE paragraph (the soft break is within it) + assert len(tf.paragraphs) == 1, ( + f"Expected 1 paragraph, got {len(tf.paragraphs)}. " + "Soft line break should NOT create a new paragraph." 
+ ) + + # Verify the paragraph has bullet formatting + assert _paragraph_has_bullet(tf.paragraphs[0]) + + # Verify <a:br/> element exists in the paragraph XML + para_xml = tf.paragraphs[0]._element + br_elements = para_xml.findall(f".//{{{_DRAWINGML_NS}}}br") + assert len(br_elements) >= 1, ( + f"Expected at least 1 <a:br/> element, found {len(br_elements)}. " + "Soft line break should create <a:br/> XML element." + ) + + def test_soft_line_break_content_preserved_on_roundtrip(self): + """Test that content with soft line breaks is preserved after save/reload. + + This verifies that the <a:br/> elements are correctly saved to the PPTX file + and can be read back. + """ + from gslides_api.agnostic.ir import ( + FormattedDocument, + FormattedList, + FormattedListItem, + FormattedParagraph, + FormattedTextRun, + ) + from gslides_api.agnostic.text import FullTextStyle + from gslides_api.pptx.markdown_to_pptx import _apply_ir_to_textframe + + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp: + tmp_path = tmp.name + + try: + # Create presentation with soft line break in bullet + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + soft_break = "\x0b" + doc_ir = FormattedDocument( + elements=[ + FormattedList( + ordered=False, + items=[ + FormattedListItem( + paragraphs=[ + FormattedParagraph( + runs=[ + FormattedTextRun( + content=f"Line A{soft_break}Line B", + style=FullTextStyle(), + ) + ] + ) + ], + nesting_level=0, + ) + ], + ) + ] + ) + + _apply_ir_to_textframe(doc_ir, tf) + + # Save presentation + prs.save(tmp_path) + + # Reload and verify + prs2 = Presentation(tmp_path) + slide2 = prs2.slides[0] + + # Find the textbox + textbox2 = None + for shape in slide2.shapes: + if hasattr(shape, "text_frame"): + textbox2 = shape + break + + assert textbox2 is not 
None + tf2 = textbox2.text_frame + + # Should still be 1 paragraph + assert len(tf2.paragraphs) == 1 + + # Verify <a:br/> element still exists after save/reload + para_xml = tf2.paragraphs[0]._element + br_elements = para_xml.findall(f".//{{{_DRAWINGML_NS}}}br") + assert len(br_elements) >= 1, ( + "The <a:br/> element should persist after save/reload" + ) + + # Content should contain both parts + full_text = tf2.text + assert "Line A" in full_text + assert "Line B" in full_text + + finally: + os.unlink(tmp_path) + + def test_multiple_soft_breaks_in_bullet(self): + """Test bullet with multiple soft line breaks creates multiple <a:br/> elements.""" + from gslides_api.agnostic.ir import ( + FormattedDocument, + FormattedList, + FormattedListItem, + FormattedParagraph, + FormattedTextRun, + ) + from gslides_api.agnostic.text import FullTextStyle + from gslides_api.pptx.markdown_to_pptx import _apply_ir_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(6), height=Inches(4) + ) + tf = textbox.text_frame + + soft_break = "\x0b" + doc_ir = FormattedDocument( + elements=[ + FormattedList( + ordered=False, + items=[ + FormattedListItem( + paragraphs=[ + FormattedParagraph( + runs=[ + FormattedTextRun( + content=f"Line 1{soft_break}Line 2{soft_break}Line 3", + style=FullTextStyle(), + ) + ] + ) + ], + nesting_level=0, + ) + ], + ) + ] + ) + + _apply_ir_to_textframe(doc_ir, tf) + + # Should have 2 <a:br/> elements (between 3 lines) + para_xml = tf.paragraphs[0]._element + br_elements = para_xml.findall(f".//{{{_DRAWINGML_NS}}}br") + assert len(br_elements) == 2, ( + f"Expected 2 <a:br/> elements for 3 lines, got {len(br_elements)}" + ) diff --git a/tests/test_pptx/test_pptx_converters.py b/tests/test_pptx/test_pptx_converters.py new file mode 100644 index 0000000..6854c12 --- /dev/null +++ 
b/tests/test_pptx/test_pptx_converters.py @@ -0,0 +1,913 @@ +"""Tests for PowerPoint font <-> platform-agnostic style converters.""" + +from unittest.mock import MagicMock + +import pytest + +from gslides_api.agnostic.text import ( + AbstractColor, + BaselineOffset, + FullTextStyle, + MarkdownRenderableStyle, + RichStyle, +) +from gslides_api.pptx.converters import ( + MONOSPACE_FONTS, + _abstract_to_pptx_rgb, + _escape_markdown_for_table, + _is_monospace, + _pptx_baseline_to_abstract, + _pptx_color_to_abstract, + apply_full_style_to_pptx_run, + apply_markdown_style_to_pptx_run, + apply_rich_style_to_pptx_run, + pptx_font_to_full, + pptx_font_to_rich, + pptx_paragraph_to_markdown, + pptx_run_to_markdown, + pptx_table_to_markdown, + pptx_text_frame_to_markdown, +) + + +class TestMonospaceDetection: + """Tests for monospace font detection.""" + + def test_monospace_fonts_detected(self): + """Common monospace fonts should be detected.""" + for font in MONOSPACE_FONTS: + assert _is_monospace(font) is True, f"{font} should be monospace" + + def test_monospace_case_insensitive(self): + """Font detection should be case-insensitive.""" + assert _is_monospace("Courier New") is True + assert _is_monospace("COURIER NEW") is True + assert _is_monospace("courier new") is True + + def test_non_monospace_fonts(self): + """Non-monospace fonts should not be detected.""" + assert _is_monospace("Arial") is False + assert _is_monospace("Times New Roman") is False + assert _is_monospace("Helvetica") is False + + def test_none_font(self): + """None font should return False.""" + assert _is_monospace(None) is False + + def test_empty_font(self): + """Empty font should return False.""" + assert _is_monospace("") is False + + +class TestColorConversion: + """Tests for color conversion between pptx and abstract.""" + + def test_pptx_color_to_abstract_with_rgb(self): + """Should convert pptx color with RGB to AbstractColor.""" + mock_color = MagicMock() + mock_color.rgb = (255, 128, 0) + + 
result = _pptx_color_to_abstract(mock_color) + + assert result is not None + assert result.red == 1.0 + assert result.green == pytest.approx(128 / 255, rel=1e-3) + assert result.blue == 0.0 + + def test_pptx_color_to_abstract_none_rgb(self): + """Should return None when color has no RGB.""" + mock_color = MagicMock() + mock_color.rgb = None + + result = _pptx_color_to_abstract(mock_color) + assert result is None + + def test_pptx_color_to_abstract_no_attribute(self): + """Should return None when color raises AttributeError.""" + # Use spec to make mock raise AttributeError when accessing rgb + mock_color = MagicMock(spec=["something_else"]) + + result = _pptx_color_to_abstract(mock_color) + assert result is None + + def test_abstract_to_pptx_rgb(self): + """Should convert AbstractColor to pptx RGBColor.""" + abstract = AbstractColor(red=1.0, green=0.5, blue=0.0) + + result = _abstract_to_pptx_rgb(abstract) + + assert result is not None + assert result[0] == 255 # red + assert result[1] == 127 # green (int(0.5 * 255)) + assert result[2] == 0 # blue + + def test_abstract_to_pptx_rgb_none(self): + """Should return None for None input.""" + result = _abstract_to_pptx_rgb(None) + assert result is None + + +class TestBaselineConversion: + """Tests for baseline offset conversion.""" + + def test_superscript_detected(self): + """Superscript font should be detected.""" + mock_font = MagicMock() + mock_font.superscript = True + mock_font.subscript = False + + result = _pptx_baseline_to_abstract(mock_font) + assert result == BaselineOffset.SUPERSCRIPT + + def test_subscript_detected(self): + """Subscript font should be detected.""" + mock_font = MagicMock() + mock_font.superscript = False + mock_font.subscript = True + + result = _pptx_baseline_to_abstract(mock_font) + assert result == BaselineOffset.SUBSCRIPT + + def test_no_baseline_offset(self): + """No baseline offset should return NONE.""" + mock_font = MagicMock() + mock_font.superscript = False + mock_font.subscript = 
False + + result = _pptx_baseline_to_abstract(mock_font) + assert result == BaselineOffset.NONE + + def test_baseline_attribute_error(self): + """Should return NONE on AttributeError.""" + mock_font = MagicMock() + del mock_font.superscript + del mock_font.subscript + + result = _pptx_baseline_to_abstract(mock_font) + assert result == BaselineOffset.NONE + + +class TestPptxFontToFull: + """Tests for pptx_font_to_full conversion.""" + + def test_basic_font_properties(self): + """Basic font properties should be extracted.""" + mock_font = MagicMock() + mock_font.bold = True + mock_font.italic = True + mock_font.strike = True + mock_font.name = "Arial" + mock_size = MagicMock() + mock_size.pt = 14.0 + mock_font.size = mock_size + mock_font.underline = True + mock_font.small_caps = False + mock_font.all_caps = False + mock_font.superscript = False + mock_font.subscript = False + mock_font.shadow = False + mock_font.emboss = False + mock_font.imprint = False + mock_font.double_strike = False + mock_font.color = MagicMock() + mock_font.color.rgb = (255, 0, 0) + + result = pptx_font_to_full(mock_font) + + # Check markdown properties + assert result.markdown.bold is True + assert result.markdown.italic is True + assert result.markdown.strikethrough is True + assert result.markdown.is_code is False # Arial is not monospace + assert result.markdown.hyperlink is None + + # Check rich properties + assert result.rich.font_family == "Arial" + assert result.rich.font_size_pt == 14.0 + assert result.rich.underline is True + assert result.rich.foreground_color.red == 1.0 + + def test_monospace_font_is_code(self): + """Monospace font should set is_code to True.""" + mock_font = MagicMock() + mock_font.bold = False + mock_font.italic = False + mock_font.strike = False + mock_font.name = "Courier New" + mock_font.size = None + mock_font.underline = False + mock_font.small_caps = False + mock_font.all_caps = False + mock_font.superscript = False + mock_font.subscript = False + 
mock_font.shadow = False + mock_font.emboss = False + mock_font.imprint = False + mock_font.double_strike = False + mock_font.color = MagicMock() + mock_font.color.rgb = None + + result = pptx_font_to_full(mock_font) + assert result.markdown.is_code is True + + def test_with_hyperlink(self): + """Hyperlink should be captured.""" + mock_font = MagicMock() + mock_font.bold = False + mock_font.italic = False + mock_font.strike = False + mock_font.name = "Arial" + mock_font.size = None + mock_font.underline = False + mock_font.small_caps = False + mock_font.all_caps = False + mock_font.superscript = False + mock_font.subscript = False + mock_font.shadow = False + mock_font.emboss = False + mock_font.imprint = False + mock_font.double_strike = False + mock_font.color = MagicMock() + mock_font.color.rgb = None + + result = pptx_font_to_full(mock_font, hyperlink_address="https://example.com") + assert result.markdown.hyperlink == "https://example.com" + + +class TestPptxFontToRich: + """Tests for pptx_font_to_rich - should only extract RichStyle.""" + + def test_extracts_only_rich(self): + """Should extract only RichStyle, ignoring markdown properties.""" + mock_font = MagicMock() + mock_font.bold = True # markdown - should not affect RichStyle equality + mock_font.italic = True + mock_font.strike = True + mock_font.name = "Arial" + mock_size = MagicMock() + mock_size.pt = 14.0 + mock_font.size = mock_size + mock_font.underline = False + mock_font.small_caps = False + mock_font.all_caps = False + mock_font.superscript = False + mock_font.subscript = False + mock_font.shadow = False + mock_font.emboss = False + mock_font.imprint = False + mock_font.double_strike = False + mock_font.color = MagicMock() + mock_font.color.rgb = None + + result = pptx_font_to_rich(mock_font) + + # Should be a RichStyle + assert isinstance(result, RichStyle) + + # Should have rich properties + assert result.font_family == "Arial" + assert result.font_size_pt == 14.0 + + def 
test_uniqueness_ignores_bold(self): + """Two fonts differing only in bold should produce equal RichStyle.""" + mock_font1 = MagicMock() + mock_font1.bold = True + mock_font1.italic = False + mock_font1.strike = False + mock_font1.name = "Arial" + mock_size = MagicMock() + mock_size.pt = 14.0 + mock_font1.size = mock_size + mock_font1.underline = False + mock_font1.small_caps = False + mock_font1.all_caps = False + mock_font1.superscript = False + mock_font1.subscript = False + mock_font1.shadow = False + mock_font1.emboss = False + mock_font1.imprint = False + mock_font1.double_strike = False + mock_font1.color = MagicMock() + mock_font1.color.rgb = None + + mock_font2 = MagicMock() + mock_font2.bold = False # Different from font1 + mock_font2.italic = False + mock_font2.strike = False + mock_font2.name = "Arial" + mock_font2.size = mock_size + mock_font2.underline = False + mock_font2.small_caps = False + mock_font2.all_caps = False + mock_font2.superscript = False + mock_font2.subscript = False + mock_font2.shadow = False + mock_font2.emboss = False + mock_font2.imprint = False + mock_font2.double_strike = False + mock_font2.color = MagicMock() + mock_font2.color.rgb = None + + rich1 = pptx_font_to_rich(mock_font1) + rich2 = pptx_font_to_rich(mock_font2) + + # RichStyles should be equal since they differ only in bold + assert rich1 == rich2 + + +class TestApplyRichStyleToPptxRun: + """Tests for apply_rich_style_to_pptx_run.""" + + def test_applies_font_family(self): + """Should apply font family.""" + mock_run = MagicMock() + rich = RichStyle(font_family="Arial") + + apply_rich_style_to_pptx_run(rich, mock_run) + + assert mock_run.font.name == "Arial" + + def test_applies_font_size(self): + """Should apply font size.""" + mock_run = MagicMock() + rich = RichStyle(font_size_pt=14.0) + + apply_rich_style_to_pptx_run(rich, mock_run) + + # The size should be set (Pt object) + mock_run.font.size = rich.font_size_pt # Check assignment happened + + def 
test_applies_foreground_color(self): + """Should apply foreground color.""" + mock_run = MagicMock() + rich = RichStyle(foreground_color=AbstractColor(red=1.0, green=0.0, blue=0.0)) + + apply_rich_style_to_pptx_run(rich, mock_run) + + # Color should be set via rgb property + mock_run.font.color.rgb # Just verify access doesn't fail + + def test_applies_underline(self): + """Should apply underline.""" + mock_run = MagicMock() + rich = RichStyle(underline=True) + + apply_rich_style_to_pptx_run(rich, mock_run) + + assert mock_run.font.underline is True + + +class TestApplyMarkdownStyleToPptxRun: + """Tests for apply_markdown_style_to_pptx_run.""" + + def test_applies_bold(self): + """Should apply bold.""" + mock_run = MagicMock() + md = MarkdownRenderableStyle(bold=True) + + apply_markdown_style_to_pptx_run(md, mock_run) + + assert mock_run.font.bold is True + + def test_applies_italic(self): + """Should apply italic.""" + mock_run = MagicMock() + md = MarkdownRenderableStyle(italic=True) + + apply_markdown_style_to_pptx_run(md, mock_run) + + assert mock_run.font.italic is True + + def test_applies_code_font(self): + """Should apply Courier New for code.""" + mock_run = MagicMock() + mock_run.font.name = None # No existing font + md = MarkdownRenderableStyle(is_code=True) + + apply_markdown_style_to_pptx_run(md, mock_run) + + assert mock_run.font.name == "Courier New" + + def test_code_does_not_override_existing_font(self): + """Should not override existing font when is_code is True.""" + mock_run = MagicMock() + mock_run.font.name = "Fira Code" # Already has a font + md = MarkdownRenderableStyle(is_code=True) + + apply_markdown_style_to_pptx_run(md, mock_run) + + # Should keep existing font + assert mock_run.font.name == "Fira Code" + + def test_applies_hyperlink(self): + """Should apply hyperlink.""" + mock_run = MagicMock() + md = MarkdownRenderableStyle(hyperlink="https://example.com") + + apply_markdown_style_to_pptx_run(md, mock_run) + + 
mock_run.hyperlink.address = "https://example.com" + + +class TestApplyFullStyleToPptxRun: + """Tests for apply_full_style_to_pptx_run.""" + + def test_applies_both_markdown_and_rich(self): + """Should apply both markdown and rich properties.""" + mock_run = MagicMock() + mock_run.font.name = None + + style = FullTextStyle( + markdown=MarkdownRenderableStyle(bold=True, is_code=True), + rich=RichStyle(font_size_pt=14.0, underline=True), + ) + + apply_full_style_to_pptx_run(style, mock_run) + + # Markdown properties + assert mock_run.font.bold is True + assert mock_run.font.name == "Courier New" # from is_code + + # Rich properties + assert mock_run.font.underline is True + + +# ============================================================================= +# Tests for Markdown Generation Functions (PPT -> Markdown) +# ============================================================================= + + +class TestEscapeMarkdownForTable: + """Tests for _escape_markdown_for_table.""" + + def test_escapes_pipe_characters(self): + """Should escape pipe characters.""" + text = "col1|col2|col3" + result = _escape_markdown_for_table(text) + assert result == "col1\\|col2\\|col3" + + def test_converts_newlines_to_br(self): + """Should convert newlines to <br> tags.""" + text = "line1\nline2\nline3" + result = _escape_markdown_for_table(text) + assert result == "line1<br>line2<br>line3" + + def test_preserves_curly_braces(self): + """Should NOT escape curly braces (template variables).""" + text = "Hello {name}, welcome to {place}" + result = _escape_markdown_for_table(text) + assert result == "Hello {name}, welcome to {place}" + + def test_combined_escaping(self): + """Should handle pipes and newlines together.""" + text = "a|b\nc|d" + result = _escape_markdown_for_table(text) + assert result == "a\\|b<br>c\\|d" + + def test_empty_string(self): + """Should handle empty string.""" + result = _escape_markdown_for_table("") + assert result == "" + + +class TestPptxRunToMarkdown: + 
"""Tests for pptx_run_to_markdown.""" + + def _create_mock_run( + self, + text="Test", + bold=False, + italic=False, + strike=False, + font_name="Arial", + hyperlink=None, + ): + """Helper to create mock run with specified properties.""" + mock_run = MagicMock() + mock_run.text = text + mock_run.font.bold = bold + mock_run.font.italic = italic + mock_run.font.strike = strike + mock_run.font.name = font_name + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + + if hyperlink: + mock_run.hyperlink.address = hyperlink + else: + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + return mock_run + + def test_plain_text(self): + """Plain text should be returned unchanged.""" + mock_run = self._create_mock_run(text="Hello World") + result = pptx_run_to_markdown(mock_run) + assert result == "Hello World" + + def test_bold_text(self): + """Bold text should be wrapped in **.""" + mock_run = self._create_mock_run(text="Bold", bold=True) + result = pptx_run_to_markdown(mock_run) + assert result == "**Bold**" + + def test_italic_text(self): + """Italic text should be wrapped in *.""" + mock_run = self._create_mock_run(text="Italic", italic=True) + result = pptx_run_to_markdown(mock_run) + assert result == "*Italic*" + + def test_bold_italic_text(self): + """Bold + italic text should be wrapped in ***.""" + mock_run = self._create_mock_run(text="BoldItalic", bold=True, italic=True) + result = pptx_run_to_markdown(mock_run) + assert result == "***BoldItalic***" + + def test_strikethrough_text(self): + """Strikethrough text should be wrapped in ~~.""" + mock_run = self._create_mock_run(text="Strike", strike=True) + 
result = pptx_run_to_markdown(mock_run) + assert result == "~~Strike~~" + + def test_code_text(self): + """Monospace font should produce backtick code.""" + mock_run = self._create_mock_run(text="code", font_name="Courier New") + result = pptx_run_to_markdown(mock_run) + assert result == "`code`" + + def test_hyperlink_text(self): + """Hyperlink should produce markdown link.""" + mock_run = self._create_mock_run( + text="Click here", hyperlink="https://example.com" + ) + result = pptx_run_to_markdown(mock_run) + assert result == "[Click here](https://example.com)" + + def test_bold_with_hyperlink(self): + """Bold with hyperlink should have bold inside link.""" + mock_run = self._create_mock_run( + text="Bold Link", bold=True, hyperlink="https://example.com" + ) + result = pptx_run_to_markdown(mock_run) + assert result == "[**Bold Link**](https://example.com)" + + def test_empty_text(self): + """Empty text should return empty string.""" + mock_run = self._create_mock_run(text="") + result = pptx_run_to_markdown(mock_run) + assert result == "" + + +class TestPptxParagraphToMarkdown: + """Tests for pptx_paragraph_to_markdown.""" + + def test_single_run_paragraph(self): + """Single run should be converted.""" + mock_para = MagicMock() + mock_run = MagicMock() + mock_run.text = "Hello" + mock_run.font.bold = False + mock_run.font.italic = False + mock_run.font.strike = False + mock_run.font.name = "Arial" + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + mock_para.runs = [mock_run] + mock_para.level = 0 + + result = 
pptx_paragraph_to_markdown(mock_para) + assert result == "Hello" + + def test_multiple_runs_paragraph(self): + """Multiple runs should be concatenated.""" + mock_para = MagicMock() + + def create_run(text, bold=False): + run = MagicMock() + run.text = text + run.font.bold = bold + run.font.italic = False + run.font.strike = False + run.font.name = "Arial" + run.font.size = None + run.font.underline = False + run.font.small_caps = False + run.font.all_caps = False + run.font.superscript = False + run.font.subscript = False + run.font.shadow = False + run.font.emboss = False + run.font.imprint = False + run.font.double_strike = False + run.font.color = MagicMock() + run.font.color.rgb = None + run.hyperlink = MagicMock() + run.hyperlink.address = None + return run + + mock_para.runs = [ + create_run("Hello "), + create_run("World", bold=True), + create_run("!"), + ] + mock_para.level = 0 + + result = pptx_paragraph_to_markdown(mock_para) + assert result == "Hello **World**!" + + def test_bullet_point_level_1(self): + """Level 1 paragraph with bullet XML should have bullet indentation.""" + mock_para = MagicMock() + mock_run = MagicMock() + mock_run.text = "Item" + mock_run.font.bold = False + mock_run.font.italic = False + mock_run.font.strike = False + mock_run.font.name = "Arial" + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + mock_para.runs = [mock_run] + mock_para.level = 1 + + # Mock the XML element to have bullet properties + # _paragraph_has_bullet() checks for buChar element in pPr + mock_pPr = MagicMock() + mock_buChar = MagicMock() # 
Represents the bullet character element + + def find_side_effect(tag): + if "buChar" in tag: + return mock_buChar + return None + + mock_pPr.find = MagicMock(side_effect=find_side_effect) + mock_para._element.get_or_add_pPr.return_value = mock_pPr + + result = pptx_paragraph_to_markdown(mock_para) + assert result == " - Item" + + def test_bullet_point_level_2(self): + """Level 2 paragraph with bullet XML should have double indentation.""" + mock_para = MagicMock() + mock_run = MagicMock() + mock_run.text = "Item" + mock_run.font.bold = False + mock_run.font.italic = False + mock_run.font.strike = False + mock_run.font.name = "Arial" + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + mock_para.runs = [mock_run] + mock_para.level = 2 + + # Mock the XML element to have bullet properties + mock_pPr = MagicMock() + mock_buChar = MagicMock() # Represents the bullet character element + + def find_side_effect(tag): + if "buChar" in tag: + return mock_buChar + return None + + mock_pPr.find = MagicMock(side_effect=find_side_effect) + mock_para._element.get_or_add_pPr.return_value = mock_pPr + + result = pptx_paragraph_to_markdown(mock_para) + assert result == " - Item" + + +class TestPptxTextFrameToMarkdown: + """Tests for pptx_text_frame_to_markdown.""" + + def test_none_text_frame(self): + """None text frame should return empty string.""" + result = pptx_text_frame_to_markdown(None) + assert result == "" + + def test_single_paragraph(self): + """Single paragraph should be converted.""" + mock_frame = MagicMock() + mock_para = MagicMock() + mock_run = 
MagicMock() + mock_run.text = "Hello" + mock_run.font.bold = False + mock_run.font.italic = False + mock_run.font.strike = False + mock_run.font.name = "Arial" + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + mock_para.runs = [mock_run] + mock_para.level = 0 + mock_frame.paragraphs = [mock_para] + + result = pptx_text_frame_to_markdown(mock_frame) + assert result == "Hello" + + def test_multiple_paragraphs(self): + """Multiple paragraphs should be joined with newlines.""" + mock_frame = MagicMock() + + def create_para(text): + para = MagicMock() + run = MagicMock() + run.text = text + run.font.bold = False + run.font.italic = False + run.font.strike = False + run.font.name = "Arial" + run.font.size = None + run.font.underline = False + run.font.small_caps = False + run.font.all_caps = False + run.font.superscript = False + run.font.subscript = False + run.font.shadow = False + run.font.emboss = False + run.font.imprint = False + run.font.double_strike = False + run.font.color = MagicMock() + run.font.color.rgb = None + run.hyperlink = MagicMock() + run.hyperlink.address = None + para.runs = [run] + para.level = 0 + return para + + mock_frame.paragraphs = [create_para("Line 1"), create_para("Line 2")] + + result = pptx_text_frame_to_markdown(mock_frame) + assert result == "Line 1\nLine 2" + + +class TestPptxTableToMarkdown: + """Tests for pptx_table_to_markdown.""" + + def _create_mock_cell(self, text): + """Helper to create mock cell with text.""" + mock_cell = MagicMock() + mock_frame = MagicMock() + mock_para = MagicMock() + mock_run = 
MagicMock() + + mock_run.text = text + mock_run.font.bold = False + mock_run.font.italic = False + mock_run.font.strike = False + mock_run.font.name = "Arial" + mock_run.font.size = None + mock_run.font.underline = False + mock_run.font.small_caps = False + mock_run.font.all_caps = False + mock_run.font.superscript = False + mock_run.font.subscript = False + mock_run.font.shadow = False + mock_run.font.emboss = False + mock_run.font.imprint = False + mock_run.font.double_strike = False + mock_run.font.color = MagicMock() + mock_run.font.color.rgb = None + mock_run.hyperlink = MagicMock() + mock_run.hyperlink.address = None + + mock_para.runs = [mock_run] + mock_para.level = 0 + mock_frame.paragraphs = [mock_para] + mock_cell.text_frame = mock_frame + + return mock_cell + + def _create_mock_row(self, cell_texts): + """Helper to create mock row with cells.""" + mock_row = MagicMock() + mock_row.cells = [self._create_mock_cell(text) for text in cell_texts] + return mock_row + + def test_simple_2x2_table(self): + """Should generate markdown for 2x2 table.""" + mock_table = MagicMock() + mock_table.rows = [ + self._create_mock_row(["Header1", "Header2"]), + self._create_mock_row(["Cell1", "Cell2"]), + ] + + result = pptx_table_to_markdown(mock_table) + + expected = "| Header1 | Header2 |\n| --- | --- |\n| Cell1 | Cell2 |" + assert result == expected + + def test_3x3_table(self): + """Should generate markdown for 3x3 table.""" + mock_table = MagicMock() + mock_table.rows = [ + self._create_mock_row(["A", "B", "C"]), + self._create_mock_row(["D", "E", "F"]), + self._create_mock_row(["G", "H", "I"]), + ] + + result = pptx_table_to_markdown(mock_table) + + expected = ( + "| A | B | C |\n| --- | --- | --- |\n| D | E | F |\n| G | H | I |" + ) + assert result == expected + + def test_table_with_pipe_in_content(self): + """Should escape pipes in cell content.""" + mock_table = MagicMock() + mock_table.rows = [ + self._create_mock_row(["Header", "Value"]), + 
self._create_mock_row(["A|B", "C|D"]), + ] + + result = pptx_table_to_markdown(mock_table) + + assert "A\\|B" in result + assert "C\\|D" in result + + def test_table_with_empty_cells(self): + """Should handle empty cells.""" + mock_table = MagicMock() + mock_table.rows = [ + self._create_mock_row(["Header1", "Header2"]), + self._create_mock_row(["", "Data"]), + ] + + result = pptx_table_to_markdown(mock_table) + + expected = "| Header1 | Header2 |\n| --- | --- |\n| | Data |" + assert result == expected + + def test_empty_table(self): + """Should return empty string for empty table.""" + mock_table = MagicMock() + mock_table.rows = [] + + result = pptx_table_to_markdown(mock_table) + assert result == "" + + def test_none_table(self): + """Should return empty string for None table.""" + result = pptx_table_to_markdown(None) + assert result == "" diff --git a/tests/test_pptx/test_pptx_integration.py b/tests/test_pptx/test_pptx_integration.py new file mode 100644 index 0000000..4c722ee --- /dev/null +++ b/tests/test_pptx/test_pptx_integration.py @@ -0,0 +1,491 @@ +"""Integration tests for PowerPoint adapter.""" + +import pytest +import os +import tempfile +from pptx import Presentation +from pptx.util import Inches + +from gslides_api.adapters.pptx_adapter import ( + PowerPointAPIClient, + PowerPointPresentation, + PowerPointSlide, + PowerPointShapeElement, + PowerPointImageElement, + PowerPointTableElement, +) +from gslides_api.agnostic.element import MarkdownTableElement + + +class TestPowerPointIntegration: + """Integration tests for PowerPoint adapter functionality.""" + + def create_sample_presentation(self) -> str: + """Create a sample presentation for testing.""" + prs = Presentation() + + # Title slide + title_slide_layout = prs.slide_layouts[0] + slide = prs.slides.add_slide(title_slide_layout) + title = slide.shapes.title + subtitle = slide.placeholders[1] + title.text = "Integration Test Presentation" + subtitle.text = "Testing PowerPoint Adapter" + + # 
Content slide + bullet_slide_layout = prs.slide_layouts[1] + slide = prs.slides.add_slide(bullet_slide_layout) + shapes = slide.shapes + title_shape = shapes.title + body_shape = shapes.placeholders[1] + + title_shape.text = 'Features Test' + tf = body_shape.text_frame + tf.text = 'Text formatting' + + # Add some formatted text + p = tf.add_paragraph() + p.text = 'Bullet points' + p.level = 0 + + p = tf.add_paragraph() + p.text = 'Nested bullets' + p.level = 1 + + # Table slide + table_slide_layout = prs.slide_layouts[5] + slide = prs.slides.add_slide(table_slide_layout) + shapes = slide.shapes + shapes.title.text = 'Data Table' + + # Add table + rows, cols = 3, 3 + left = top = Inches(1) + width = Inches(8) + height = Inches(3) + + table = shapes.add_table(rows, cols, left, top, width, height).table + + # Fill table + headers = ['Name', 'Age', 'City'] + data = [ + ['Alice', '25', 'New York'], + ['Bob', '30', 'San Francisco'] + ] + + for col_idx, header in enumerate(headers): + table.cell(0, col_idx).text = header + + for row_idx, row_data in enumerate(data, 1): + for col_idx, cell_data in enumerate(row_data): + table.cell(row_idx, col_idx).text = cell_data + + # Save to temp file + temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') + prs.save(temp_file.name) + temp_file.close() + + return temp_file.name + + def test_full_presentation_workflow(self): + """Test complete presentation workflow.""" + pptx_path = self.create_sample_presentation() + + try: + # Load presentation + presentation = PowerPointPresentation.from_id(PowerPointAPIClient(), pptx_path) + assert presentation is not None + assert len(presentation.slides) == 3 + + # Test API client + api_client = PowerPointAPIClient() + assert api_client is not None + + # Test slide access + first_slide = presentation.slides[0] + assert isinstance(first_slide, PowerPointSlide) + + # Test element access + elements = first_slide.elements + assert len(elements) > 0 + + # Test text elements + 
text_elements = [e for e in elements if hasattr(e, 'has_text') and e.has_text] + assert len(text_elements) > 0 + + # Test shape elements + shape_elements = [e for e in elements if isinstance(e, PowerPointShapeElement)] + assert len(shape_elements) > 0 + + finally: + os.unlink(pptx_path) + + def test_text_manipulation_workflow(self): + """Test text manipulation workflow.""" + pptx_path = self.create_sample_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + content_slide = presentation.slides[1] # Second slide + + # Find text elements + text_elements = [e for e in content_slide.elements if isinstance(e, PowerPointShapeElement) and e.has_text] + assert len(text_elements) > 0 + + text_element = text_elements[0] + + # Read original text + original_text = text_element.read_text(as_markdown=False) + assert len(original_text) > 0 + + # Write new content + new_content = """# Updated Content + +This slide has been updated with: + +- **Bold** formatting +- *Italic* formatting +- Multiple bullet points + - With nesting + - And sub-bullets + +Regular paragraph text.""" + + text_element.write_text(api_client, new_content) + + # Read back as markdown + markdown_text = text_element.read_text(as_markdown=True) + assert "Updated Content" in markdown_text + assert "Bold" in markdown_text or "**Bold**" in markdown_text + + # Save presentation + presentation.save(api_client) + + # Reload and verify persistence + reloaded_presentation = PowerPointPresentation.from_id(api_client, pptx_path) + reloaded_slide = reloaded_presentation.slides[1] + reloaded_text_elements = [e for e in reloaded_slide.elements if isinstance(e, PowerPointShapeElement) and e.has_text] + + if reloaded_text_elements: + reloaded_text = reloaded_text_elements[0].read_text(as_markdown=False) + assert "Updated Content" in reloaded_text + + finally: + os.unlink(pptx_path) + + def test_table_manipulation_workflow(self): + """Test table 
manipulation workflow.""" + pptx_path = self.create_sample_presentation() + + try: + presentation = PowerPointPresentation.from_id(PowerPointAPIClient(), pptx_path) + table_slide = presentation.slides[2] # Third slide with table + + # Find table elements + table_elements = [e for e in table_slide.elements if isinstance(e, PowerPointTableElement)] + + if table_elements: # Only test if we successfully found/converted table elements + table_element = table_elements[0] + api_client = PowerPointAPIClient() + + # Test table resizing + try: + table_element.resize(api_client, rows=4, cols=4) + except Exception as e: + # Table resizing might fail due to python-pptx limitations + print(f"Table resize failed (expected): {e}") + + # Test content update (if we have MarkdownTableElement support) + try: + # Create markdown table data + table_data = [ + ["Product", "Price", "Stock", "Category"], + ["Laptop", "$999", "50", "Electronics"], + ["Phone", "$599", "100", "Electronics"], + ["Book", "$25", "200", "Education"] + ] + + # This would need MarkdownTableElement implementation + # For now, just verify the table element exists + assert table_element.pptx_element is not None + + except Exception as e: + print(f"Table content update test skipped: {e}") + + finally: + os.unlink(pptx_path) + + def test_presentation_copying_workflow(self): + """Test presentation copying workflow.""" + pptx_path = self.create_sample_presentation() + + try: + with tempfile.TemporaryDirectory() as temp_dir: + api_client = PowerPointAPIClient() + # Load original + original_presentation = PowerPointPresentation.from_id(api_client, pptx_path) + + # Copy presentation + copied_presentation = original_presentation.copy_via_drive( + api_client, + "copied_test_presentation", + temp_dir + ) + + # Verify copy + assert copied_presentation is not None + assert copied_presentation.file_path != original_presentation.file_path + assert os.path.exists(copied_presentation.file_path) + assert 
len(copied_presentation.slides) == len(original_presentation.slides) + + # Modify copy + if copied_presentation.slides: + first_slide = copied_presentation.slides[0] + text_elements = [e for e in first_slide.elements if isinstance(e, PowerPointShapeElement) and e.has_text] + + if text_elements: + text_elements[0].write_text(api_client, "Modified copy content") + + # Save modified copy + copied_presentation.save(api_client) + + # Verify original is unchanged + original_presentation.sync_from_cloud(api_client) + original_first_slide = original_presentation.slides[0] + original_text_elements = [e for e in original_first_slide.elements if isinstance(e, PowerPointShapeElement) and e.has_text] + + if original_text_elements: + original_text = original_text_elements[0].read_text(as_markdown=False) + assert "Modified copy content" not in original_text + + finally: + os.unlink(pptx_path) + + def test_slide_duplication_workflow(self): + """Test slide duplication workflow.""" + pptx_path = self.create_sample_presentation() + + try: + presentation = PowerPointPresentation.from_id(PowerPointAPIClient(), pptx_path) + api_client = PowerPointAPIClient() + + original_slide_count = len(presentation.slides) + assert original_slide_count > 0 + + # Duplicate first slide + first_slide = presentation.slides[0] + try: + duplicated_slide = first_slide.duplicate(api_client) + assert isinstance(duplicated_slide, PowerPointSlide) + + # Note: The duplicated slide might not automatically be added to the presentation + # depending on the implementation, so we might not see an increase in slide count + print(f"Original slides: {original_slide_count}") + print(f"Duplicated slide created: {duplicated_slide.objectId}") + + except Exception as e: + # Slide duplication might have limitations in python-pptx + print(f"Slide duplication test failed (might be expected): {e}") + + finally: + os.unlink(pptx_path) + + def test_speaker_notes_workflow(self): + """Test speaker notes workflow.""" + pptx_path = 
self.create_sample_presentation() + + try: + presentation = PowerPointPresentation.from_id(PowerPointAPIClient(), pptx_path) + api_client = PowerPointAPIClient() + + first_slide = presentation.slides[0] + + # Test speaker notes (if available) + if first_slide.speaker_notes: + # Write notes + notes_content = """Speaker notes for this slide: + +- Key point 1 +- Key point 2 with **emphasis** +- Key point 3 + +Remember to speak slowly and clearly.""" + + first_slide.speaker_notes.write_text(api_client, notes_content) + + # Read back notes + read_notes = first_slide.speaker_notes.read_text(as_markdown=True) + assert "Speaker notes" in read_notes + assert "Key point 1" in read_notes + + # Save and reload + presentation.save(api_client) + reloaded_presentation = PowerPointPresentation.from_id(api_client, pptx_path) + reloaded_slide = reloaded_presentation.slides[0] + + if reloaded_slide.speaker_notes: + reloaded_notes = reloaded_slide.speaker_notes.read_text(as_markdown=False) + assert "Speaker notes" in reloaded_notes + + finally: + os.unlink(pptx_path) + + def test_error_handling_workflow(self): + """Test error handling in various scenarios.""" + # Test loading non-existent file + with pytest.raises(FileNotFoundError): + PowerPointPresentation.from_id(PowerPointAPIClient(), "non_existent_file.pptx") + + # Test invalid file format + with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file: + temp_file.write(b"This is not a PowerPoint file") + temp_file.flush() + + try: + with pytest.raises(ValueError): + PowerPointPresentation.from_id(PowerPointAPIClient(), temp_file.name) + finally: + os.unlink(temp_file.name) + + # Test API client operations with invalid paths + api_client = PowerPointAPIClient() + + with pytest.raises(FileNotFoundError): + api_client.copy_presentation("non_existent.pptx", "copy") + + with pytest.raises(FileNotFoundError): + api_client.create_folder("test", parent_folder_id="non_existent_folder") + + def 
test_file_operations_workflow(self): + """Test file operations workflow.""" + api_client = PowerPointAPIClient() + + with tempfile.TemporaryDirectory() as temp_dir: + # Create test file + test_file = os.path.join(temp_dir, "test.pptx") + with open(test_file, "w") as f: + f.write("test content") + + # Test copy + copy_result = api_client.copy_presentation(test_file, "copied_file") + assert os.path.exists(copy_result["id"]) + + # Test folder creation + folder_result = api_client.create_folder("test_folder", parent_folder_id=temp_dir) + assert os.path.exists(folder_result["id"]) + assert os.path.isdir(folder_result["id"]) + + # Test file deletion + api_client.delete_file(copy_result["id"]) + assert not os.path.exists(copy_result["id"]) + + # Test folder deletion + api_client.delete_file(folder_result["id"]) + assert not os.path.exists(folder_result["id"]) + + def test_presentation_metadata_workflow(self): + """Test presentation metadata handling.""" + pptx_path = self.create_sample_presentation() + + try: + presentation = PowerPointPresentation.from_id(PowerPointAPIClient(), pptx_path) + + # Test basic metadata + assert presentation.presentationId == pptx_path + assert presentation.title is not None + assert presentation.url.startswith("file://") + + # Test slide metadata + first_slide = presentation.slides[0] + assert first_slide.objectId is not None + assert hasattr(first_slide, 'slideProperties') + + # Test element metadata + if first_slide.elements: + first_element = first_slide.elements[0] + assert first_element.objectId is not None + assert hasattr(first_element, 'alt_text') + + finally: + os.unlink(pptx_path) + + def test_image_replace_workflow(self): + """Test image replacement workflow - verifies that images can be replaced in slides.""" + # Create a presentation with an image + prs = Presentation() + blank_slide_layout = prs.slide_layouts[6] # Blank layout + slide = prs.slides.add_slide(blank_slide_layout) + + # Add speaker notes for slide identification + 
notes_slide = slide.notes_slide + notes_slide.notes_text_frame.text = "Test Image Slide" + + # Create a simple test image + test_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name + replacement_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name + + try: + # Create test images using PIL + from PIL import Image + + # Create initial red image + img1 = Image.new("RGB", (100, 100), color="red") + img1.save(test_image_path) + + # Create replacement blue image + img2 = Image.new("RGB", (100, 100), color="blue") + img2.save(replacement_image_path) + + # Add the test image to the slide with alt text for identification + picture = slide.shapes.add_picture( + test_image_path, Inches(1), Inches(1), Inches(2), Inches(2) + ) + # Set alt text title for identification + picture._element.xpath(".//p:cNvPr")[0].attrib["title"] = "TestImage" + + # Save presentation + pptx_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pptx").name + prs.save(pptx_path) + + # Load the presentation using our adapter + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + + # Find the image element + first_slide = presentation.slides[0] + image_elements = [ + e for e in first_slide.elements if isinstance(e, PowerPointImageElement) + ] + + assert len(image_elements) > 0, "No image elements found in slide" + image_element = image_elements[0] + + # Verify pptx_slide was propagated to element + assert image_element.pptx_slide is not None, "pptx_slide not propagated to element" + + # Replace the image + image_element.replace_image(api_client, file=replacement_image_path) + + # Save and reload to verify the change persisted + presentation.save(api_client) + + # Reload and verify + reloaded_prs = Presentation(pptx_path) + reloaded_slide = reloaded_prs.slides[0] + + # Find the picture shape + picture_shapes = [s for s in reloaded_slide.shapes if hasattr(s, "image")] + assert len(picture_shapes) > 0, 
"No picture shapes found after replacement" + + # Verify the image was actually replaced by checking the image blob + # (The blue image should be different from the red one) + new_image = picture_shapes[0].image + assert new_image is not None, "Image blob not found" + assert len(new_image.blob) > 0, "Image blob is empty" + + finally: + # Cleanup + for path in [test_image_path, replacement_image_path, pptx_path]: + if os.path.exists(path): + os.unlink(path) \ No newline at end of file diff --git a/tests/test_pptx/test_pptx_roundtrip.py b/tests/test_pptx/test_pptx_roundtrip.py new file mode 100644 index 0000000..a304f5e --- /dev/null +++ b/tests/test_pptx/test_pptx_roundtrip.py @@ -0,0 +1,345 @@ +"""Test PowerPoint roundtrip functionality for markdown conversion.""" + +import pytest +import os +import tempfile +from pptx import Presentation +from pptx.util import Inches + +from gslides_api.adapters.pptx_adapter import ( + PowerPointAPIClient, + PowerPointPresentation, + PowerPointSlide, + PowerPointShapeElement, + PowerPointSpeakerNotes, +) + + +class TestPowerPointRoundtrip: + """Test roundtrip functionality for PowerPoint presentations.""" + + def create_test_presentation(self) -> str: + """Create a test presentation file and return its path.""" + prs = Presentation() + + # Set presentation title + prs.core_properties.title = "Test Presentation" + + # Add title slide + title_slide_layout = prs.slide_layouts[0] + slide = prs.slides.add_slide(title_slide_layout) + title = slide.shapes.title + subtitle = slide.placeholders[1] + title.text = "Test Presentation" + subtitle.text = "Roundtrip Testing" + + # Add content slide with bullet points + bullet_slide_layout = prs.slide_layouts[1] + slide = prs.slides.add_slide(bullet_slide_layout) + shapes = slide.shapes + title_shape = shapes.title + body_shape = shapes.placeholders[1] + + title_shape.text = 'Bullet Points Test' + tf = body_shape.text_frame + tf.text = 'First bullet point' + + p = tf.add_paragraph() + p.text = 
'Second bullet point' + p.level = 1 + + p = tf.add_paragraph() + p.text = 'Third bullet point with **bold** text' + p.level = 0 + + # Add table slide + table_slide_layout = prs.slide_layouts[5] # Title only layout + slide = prs.slides.add_slide(table_slide_layout) + shapes = slide.shapes + shapes.title.text = 'Table Test' + + # Add table + rows = cols = 2 + left = top = Inches(2.0) + width = Inches(6.0) + height = Inches(0.8) + + table = shapes.add_table(rows, cols, left, top, width, height).table + + # Fill table with data + table.cell(0, 0).text = 'Header 1' + table.cell(0, 1).text = 'Header 2' + table.cell(1, 0).text = 'Data 1' + table.cell(1, 1).text = 'Data 2' + + # Save to temporary file + temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') + prs.save(temp_file.name) + temp_file.close() + + return temp_file.name + + def test_load_presentation(self): + """Test loading a PowerPoint presentation.""" + pptx_path = self.create_test_presentation() + + try: + # Load presentation + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + + assert presentation is not None + assert presentation.title == "Test Presentation" + assert len(presentation.slides) == 3 + assert presentation.file_path == pptx_path + + finally: + os.unlink(pptx_path) + + def test_read_slide_text_markdown(self): + """Test reading slide text as markdown.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + bullet_slide = presentation.slides[1] # Second slide with bullets + + # Find text shapes + text_elements = [elem for elem in bullet_slide.elements if hasattr(elem, 'has_text') and elem.has_text] + + assert len(text_elements) > 0 + + # Read text as markdown from body shape (should contain bullets) + body_element = None + for elem in text_elements: + if isinstance(elem, PowerPointShapeElement): + text = 
elem.read_text(as_markdown=True) + if 'bullet' in text.lower(): + body_element = elem + break + + assert body_element is not None + + finally: + os.unlink(pptx_path) + + def test_write_read_text_roundtrip(self): + """Test writing text and reading it back.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + slide = presentation.slides[1] # Bullet slide + + # Find a text element to modify + text_elements = [elem for elem in slide.elements if hasattr(elem, 'has_text') and elem.has_text] + assert len(text_elements) > 0 + + text_element = text_elements[0] + if isinstance(text_element, PowerPointShapeElement): + # Test markdown content + markdown_content = """# Test Header + +This is a paragraph. + +- First bullet +- Second bullet + - Nested bullet +- **Bold text** and *italic text*""" + + # Write content + text_element.write_text(api_client, markdown_content) + + # Read back and verify + read_content = text_element.read_text(as_markdown=True) + + # Check that basic structure is preserved + assert "Test Header" in read_content + assert "This is a paragraph" in read_content + assert "First bullet" in read_content + assert "Second bullet" in read_content + + finally: + os.unlink(pptx_path) + + def test_speaker_notes_roundtrip(self): + """Test writing and reading speaker notes.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + slide = presentation.slides[0] # First slide + + # Add speaker notes + if slide.speaker_notes: + api_client = PowerPointAPIClient() + notes_content = "These are speaker notes with **bold** text and bullet points:\n- Note 1\n- Note 2" + + slide.speaker_notes.write_text(api_client, notes_content) + + # Read back + read_notes = slide.speaker_notes.read_text(as_markdown=True) + + assert "speaker notes" in read_notes.lower() + 
assert "Note 1" in read_notes + assert "Note 2" in read_notes + + finally: + os.unlink(pptx_path) + + def test_presentation_copy_roundtrip(self): + """Test copying a presentation and verifying content.""" + pptx_path = self.create_test_presentation() + + try: + # Load original presentation + api_client = PowerPointAPIClient() + original_presentation = PowerPointPresentation.from_id(api_client, pptx_path) + + # Copy presentation + api_client = PowerPointAPIClient() + with tempfile.TemporaryDirectory() as temp_dir: + copied_presentation = original_presentation.copy_via_drive( + api_client, + "copied_presentation", + temp_dir + ) + + # Verify copy + assert copied_presentation is not None + assert len(copied_presentation.slides) == len(original_presentation.slides) + assert copied_presentation.file_path != original_presentation.file_path + assert os.path.exists(copied_presentation.file_path) + + # Verify content is preserved + assert copied_presentation.title == original_presentation.title + + finally: + os.unlink(pptx_path) + + def test_table_content_roundtrip(self): + """Test table content preservation in roundtrip.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + table_slide = presentation.slides[2] # Third slide with table + + # Find table elements + table_elements = [elem for elem in table_slide.elements if elem.type == "table"] + + if table_elements: + table_element = table_elements[0] + # Basic verification that table structure is accessible + assert table_element is not None + assert hasattr(table_element, 'pptx_element') + + finally: + os.unlink(pptx_path) + + def test_save_and_reload_presentation(self): + """Test saving modifications and reloading presentation.""" + pptx_path = self.create_test_presentation() + + try: + # Load presentation + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, 
pptx_path) + + # Modify content + first_slide = presentation.slides[0] + text_elements = [elem for elem in first_slide.elements if hasattr(elem, 'has_text') and elem.has_text] + + if text_elements: + text_element = text_elements[0] + if isinstance(text_element, PowerPointShapeElement): + text_element.write_text(api_client, "Modified content for testing") + + # Save presentation + presentation.save(api_client) + + # Reload and verify changes + reloaded_presentation = PowerPointPresentation.from_id(api_client, pptx_path) + reloaded_slide = reloaded_presentation.slides[0] + reloaded_text_elements = [elem for elem in reloaded_slide.elements if hasattr(elem, 'has_text') and elem.has_text] + + if reloaded_text_elements: + reloaded_text = reloaded_text_elements[0] + if isinstance(reloaded_text, PowerPointShapeElement): + content = reloaded_text.read_text(as_markdown=False) + assert "Modified content for testing" in content + + finally: + os.unlink(pptx_path) + + def test_markdown_formatting_preservation(self): + """Test that markdown formatting is preserved in roundtrip.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + slide = presentation.slides[1] + + # Find text element + text_elements = [elem for elem in slide.elements if hasattr(elem, 'has_text') and elem.has_text] + + if text_elements: + text_element = text_elements[0] + if isinstance(text_element, PowerPointShapeElement): + # Test various markdown features + markdown_content = """ +**Bold text** +*Italic text* +Regular text + +- Bullet 1 +- Bullet 2 + - Sub bullet +- Bullet 3 + +Another paragraph +""" + + # Write and read back + text_element.write_text(api_client, markdown_content) + read_back = text_element.read_text(as_markdown=True) + + # Verify basic structure preservation + # Note: exact formatting may vary due to PowerPoint's text handling + assert "Bold text" in read_back or "**Bold text**" 
in read_back + assert "Italic text" in read_back or "*Italic text*" in read_back + assert "Bullet 1" in read_back + assert "Bullet 2" in read_back + + finally: + os.unlink(pptx_path) + + def test_empty_content_handling(self): + """Test handling of empty content in roundtrip.""" + pptx_path = self.create_test_presentation() + + try: + api_client = PowerPointAPIClient() + presentation = PowerPointPresentation.from_id(api_client, pptx_path) + slide = presentation.slides[0] + + # Test with empty content + text_elements = [elem for elem in slide.elements if hasattr(elem, 'has_text') and elem.has_text] + + if text_elements: + text_element = text_elements[0] + if isinstance(text_element, PowerPointShapeElement): + # Write empty content + text_element.write_text(api_client, "") + + # Read back + read_content = text_element.read_text() + assert read_content == "" + + finally: + os.unlink(pptx_path) \ No newline at end of file diff --git a/tests/test_pptx/test_pptx_shape_styling.py b/tests/test_pptx/test_pptx_shape_styling.py new file mode 100644 index 0000000..d875443 --- /dev/null +++ b/tests/test_pptx/test_pptx_shape_styling.py @@ -0,0 +1,1128 @@ +""" +Test suite for PPTX shape styling functionality. + +Tests shape fill copying and text frame anchor (vertical alignment) copying. 
+""" + +import os +import tempfile + +import pytest +from copy import deepcopy +from lxml import etree +from pptx import Presentation +from pptx.dml.color import RGBColor +from pptx.enum.dml import MSO_FILL +from pptx.util import Inches, Pt + +from gslides_api.pptx.shape_copier import ShapeCopier, _DRAWINGML_NS, _FILL_TAGS +from gslides_api.pptx.id_manager import IdManager +from gslides_api.pptx.xml_utils import XmlUtils + + +class TestShapeFillCopying: + """Test shape fill copying functionality.""" + + def test_copy_solid_fill(self): + """Test copying solid fill color from one shape to another.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source shape with solid fill + source = slide.shapes.add_shape( + 1, # Rectangle + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + source.fill.solid() + source.fill.fore_color.rgb = RGBColor(255, 0, 0) # Red + + # Create target shape without fill + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_shape_fill(source, target) + + # Verify fill was copied by checking XML + target_spPr = target._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if target_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + target_spPr = target._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + assert target_spPr is not None, "target shape should have spPr element" + + solidFill = target_spPr.find(f"{{{_DRAWINGML_NS}}}solidFill") + + # solidFill should exist after copying solid fill + assert solidFill is not None, ( + "solidFill element should exist in target after copying solid fill" + ) + + # Verify fill type matches source + assert target.fill.type == MSO_FILL.SOLID, ( + f"Expected target fill type SOLID, got {target.fill.type}" + ) + + # Verify 
color matches source (Red: RGB 255, 0, 0) + assert target.fill.fore_color.rgb == RGBColor(255, 0, 0), ( + f"Expected RGB(255, 0, 0), got {target.fill.fore_color.rgb}" + ) + + def test_copy_no_fill(self): + """Test that noFill is properly copied.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source shape with no fill + source = slide.shapes.add_shape( + 1, + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + source.fill.background() # Set to no fill + + # Create target shape with solid fill + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + target.fill.solid() + target.fill.fore_color.rgb = RGBColor(0, 255, 0) # Green + + # Copy fill (should copy noFill) + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_shape_fill(source, target) + + # Verify the target's XML has noFill element + # Note: We check XML directly because python-pptx FillFormat caches its state + # and won't reflect our direct XML modifications via target.fill.type + target_spPr = target._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if target_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + target_spPr = target._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + assert target_spPr is not None, "target shape should have spPr element" + + # Verify noFill element exists in target after copying + noFill = target_spPr.find(f"{{{_DRAWINGML_NS}}}noFill") + assert noFill is not None, ( + "noFill element should exist in target after copying background fill" + ) + + # Verify the original solidFill is gone (the green fill was replaced) + solidFill = target_spPr.find(f"{{{_DRAWINGML_NS}}}solidFill") + assert solidFill is None, ( + "solidFill element should be removed after copying noFill" + ) + + # Verify source still has its noFill + source_spPr = 
source._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if source_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + source_spPr = source._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + source_noFill = source_spPr.find(f"{{{_DRAWINGML_NS}}}noFill") + assert source_noFill is not None, "source should still have noFill" + + def test_copy_fill_tags_constant(self): + """Test that _FILL_TAGS contains expected fill types.""" + expected_tags = ["solidFill", "gradFill", "pattFill", "blipFill", "noFill"] + + for tag in expected_tags: + matching = [t for t in _FILL_TAGS if tag in t] + assert len(matching) == 1, f"Expected {tag} in _FILL_TAGS" + + +class TestTextFrameAnchorCopying: + """Test text frame anchor (vertical alignment) copying.""" + + def test_copy_anchor_top(self): + """Test copying top anchor alignment.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Source text" + + # Set anchor to top via XML + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchor", "t") + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target text" + + # Copy anchor + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Verify anchor was copied + target_bodyPr = target_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert target_bodyPr is not None, ( + "target text frame should have bodyPr element" + ) + anchor = target_bodyPr.get("anchor") + assert anchor == "t", f"Expected anchor 't', got '{anchor}'" + + def 
test_copy_anchor_center(self): + """Test copying center anchor alignment.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with center anchor + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Source text" + + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchor", "ctr") + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target text" + + # Copy anchor + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Verify anchor was copied + target_bodyPr = target_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert target_bodyPr is not None, ( + "target text frame should have bodyPr element" + ) + anchor = target_bodyPr.get("anchor") + assert anchor == "ctr", f"Expected anchor 'ctr', got '{anchor}'" + + def test_copy_anchor_bottom(self): + """Test copying bottom anchor alignment.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with bottom anchor + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Source text" + + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchor", "b") + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target text" + + # Copy anchor + id_manager = IdManager(prs) + copier 
= ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Verify anchor was copied + target_bodyPr = target_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert target_bodyPr is not None, ( + "target text frame should have bodyPr element" + ) + anchor = target_bodyPr.get("anchor") + assert anchor == "b", f"Expected anchor 'b', got '{anchor}'" + + +class TestShapeCopyIntegration: + """Integration tests for shape copying with fill and anchor.""" + + def test_copy_text_shape_preserves_anchor(self): + """Test that copying a text shape preserves its anchor setting.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with center anchor + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Centered text" + + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchor", "ctr") + + # Copy shape + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copied_shape = copier.copy_shape(source, slide) + + # Assert copied shape exists + assert copied_shape is not None, "copy_shape should return a shape" + + # Assert it has a text_frame + assert hasattr(copied_shape, "text_frame"), ( + "Copied shape should have a text_frame" + ) + + # Assert bodyPr exists and has the correct anchor + copied_tf = copied_shape.text_frame + copied_bodyPr = copied_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert copied_bodyPr is not None, ( + "Copied shape's text_frame should have a bodyPr element" + ) + + anchor = copied_bodyPr.get("anchor") + assert anchor == "ctr", ( + f"Expected anchor 'ctr', got '{anchor}'" + ) + + def test_copy_shape_preserves_fill_and_text(self): + """Test that copying a shape preserves both fill and text.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + 
slide = prs.slides.add_slide(blank_layout) + + # Create source shape with fill and text + source = slide.shapes.add_shape( + 1, # Rectangle + left=Inches(1), + top=Inches(1), + width=Inches(3), + height=Inches(2), + ) + source.fill.solid() + source.fill.fore_color.rgb = RGBColor(100, 150, 200) + source.text_frame.text = "Text in shape" + + # Copy shape + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copied_shape = copier.copy_shape(source, slide) + + # Shape should be copied (returns something or adds to slide) + # If copy_shape returns the shape, verify it + if copied_shape is not None: + assert hasattr(copied_shape, "text_frame") + + # Verify text is preserved + assert copied_shape.text_frame.text == "Text in shape", ( + f"Expected 'Text in shape', got '{copied_shape.text_frame.text}'" + ) + + # Verify fill color is preserved + assert copied_shape.fill.type == MSO_FILL.SOLID, ( + f"Expected solid fill, got {copied_shape.fill.type}" + ) + assert copied_shape.fill.fore_color.rgb == RGBColor(100, 150, 200), ( + f"Expected RGB(100, 150, 200), got {copied_shape.fill.fore_color.rgb}" + ) + + +class TestShapeStylingPersistence: + """Test that shape styling persists after save/reload.""" + + def test_fill_persists_after_save(self): + """Test that copied fill persists after saving and reloading.""" + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp: + tmp_path = tmp.name + + try: + # Create presentation with styled shape + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + source = slide.shapes.add_shape( + 1, + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + source.fill.solid() + source.fill.fore_color.rgb = RGBColor(255, 128, 0) # Orange + + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Copy fill + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + 
copier._copy_shape_fill(source, target) + + # Save + prs.save(tmp_path) + + # Reload and verify + prs2 = Presentation(tmp_path) + slide2 = prs2.slides[0] + + # Find shapes (target should be second shape) + shapes = list(slide2.shapes) + assert len(shapes) >= 2 + + # Verify the target shape retained the copied solid fill and color + target_shape = shapes[1] + assert target_shape.fill.type == MSO_FILL.SOLID, ( + f"Expected solid fill, got {target_shape.fill.type}" + ) + assert target_shape.fill.fore_color.rgb == RGBColor(255, 128, 0), ( + f"Expected RGB(255, 128, 0), got {target_shape.fill.fore_color.rgb}" + ) + + finally: + os.unlink(tmp_path) + + def test_anchor_persists_after_save(self): + """Test that copied anchor persists after saving and reloading.""" + with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp: + tmp_path = tmp.name + + try: + # Create presentation with text box + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Centered" + + # Set center anchor + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchor", "ctr") + + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target" + + # Copy anchor + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Save + prs.save(tmp_path) + + # Reload and verify + prs2 = Presentation(tmp_path) + slide2 = prs2.slides[0] + + # Find second textbox + textboxes = [s for s in slide2.shapes if hasattr(s, "text_frame")] + assert len(textboxes) >= 2 + + target_tf2 = textboxes[1].text_frame + target_bodyPr2 = 
target_tf2._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + + assert target_bodyPr2 is not None, ( + "reloaded target text frame should have bodyPr element" + ) + anchor = target_bodyPr2.get("anchor") + assert anchor == "ctr", ( + f"Expected anchor 'ctr' after reload, got '{anchor}'" + ) + + finally: + os.unlink(tmp_path) + + +class TestFontColorCopying: + """Test font color copying functionality.""" + + def test_copy_rgb_color(self): + """Test copying RGB font color from one shape to another.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with colored text + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(1) + ) + source_tf = source.text_frame + source_para = source_tf.paragraphs[0] + source_run = source_para.add_run() + source_run.text = "Red text" + source_run.font.color.rgb = RGBColor(255, 0, 0) # Red + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(1) + ) + target_tf = target.text_frame + target_para = target_tf.paragraphs[0] + target_run = target_para.add_run() + target_run.text = "Target text" + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame(source_tf, target_tf) + + # Verify color was copied + copied_run = target_tf.paragraphs[0].runs[0] + assert copied_run.font.color.rgb == RGBColor(255, 0, 0) + + def test_copy_font_underline(self): + """Test copying underline formatting.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with underlined text + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(1) + ) + source_tf = source.text_frame + source_para = source_tf.paragraphs[0] + source_run = source_para.add_run() + source_run.text = 
"Underlined text" + source_run.font.underline = True + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(1) + ) + target_tf = target.text_frame + target_para = target_tf.paragraphs[0] + target_run = target_para.add_run() + target_run.text = "Target text" + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame(source_tf, target_tf) + + # Verify underline was copied + copied_run = target_tf.paragraphs[0].runs[0] + assert copied_run.font.underline is True + + +class TestBodyPrInsetsCopying: + """Test bodyPr inset (margin) copying functionality.""" + + def test_copy_text_insets(self): + """Test copying text inset attributes from bodyPr.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box and set insets via XML + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Source text" + + # Set insets on source bodyPr + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("lIns", "91440") # Left inset + source_bodyPr.set("rIns", "91440") # Right inset + source_bodyPr.set("tIns", "45720") # Top inset + source_bodyPr.set("bIns", "45720") # Bottom inset + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target text" + + # Copy bodyPr attributes + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Verify insets were copied + target_bodyPr = target_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert target_bodyPr is not None, ( + "target text frame should have bodyPr element" 
+ ) + assert target_bodyPr.get("lIns") == "91440", ( + f"Expected lIns '91440', got '{target_bodyPr.get('lIns')}'" + ) + assert target_bodyPr.get("rIns") == "91440", ( + f"Expected rIns '91440', got '{target_bodyPr.get('rIns')}'" + ) + assert target_bodyPr.get("tIns") == "45720", ( + f"Expected tIns '45720', got '{target_bodyPr.get('tIns')}'" + ) + assert target_bodyPr.get("bIns") == "45720", ( + f"Expected bIns '45720', got '{target_bodyPr.get('bIns')}'" + ) + + def test_copy_anchor_ctr_attribute(self): + """Test copying anchorCtr attribute.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with anchorCtr + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_tf.text = "Source text" + + source_bodyPr = source_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + if source_bodyPr is not None: + source_bodyPr.set("anchorCtr", "1") + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_tf.text = "Target text" + + # Copy bodyPr attributes + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame_anchor(source_tf, target_tf) + + # Verify anchorCtr was copied + target_bodyPr = target_tf._element.find(f"{{{_DRAWINGML_NS}}}bodyPr") + assert target_bodyPr is not None, ( + "target text frame should have bodyPr element" + ) + assert target_bodyPr.get("anchorCtr") == "1", ( + f"Expected anchorCtr '1', got '{target_bodyPr.get('anchorCtr')}'" + ) + + +class TestLineSpacingCopying: + """Test line spacing copying functionality.""" + + def test_copy_line_spacing(self): + """Test copying line_spacing property between paragraphs.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source 
text box with line spacing + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_para = source_tf.paragraphs[0] + source_para.add_run().text = "Line with spacing" + source_para.line_spacing = 1.5 # 1.5x line spacing + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_para = target_tf.paragraphs[0] + target_para.add_run().text = "Target" + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame(source_tf, target_tf) + + # Verify line spacing was copied + copied_para = target_tf.paragraphs[0] + assert copied_para.line_spacing == 1.5 + + def test_copy_space_before(self): + """Test copying space_before property between paragraphs.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with space_before + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_para = source_tf.paragraphs[0] + source_para.add_run().text = "Paragraph with space before" + source_para.space_before = Pt(12) # 12pt space before + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_para = target_tf.paragraphs[0] + target_para.add_run().text = "Target" + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame(source_tf, target_tf) + + # Verify space_before was copied + copied_para = target_tf.paragraphs[0] + assert copied_para.space_before == Pt(12) + + def test_copy_space_after(self): + """Test copying space_after property between paragraphs.""" + prs = 
Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source text box with space_after + source = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(3), height=Inches(2) + ) + source_tf = source.text_frame + source_para = source_tf.paragraphs[0] + source_para.add_run().text = "Paragraph with space after" + source_para.space_after = Pt(18) # 18pt space after + + # Create target text box + target = slide.shapes.add_textbox( + left=Inches(5), top=Inches(1), width=Inches(3), height=Inches(2) + ) + target_tf = target.text_frame + target_para = target_tf.paragraphs[0] + target_para.add_run().text = "Target" + + # Copy using ShapeCopier + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_text_frame(source_tf, target_tf) + + # Verify space_after was copied + copied_para = target_tf.paragraphs[0] + assert copied_para.space_after == Pt(18) + + +class TestBoldExplicitSetting: + """Test that bold is explicitly set to True or False, not inherited.""" + + def test_bold_false_explicitly_set(self): + """Test that non-bold text has font.bold explicitly set to False. + + This is critical because runs inherit from defRPr if bold is None. + If defRPr has bold=True, text will appear bold unless explicitly False. 
+ """ + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + + # Apply non-bold markdown text + apply_markdown_to_textframe("Regular text without bold", tf) + + # Verify font.bold is explicitly False, not None + for para in tf.paragraphs: + for run in para.runs: + if run.text.strip(): + # Bold should be explicitly False, not None (inherited) + assert run.font.bold is False, ( + f"font.bold should be False, not {run.font.bold}" + ) + + def test_bold_true_explicitly_set(self): + """Test that bold text has font.bold explicitly set to True.""" + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + + # Apply bold markdown text + apply_markdown_to_textframe("**Bold text**", tf) + + # Find the bold run and verify it's explicitly True + bold_found = False + for para in tf.paragraphs: + for run in para.runs: + if "Bold" in run.text: + assert run.font.bold is True + bold_found = True + + assert bold_found, "Bold text run not found" + + def test_mixed_bold_and_regular(self): + """Test that mixed bold and regular text has correct font.bold values.""" + from gslides_api.pptx.markdown_to_pptx import apply_markdown_to_textframe + + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + textbox = slide.shapes.add_textbox( + left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2) + ) + tf = textbox.text_frame + + # Apply mixed markdown text + apply_markdown_to_textframe("Regular **bold** 
regular", tf) + + # Check that runs have appropriate bold values + for para in tf.paragraphs: + for run in para.runs: + if run.text.strip(): + if "bold" in run.text.lower(): + assert run.font.bold is True, f"'{run.text}' should be bold" + else: + assert run.font.bold is False, ( + f"'{run.text}' should not be bold" + ) + + +class TestImageShapeCopying: + """Test image shape copying preserves blipFill properties.""" + + def test_copy_image_preserves_src_rect(self): + """Test that copying an image shape preserves srcRect (crop settings). + + This is critical for images that are cropped - the srcRect defines + which portion of the image is displayed. + """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create a simple image shape with a small test image + import tempfile + import os + from PIL import Image + + # Create a simple test image + img = Image.new('RGB', (100, 100), color='purple') + with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp: + img.save(tmp.name) + tmp_path = tmp.name + + try: + # Add image to slide + picture = slide.shapes.add_picture( + tmp_path, + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(2) + ) + + # Manually add srcRect to simulate a cropped image + # blipFill is in PresentationML namespace (p:), not DrawingML (a:) + _PRESENTATIONML_NS = "http://schemas.openxmlformats.org/presentationml/2006/main" + blip_fill = picture._element.find( + f".//{{{_PRESENTATIONML_NS}}}blipFill" + ) + assert blip_fill is not None, "blipFill not found in source picture" + + # srcRect is in DrawingML namespace (a:) and should be child of blipFill + src_rect = etree.SubElement( + blip_fill, f"{{{_DRAWINGML_NS}}}srcRect" + ) + src_rect.set("l", "10000") # 10% from left + src_rect.set("t", "5000") # 5% from top + src_rect.set("r", "15000") # 15% from right + src_rect.set("b", "20000") # 20% from bottom + + # Now copy the shape using ShapeCopier + id_manager = 
IdManager(prs) + copier = ShapeCopier(id_manager) + + # Create a second slide to copy to + target_slide = prs.slides.add_slide(blank_layout) + + # Copy the shape - but we need relationship mapping + # For this test, create a simple mapping + relationship_mapping = {} + + # Get source relationship ID and copy the image relationship + source_blip = picture._element.find( + f".//{{{_DRAWINGML_NS}}}blip" + ) + if source_blip is not None: + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + old_rel_id = source_blip.get(f"{{{r_ns}}}embed") + if old_rel_id: + # Copy the image relationship to target slide + # Get the relationship and its image part + relationship = slide.part.rels[old_rel_id] + image_part = relationship._target + # Use get_or_add_image_part to add image to target + image_data = image_part.blob + import io + image_stream = io.BytesIO(image_data) + _, new_rel_id = target_slide.part.get_or_add_image_part(image_stream) + relationship_mapping[old_rel_id] = new_rel_id + + # Copy the image shape + copier._copy_image_shape( + picture, + target_slide, + relationship_mapping=relationship_mapping + ) + + # Verify the copied shape has srcRect preserved + # Find the picture shape on target slide + target_pics = [ + s for s in target_slide.shapes._spTree.iterchildren() + if s.tag.endswith('}pic') + ] + + assert len(target_pics) > 0, "No picture shape found on target slide" + + target_pic = target_pics[0] + target_src_rect = target_pic.find( + f".//{{{_DRAWINGML_NS}}}srcRect" + ) + + assert target_src_rect is not None, "srcRect not found on copied image" + assert target_src_rect.get("l") == "10000", "srcRect 'l' not preserved" + assert target_src_rect.get("t") == "5000", "srcRect 't' not preserved" + assert target_src_rect.get("r") == "15000", "srcRect 'r' not preserved" + assert target_src_rect.get("b") == "20000", "srcRect 'b' not preserved" + + finally: + os.unlink(tmp_path) + + +class TestBlipFillRelationshipRemapping: + """Test blipFill 
relationship remapping during shape fill copying.""" + + def test_copy_shape_fill_with_relationship_mapping(self): + """Test that blipFill elements have their relationship IDs remapped. + + When copying a shape fill that contains a blipFill (image fill), + the r:embed attribute must be remapped to the new relationship ID. + """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source shape + source = slide.shapes.add_shape( + 1, # Rectangle + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Create target shape + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Manually add a blipFill element to source shape with a fake relationship ID + source_spPr = source._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if source_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + source_spPr = source._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if source_spPr is not None: + # Create a blipFill element with a relationship reference + blip_fill = etree.SubElement(source_spPr, f"{{{_DRAWINGML_NS}}}blipFill") + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + blip = etree.SubElement(blip_fill, f"{{{_DRAWINGML_NS}}}blip") + blip.set(f"{{{r_ns}}}embed", "rId_OLD") + + # Create relationship mapping + relationship_mapping = {"rId_OLD": "rId_NEW"} + + # Copy fill with relationship mapping + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_shape_fill(source, target, relationship_mapping) + + # Verify blipFill was copied with remapped relationship ID + target_spPr = target._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if target_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + target_spPr = target._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if target_spPr is not None: + target_blip_fill 
= target_spPr.find(f"{{{_DRAWINGML_NS}}}blipFill") + assert target_blip_fill is not None, "blipFill should have been copied" + + target_blip = target_blip_fill.find(f"{{{_DRAWINGML_NS}}}blip") + assert target_blip is not None, "blip element should exist" + + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + embed_id = target_blip.get(f"{{{r_ns}}}embed") + assert embed_id == "rId_NEW", ( + f"r:embed should be remapped from rId_OLD to rId_NEW, got {embed_id}" + ) + + def test_copy_shape_fill_without_relationship_mapping(self): + """Test that blipFill copying works without relationship mapping. + + When no relationship mapping is provided, the fill should still be copied + (though relationship IDs won't be remapped). + """ + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source shape + source = slide.shapes.add_shape( + 1, + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Create target shape + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Manually add a blipFill element to source shape + source_spPr = source._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if source_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + source_spPr = source._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if source_spPr is not None: + blip_fill = etree.SubElement(source_spPr, f"{{{_DRAWINGML_NS}}}blipFill") + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + blip = etree.SubElement(blip_fill, f"{{{_DRAWINGML_NS}}}blip") + blip.set(f"{{{r_ns}}}embed", "rId_ORIGINAL") + + # Copy fill without relationship mapping + id_manager = IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_shape_fill(source, target, relationship_mapping=None) + + # Verify blipFill was copied (with original relationship ID) + target_spPr = 
target._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if target_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + target_spPr = target._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if target_spPr is not None: + target_blip_fill = target_spPr.find(f"{{{_DRAWINGML_NS}}}blipFill") + assert target_blip_fill is not None, "blipFill should have been copied" + + target_blip = target_blip_fill.find(f"{{{_DRAWINGML_NS}}}blip") + assert target_blip is not None, "blip element should exist" + + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + embed_id = target_blip.get(f"{{{r_ns}}}embed") + assert embed_id == "rId_ORIGINAL", ( + f"r:embed should remain unchanged without mapping, got {embed_id}" + ) + + def test_copy_shape_properties_passes_relationship_mapping(self): + """Test that _copy_shape_properties passes relationship_mapping to _copy_shape_fill.""" + prs = Presentation() + blank_layout = prs.slide_layouts[6] + slide = prs.slides.add_slide(blank_layout) + + # Create source and target shapes + source = slide.shapes.add_shape( + 1, + left=Inches(1), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + target = slide.shapes.add_shape( + 1, + left=Inches(4), + top=Inches(1), + width=Inches(2), + height=Inches(1), + ) + + # Add blipFill to source + source_spPr = source._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if source_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + source_spPr = source._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if source_spPr is not None: + blip_fill = etree.SubElement(source_spPr, f"{{{_DRAWINGML_NS}}}blipFill") + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + blip = etree.SubElement(blip_fill, f"{{{_DRAWINGML_NS}}}blip") + blip.set(f"{{{r_ns}}}embed", "rId_SOURCE") + + # Call _copy_shape_properties with relationship mapping + relationship_mapping = {"rId_SOURCE": "rId_TARGET"} + id_manager = 
IdManager(prs) + copier = ShapeCopier(id_manager) + copier._copy_shape_properties(source, target, relationship_mapping) + + # Verify the relationship was remapped + target_spPr = target._element.find(f".//{{{_DRAWINGML_NS}}}spPr") + if target_spPr is None: + from gslides_api.pptx.shape_copier import _PRESENTATIONML_NS + target_spPr = target._element.find(f".//{{{_PRESENTATIONML_NS}}}spPr") + + if target_spPr is not None: + target_blip_fill = target_spPr.find(f"{{{_DRAWINGML_NS}}}blipFill") + if target_blip_fill is not None: + target_blip = target_blip_fill.find(f"{{{_DRAWINGML_NS}}}blip") + if target_blip is not None: + r_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + embed_id = target_blip.get(f"{{{r_ns}}}embed") + assert embed_id == "rId_TARGET", ( + f"Relationship should be remapped via _copy_shape_properties, got {embed_id}" + ) diff --git a/tests/test_pptx/test_pptx_slide_copying.py b/tests/test_pptx/test_pptx_slide_copying.py new file mode 100644 index 0000000..5d1bedd --- /dev/null +++ b/tests/test_pptx/test_pptx_slide_copying.py @@ -0,0 +1,1390 @@ +""" +Comprehensive tests for robust PowerPoint slide copying functionality. + +Tests the new pptx subpackage to ensure no XML corruption, proper ID management, +and successful copying of various slide types. 
+""" + +import os +import tempfile +from pathlib import Path + +import pytest +from pptx import Presentation +from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE, MSO_SHAPE_TYPE +from pptx.util import Inches + +from gslides_api.pptx.id_manager import IdManager +from gslides_api.pptx.relationship_copier import RelationshipCopier +from gslides_api.pptx.shape_copier import ShapeCopier +from gslides_api.pptx.slide_copier import SlideCopierManager, _remove_layout_placeholders +from gslides_api.pptx.xml_utils import XmlUtils +from gslides_api.adapters.pptx_adapter import PowerPointAPIClient, PowerPointPresentation + + +class TestIdManager: + """Test the ID manager for unique ID generation.""" + + def test_id_manager_initialization(self): + """Test basic ID manager initialization.""" + prs = Presentation() + id_manager = IdManager(prs) + + assert id_manager.next_slide_id >= 256 + assert id_manager.next_shape_id >= 1 + assert len(id_manager.used_slide_ids) == 0 + assert len(id_manager.used_shape_ids) == 0 + + def test_unique_slide_id_generation(self): + """Test generation of unique slide IDs.""" + prs = Presentation() + id_manager = IdManager(prs) + + id1 = id_manager.generate_unique_slide_id() + id2 = id_manager.generate_unique_slide_id() + + assert id1 != id2 + assert id1 in id_manager.used_slide_ids + assert id2 in id_manager.used_slide_ids + + def test_unique_shape_id_generation(self): + """Test generation of unique shape IDs.""" + prs = Presentation() + id_manager = IdManager(prs) + + id1 = id_manager.generate_unique_shape_id() + id2 = id_manager.generate_unique_shape_id() + + assert id1 != id2 + assert id1 in id_manager.used_shape_ids + assert id2 in id_manager.used_shape_ids + + def test_unique_creation_id_generation(self): + """Test generation of unique creation IDs.""" + prs = Presentation() + id_manager = IdManager(prs) + + id1 = id_manager.generate_unique_creation_id() + id2 = id_manager.generate_unique_creation_id() + + assert id1 != id2 + assert len(id1) == 36 # 
GUID length + assert len(id2) == 36 # GUID length + assert id1 in id_manager.used_creation_ids + assert id2 in id_manager.used_creation_ids + + +class TestXmlUtils: + """Test XML utilities for safe manipulation.""" + + def test_safe_copy_element(self): + """Test safe copying of XML elements.""" + prs = Presentation() + layout = prs.slide_layouts[0] + slide = prs.slides.add_slide(layout) + + # Add a shape to get an XML element + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + textbox.text = "Test text" + + source_element = textbox._element + copied_element = XmlUtils.safe_copy_element(source_element, new_id=999) + + assert copied_element is not None + assert copied_element.tag == source_element.tag + + # Check for ID in the cNvPr element (where PowerPoint shape IDs are actually stored) + cnv_pr_elements = copied_element.xpath('.//p:cNvPr', namespaces={'p': 'http://schemas.openxmlformats.org/presentationml/2006/main'}) + if cnv_pr_elements: + assert cnv_pr_elements[0].get('id') == '999' + else: + # If no cNvPr element, that's also acceptable for this test + pass + + def test_update_element_id(self): + """Test updating element IDs.""" + prs = Presentation() + layout = prs.slide_layouts[0] + slide = prs.slides.add_slide(layout) + + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + element = textbox._element + + success = XmlUtils.update_element_id(element, 12345) + assert success + assert element.get('id') == '12345' + + def test_validate_element(self): + """Test XML element validation.""" + prs = Presentation() + layout = prs.slide_layouts[0] + slide = prs.slides.add_slide(layout) + + textbox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1)) + element = textbox._element + + validation = XmlUtils.validate_element(element) + assert validation['valid'] + assert isinstance(validation['issues'], list) + assert isinstance(validation['warnings'], list) + + +class TestShapeCopier: + """Test 
def test_copy_table_preserves_cell_borders(self):
    """Test that table cell borders are preserved when copying.

    A left border (``a:lnL``) with a blue solid fill is written into the
    ``a:tcPr`` of cell (0, 0) of the source table. After ``copy_shape`` the
    same border element, its width and its colour must be found on cell
    (0, 0) of the copied table.
    """
    from lxml import etree

    a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"

    def a_tag(local_name):
        # Clark-notation tag name in the DrawingML namespace.
        return f"{{{a_ns}}}{local_name}"

    prs = Presentation()
    layout = prs.slide_layouts[0]
    source_slide = prs.slides.add_slide(layout)
    target_slide = prs.slides.add_slide(layout)

    # Add table to source slide
    table_shape = source_slide.shapes.add_table(2, 2, Inches(1), Inches(1), Inches(4), Inches(2))
    table = table_shape.table
    table.cell(0, 0).text = "A"
    table.cell(0, 1).text = "B"

    # Make sure the first cell has a tcPr element to hold the border.
    source_tc = table.cell(0, 0)._tc
    source_tcPr = source_tc.find(a_tag("tcPr"))
    if source_tcPr is None:
        source_tcPr = etree.SubElement(source_tc, a_tag("tcPr"))

    # Add left border (lnL) with a solid blue fill.
    lnL = etree.SubElement(source_tcPr, a_tag("lnL"), w="12700", cap="flat", cmpd="sng")
    solidFill = etree.SubElement(lnL, a_tag("solidFill"))
    etree.SubElement(solidFill, a_tag("srgbClr"), val="0000FF")  # Blue

    # Copy the shape
    id_manager = IdManager(prs)
    shape_copier = ShapeCopier(id_manager)
    copied_shape = shape_copier.copy_shape(table_shape, target_slide)

    assert copied_shape is not None
    copied_table = copied_shape.table

    # Verify border was copied
    copied_tcPr = copied_table.cell(0, 0)._tc.find(a_tag("tcPr"))
    assert copied_tcPr is not None, "tcPr should exist in copied cell"

    copied_lnL = copied_tcPr.find(a_tag("lnL"))
    assert copied_lnL is not None, "Left border (lnL) should be copied"
    assert copied_lnL.get("w") == "12700", "Border width should be preserved"

    # Verify border color
    copied_solidFill = copied_lnL.find(a_tag("solidFill"))
    assert copied_solidFill is not None, "Border fill should be copied"
    copied_srgbClr = copied_solidFill.find(a_tag("srgbClr"))
    assert copied_srgbClr is not None, "Border color should be copied"
    assert copied_srgbClr.get("val") == "0000FF", "Border color should be blue"
def test_copy_table_preserves_style_properties(self):
    """Test that table-level style properties are preserved when copying.

    All four table style flags are switched off on the source table; each
    must still read exactly ``False`` on the copied table.
    """
    style_flags = ("first_row", "first_col", "horz_banding", "vert_banding")

    prs = Presentation()
    layout = prs.slide_layouts[0]
    source_slide = prs.slides.add_slide(layout)
    target_slide = prs.slides.add_slide(layout)

    # Add table to source slide
    table_shape = source_slide.shapes.add_table(3, 3, Inches(1), Inches(1), Inches(5), Inches(3))
    table = table_shape.table
    table.cell(0, 0).text = "Header"

    # Disable first-row/first-column styling and both banding directions.
    for flag in style_flags:
        setattr(table, flag, False)

    # Copy the shape
    id_manager = IdManager(prs)
    shape_copier = ShapeCopier(id_manager)
    copied_shape = shape_copier.copy_shape(table_shape, target_slide)

    assert copied_shape is not None
    copied_table = copied_shape.table

    # Identity comparison instead of `== False`, matching the `is True`
    # check used by the word_wrap test in this class.
    for flag in style_flags:
        assert getattr(copied_table, flag) is False, f"{flag} should be preserved as False"
def test_copy_text_shape_preserves_margins(self):
    """Custom text-frame margins on a textbox must survive copy_shape."""
    expected_margins = {
        "margin_top": Inches(0.1),
        "margin_bottom": Inches(0.1),
        "margin_left": Inches(0.2),
        "margin_right": Inches(0.2),
    }

    prs = Presentation()
    first_layout = prs.slide_layouts[0]
    source_slide = prs.slides.add_slide(first_layout)
    target_slide = prs.slides.add_slide(first_layout)

    # Source textbox with every margin set explicitly.
    box = source_slide.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1))
    box.text = "Test text with custom margins"
    for attr, value in expected_margins.items():
        setattr(box.text_frame, attr, value)

    # Copy onto the second slide.
    copier = ShapeCopier(IdManager(prs))
    duplicate = copier.copy_shape(box, target_slide)

    assert duplicate is not None
    for attr, value in expected_margins.items():
        assert getattr(duplicate.text_frame, attr) == value, f"{attr} was not preserved during copy"
def test_copy_slide_with_notes(self):
    """Test copying a slide that actually has speaker notes.

    ``has_notes_slide`` is False on a freshly added slide, so guarding the
    setup with it meant no notes were ever written. Accessing
    ``notes_slide`` creates the notes slide on demand, which is what the
    setup needs here.
    """
    source_prs = Presentation()
    target_prs = Presentation()

    # Create source slide with notes
    layout = source_prs.slide_layouts[0]
    source_slide = source_prs.slides.add_slide(layout)

    # Add notes (creates the notes slide as a side effect of the access)
    source_slide.notes_slide.notes_text_frame.text = "These are speaker notes"
    assert source_slide.has_notes_slide

    # Copy the slide
    copier = SlideCopierManager(target_prs)
    copied_slide = copier.copy_slide(source_slide)

    # Notes copying is treated as best-effort, so only the slide copy itself
    # is required to succeed; the copied notes are not inspected.
    assert copied_slide is not None
class TestPowerPointPresentationIntegration:
    """Test integration with PowerPointPresentation class."""

    @staticmethod
    def _save_to(directory, filename, prs):
        """Save *prs* as *filename* inside *directory* and return the path as a str."""
        path = os.path.join(directory, filename)
        prs.save(path)
        return path

    def test_insert_copy_integration(self):
        """Test the updated insert_copy method.

        Source and target decks are written into one temporary directory,
        loaded through ``PowerPointPresentation.from_id``, and a slide is
        copied from source to target. The target is then saved to confirm
        the result can still be serialised.
        """
        # Create source presentation
        source_prs = Presentation()
        layout = source_prs.slide_layouts[0]
        source_slide_pptx = source_prs.slides.add_slide(layout)
        textbox = source_slide_pptx.shapes.add_textbox(Inches(1), Inches(1), Inches(5), Inches(1))
        textbox.text = "Integration test content"

        # A temporary directory removes both files on exit, and avoids saving
        # to the path of a NamedTemporaryFile whose handle is still open.
        with tempfile.TemporaryDirectory() as workdir:
            source_path = self._save_to(workdir, "source.pptx", source_prs)
            target_path = self._save_to(workdir, "target.pptx", Presentation())

            # Load presentations
            api_client = PowerPointAPIClient()
            source_presentation = PowerPointPresentation.from_id(api_client, source_path)
            target_presentation = PowerPointPresentation.from_id(api_client, target_path)

            # Copy slide using the new robust method
            source_slide = source_presentation.slides[0]
            copied_slide = target_presentation.insert_copy(source_slide, api_client)

            assert copied_slide is not None
            assert len(target_presentation.slides) >= 1

            # Save to verify no corruption
            target_presentation.save(api_client)

    def test_insert_copy_fallback(self):
        """Test the fallback method when robust copying fails.

        Even with a minimal source slide and an in-memory target, insert_copy
        must return a usable slide.
        """
        # Create minimal presentation
        source_prs = Presentation()
        source_prs.slides.add_slide(source_prs.slide_layouts[0])

        with tempfile.TemporaryDirectory() as workdir:
            source_path = self._save_to(workdir, "source.pptx", source_prs)

            api_client = PowerPointAPIClient()
            source_presentation = PowerPointPresentation.from_id(api_client, source_path)

            target_prs = Presentation()
            target_presentation = PowerPointPresentation(target_prs)

            # This should work even if content copying fails
            source_slide = source_presentation.slides[0]
            copied_slide = target_presentation.insert_copy(source_slide, api_client)

            assert copied_slide is not None
def test_alt_text_copied_with_shape(self):
    """The cNvPr 'title' attribute of a textbox must survive a slide copy."""
    source_prs = Presentation()
    source_slide = source_prs.slides.add_slide(source_prs.slide_layouts[0])

    # Source textbox carrying the alt text.
    box = source_slide.shapes.add_textbox(
        Inches(1), Inches(1), Inches(5), Inches(1)
    )
    box.text = "Test content"

    # The alt text is written straight onto the cNvPr element's title attribute.
    box._element.xpath(".//p:cNvPr")[0].attrib["title"] = "TestAltText"

    # Copy the slide into a fresh presentation.
    target_prs = Presentation()
    manager = SlideCopierManager(target_prs)
    copied_slide = manager.copy_slide_safe(source_slide, insertion_index=0)

    # Pick the first text box whose title matches; other shape types are ignored.
    copied_textbox = next(
        (
            candidate
            for candidate in copied_slide.shapes
            if candidate.shape_type == MSO_SHAPE_TYPE.TEXT_BOX
            and candidate._element.xpath(".//p:cNvPr")[0].attrib.get("title") == "TestAltText"
        ),
        None,
    )

    assert copied_textbox is not None, "Copied textbox with alt text not found"

    # Read the value back once more and compare it explicitly.
    copied_title = copied_textbox._element.xpath(".//p:cNvPr")[0].attrib.get("title")
    assert copied_title == "TestAltText", f"Expected alt text 'TestAltText', got '{copied_title}'"
def test_image_relationship_remapped_after_copy(self):
    """Test that image r:embed references are remapped to new relationship IDs.

    A picture is added to a source slide, the slide is copied into a new
    presentation, and the copied picture's ``a:blip`` must carry an
    ``r:embed`` value that resolves in the copied slide's relationships.
    """
    import io

    from PIL import Image

    a_ns = 'http://schemas.openxmlformats.org/drawingml/2006/main'
    r_ns = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

    source_prs = Presentation()
    layout = source_prs.slide_layouts[0]
    source_slide = source_prs.slides.add_slide(layout)

    # Create a test image
    img = Image.new('RGB', (100, 100), color='red')
    img_bytes = io.BytesIO()
    img.save(img_bytes, format='PNG')
    img_bytes.seek(0)

    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        f.write(img_bytes.getvalue())
        img_path = f.name

    try:
        # Add image to source slide
        source_slide.shapes.add_picture(img_path, Inches(1), Inches(1))

        # Copy to new presentation
        target_prs = Presentation()
        copier = SlideCopierManager(target_prs)
        copied_slide = copier.copy_slide(source_slide)

        assert copied_slide is not None

        # Find the copied picture
        pictures = [
            shape for shape in copied_slide.shapes
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE
        ]
        assert pictures, "No picture shape found in copied slide"

        # A missing blip must fail the test rather than be skipped silently.
        copied_blip = pictures[0]._element.findall(f'.//{{{a_ns}}}blip')
        assert copied_blip, "Copied picture has no a:blip element"

        new_rid = copied_blip[0].get(f'{{{r_ns}}}embed')
        # r:embed should exist (not be None or cleared)
        assert new_rid is not None, "r:embed was cleared instead of remapped"
        # r:embed should point to a relationship of the copied slide
        rel = copied_slide.part.rels.get(new_rid)
        assert rel is not None, f"r:embed {new_rid} doesn't exist in slide relationships"

    finally:
        os.unlink(img_path)
def test_group_shape_is_copied_via_xml(self):
    """Smoke-test slide copying as a stand-in for GROUP shape handling.

    NOTE(review): the source slide built here only contains a textbox; no
    group shape is created, so this confirms that copy_slide returns a
    slide, not that a GROUP shape was copied.
    """
    source_prs = Presentation()
    source_slide = source_prs.slides.add_slide(source_prs.slide_layouts[0])

    # Single textbox as slide content.
    box = source_slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1))
    box.text = "Test"

    # Copy into a separate, empty presentation.
    destination = Presentation()
    manager = SlideCopierManager(destination)
    result = manager.copy_slide(source_slide)

    assert result is not None
class TestSampleadTemplateRoundtrip:
    """Round-trip tests for slides that mix several shape kinds.

    The tests in this class build their slides in memory. The
    ``samplead_template_path`` fixture is defined here but is not requested
    by either of them.
    """

    @pytest.fixture
    def samplead_template_path(self):
        """Return the first existing Samplead template path, or None.

        The candidates are relative paths, so they are resolved against the
        current working directory, not the location of this test file.
        """
        possible_paths = [
            "playground/samplead/Samplead Master Deck Template.pptx",
            "../playground/samplead/Samplead Master Deck Template.pptx",
            "../../playground/samplead/Samplead Master Deck Template.pptx",
        ]
        return next((path for path in possible_paths if os.path.exists(path)), None)

    def test_slide_with_complex_shapes_preserves_shape_count(self):
        """Test that copying a slide roughly preserves the shape count."""
        source_prs = Presentation()
        layout = source_prs.slide_layouts[0]
        source_slide = source_prs.slides.add_slide(layout)

        # Add various shapes
        source_slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1))
        # Named enum member instead of the bare integer 1.
        source_slide.shapes.add_shape(
            MSO_AUTO_SHAPE_TYPE.RECTANGLE, Inches(3), Inches(1), Inches(2), Inches(2)
        )
        source_slide.shapes.add_table(2, 2, Inches(1), Inches(4), Inches(4), Inches(2))

        source_shape_count = len(source_slide.shapes)

        # Copy to new presentation
        target_prs = Presentation()
        copier = SlideCopierManager(target_prs)
        copied_slide = copier.copy_slide(source_slide)

        assert copied_slide is not None

        # Layout placeholders may be handled differently in the copy, so the
        # copy is allowed to hold up to two shapes fewer than the source.
        target_shape_count = len(copied_slide.shapes)
        assert target_shape_count >= source_shape_count - 2

    def test_save_and_reopen_complex_slide(self):
        """Test that complex slides can be saved and reopened."""
        source_prs = Presentation()
        layout = source_prs.slide_layouts[0]
        source_slide = source_prs.slides.add_slide(layout)

        # Add multiple shapes
        source_slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1))
        source_slide.shapes.add_shape(
            MSO_AUTO_SHAPE_TYPE.RECTANGLE, Inches(3), Inches(1), Inches(2), Inches(2)
        )

        # Copy to new presentation
        target_prs = Presentation()
        copier = SlideCopierManager(target_prs)
        copier.copy_slide(source_slide)

        # Reserve an output path; the handle is closed before saving to it.
        with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as f:
            output_path = f.name

        try:
            target_prs.save(output_path)

            # Reopen to verify no corruption
            reopened = Presentation(output_path)
            assert len(reopened.slides) == 1

        finally:
            os.unlink(output_path)
def test_copy_shape_element_with_mapping(self):
    """copy_shape_element returns an element when given a relationship mapping."""
    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[0])

    box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1))
    box.text = "Test"

    # Fresh shape and creation IDs for the copy, drawn in that order.
    ids = IdManager(prs)
    shape_id = ids.generate_unique_shape_id()
    creation_id = ids.generate_unique_creation_id()

    # Copy with a one-entry relationship mapping.
    rid_map = {"rId1": "rId100"}
    copied = XmlUtils.copy_shape_element(
        box,
        shape_id,
        creation_id,
        relationship_mapping=rid_map,
    )

    assert copied is not None
def test_image_relationship_copy_creates_valid_reference(self):
    """Test that copied images have valid r:embed references in target slide.

    The test fails when no picture reaches the copied slide or when a
    picture has no ``r:embed`` reference at all. Previously both cases
    passed without checking anything.
    """
    import io as stdlib_io

    from PIL import Image

    a_ns = 'http://schemas.openxmlformats.org/drawingml/2006/main'
    r_ns = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

    source_prs = Presentation()
    layout = source_prs.slide_layouts[0]
    source_slide = source_prs.slides.add_slide(layout)

    # Create and add test image
    img = Image.new('RGB', (100, 100), color='green')
    img_bytes = stdlib_io.BytesIO()
    img.save(img_bytes, format='PNG')
    img_bytes.seek(0)

    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        f.write(img_bytes.getvalue())
        img_path = f.name

    try:
        source_slide.shapes.add_picture(img_path, Inches(1), Inches(1))

        # Copy slide
        target_prs = Presentation()
        copier = SlideCopierManager(target_prs)
        copied_slide = copier.copy_slide(source_slide)

        # Find the picture in the copied slide
        pictures = [
            shape for shape in copied_slide.shapes
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE
        ]
        assert pictures, "No picture shape found in copied slide"

        # Collect the r:embed references from the first picture's blips
        embed_refs = [
            blip.get(f'{{{r_ns}}}embed')
            for blip in pictures[0]._element.findall(f'.//{{{a_ns}}}blip')
        ]
        embed_refs = [ref for ref in embed_refs if ref]
        assert embed_refs, "Copied picture has no r:embed reference"

        # Every reference must resolve in the target slide's relationships
        for embed_ref in embed_refs:
            rel = copied_slide.part.rels.get(embed_ref)
            assert rel is not None, f"r:embed {embed_ref} not found in target slide relationships"

    finally:
        os.unlink(img_path)
def test_app_xml_metadata_updated_after_multiple_deletions(self, tmp_path):
    """After several deletions, the saved docProps/app.xml reports the remaining slide count."""
    import re
    import zipfile

    from gslides_api.pptx.slide_deleter import SlideDeleter

    # Build a ten-slide deck.
    prs = Presentation()
    first_layout = prs.slide_layouts[0]
    for _ in range(10):
        prs.slides.add_slide(first_layout)

    assert len(prs.slides) == 10

    # Indices are processed from highest to lowest so earlier deletions
    # do not shift the positions of the ones still to be removed.
    deleter = SlideDeleter(prs)
    for position in (8, 5, 2, 0):
        deleter.delete_slide(position)

    remaining_count = len(prs.slides)
    assert remaining_count == 6

    # Saving goes through the PowerPointPresentation wrapper.
    output_path = tmp_path / "output.pptx"
    wrapper = PowerPointPresentation(
        pptx_presentation=prs, file_path=str(output_path)
    )
    wrapper.save(api_client=PowerPointAPIClient())

    # Read the slide count recorded in the package metadata.
    with zipfile.ZipFile(output_path, 'r') as archive:
        app_xml = archive.read('docProps/app.xml').decode('utf-8')
        found = re.search(r'<Slides>(\d+)</Slides>', app_xml)
        assert found is not None
        assert int(found.group(1)) == remaining_count
def test_create_image_element_like_requires_slide_reference(self):
    """Test that create_image_element_like fails without slide reference.

    The wrapped element is built from a plain rectangle and its ``pptx_slide``
    attribute is deliberately left unset; the call is expected to raise a
    ``ValueError`` whose message mentions "slide reference".
    """
    # Single import of only the names this test uses.
    from gslides_api.adapters.pptx_adapter import (
        PowerPointAPIClient,
        validate_pptx_element,
    )

    prs = Presentation()
    layout = prs.slide_layouts[0]
    slide = prs.slides.add_slide(layout)
    shape = slide.shapes.add_shape(
        MSO_AUTO_SHAPE_TYPE.RECTANGLE,
        Inches(1), Inches(1), Inches(2), Inches(2)
    )

    element = validate_pptx_element(shape)
    # Don't set pptx_slide

    # Construct the client outside the raises-block so that only the call
    # under test can satisfy the expected exception.
    api_client = PowerPointAPIClient()
    with pytest.raises(ValueError, match="slide reference"):
        element.create_image_element_like(api_client=api_client)
def test_remove_layout_placeholders_preserves_added_content(self):
    """Shapes added by the test (textbox, rectangle) survive placeholder removal."""
    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[0])

    # Non-placeholder content: one textbox with text and one auto shape.
    kept_box = slide.shapes.add_textbox(
        Inches(1), Inches(1), Inches(5), Inches(1)
    )
    kept_box.text = "This should be preserved"
    slide.shapes.add_shape(
        MSO_AUTO_SHAPE_TYPE.RECTANGLE,
        Inches(1), Inches(3), Inches(2), Inches(1)
    )

    _remove_layout_placeholders(slide)

    survivors = list(slide.shapes)

    # The textbox text must still be found among the remaining shapes.
    surviving_texts = []
    for candidate in survivors:
        if hasattr(candidate, 'text') and candidate.text:
            surviving_texts.append(candidate.text)
    assert "This should be preserved" in surviving_texts

    # At least one auto shape (the rectangle) must remain.
    auto_shapes = [
        candidate for candidate in survivors
        if candidate.shape_type == MSO_SHAPE_TYPE.AUTO_SHAPE
    ]
    assert len(auto_shapes) >= 1
slide using copier (which now removes placeholders) + copier = SlideCopierManager(target_prs) + target_slide = copier._create_target_slide(source_slide=source_slide) + + assert target_slide is not None + + # Verify no placeholder shapes on the target slide + placeholders = [ + s for s in target_slide.shapes + if s.shape_type == MSO_SHAPE_TYPE.PLACEHOLDER + ] + assert len(placeholders) == 0, ( + "SlideCopierManager._create_target_slide should remove placeholders" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_pptx/test_pptx_table_roundtrip.py b/tests/test_pptx/test_pptx_table_roundtrip.py new file mode 100644 index 0000000..e6338f8 --- /dev/null +++ b/tests/test_pptx/test_pptx_table_roundtrip.py @@ -0,0 +1,610 @@ +""" +Comprehensive test suite for PowerPointTableElement.to_markdown_element() roundtrip functionality. + +Tests the complete flow: +1. Markdown string → MarkdownTableElement +2. MarkdownTableElement → PowerPointTableElement (via update_content) +3. PowerPointTableElement → MarkdownTableElement (via to_markdown_element) +4. 
def normalize_markdown(markdown: str) -> str:
    """Return *markdown* with outer whitespace removed and every line stripped.

    Lines that contain only whitespace become empty strings, so blank lines
    inside the text are kept as blank lines.
    """
    trimmed_rows = (row.strip() for row in markdown.strip().split("\n"))
    return "\n".join(trimmed_rows)
def test_3x3_table_with_headers_roundtrip(self, api_client, test_presentation):
    """Round-trip a header row plus two data rows through a 3x3 PowerPoint table."""
    _, slide = test_presentation

    people_md = "\n".join((
        "| Name | Age | City |",
        "|------|-----|------|",
        "| Alice | 25 | NYC |",
        "| Bob | 30 | LA |",
    ))
    source = MarkdownTableElement(name="People Table", content=people_md)

    element = PowerPointTableElement(pptx_element=create_test_table(slide, 3, 3))

    # Write the markdown into the table, then read it back out.
    element.update_content(api_client, source)
    result = element.to_markdown_element("People Table")

    assert result.content.headers == ["Name", "Age", "City"]
    assert len(result.content.rows) == 2
    assert result.content.rows[0] == ["Alice", "25", "NYC"]
    assert result.content.rows[1] == ["Bob", "30", "LA"]
def test_bold_text_in_cells(self, api_client, test_presentation):
    """Bold markers in header and body cells come back unchanged after a round-trip."""
    _, slide = test_presentation

    bold_md = "\n".join((
        "| **Bold Header** | Normal |",
        "|-----------------|--------|",
        "| **Bold** | Text |",
    ))
    source = MarkdownTableElement(name="Bold Table", content=bold_md)

    element = PowerPointTableElement(pptx_element=create_test_table(slide, 2, 2))
    element.update_content(api_client, source)
    result = element.to_markdown_element("Bold Table")

    # Markdown **bold** -> PowerPoint bold -> Markdown **bold**
    headers = result.content.headers
    assert headers[0] == "**Bold Header**"
    assert headers[1] == "Normal"
    first_row = result.content.rows[0]
    assert first_row[0] == "**Bold**"
    assert first_row[1] == "Text"

    # The rendered markdown must carry the bold markers as well.
    rendered = result.to_markdown()
    assert "**Bold Header**" in rendered
    assert "**Bold**" in rendered
def test_mixed_formatting(self, api_client, test_presentation):
    """Cells mixing bold, italic and plain text keep their markers after a round-trip."""
    _, slide = test_presentation

    mixed_md = "\n".join((
        "| **Bold** _italic_ | Text |",
        "|-------------------|------|",
        "| Normal | Mixed **bold** content |",
    ))
    source = MarkdownTableElement(name="Mixed Format", content=mixed_md)

    element = PowerPointTableElement(pptx_element=create_test_table(slide, 2, 2))
    element.update_content(api_client, source)
    result = element.to_markdown_element("Mixed Format")

    # Bold is expected as **...**; italic is expected as *...* on output
    # even though the input used underscores.
    first_header = result.content.headers[0]
    assert "**Bold**" in first_header
    assert "*italic*" in first_header

    body_row = result.content.rows[0]
    assert body_row[0] == "Normal"
    assert "**bold**" in body_row[1]
def test_empty_cells(self, api_client, test_presentation):
    """Empty cells round-trip as empty (or whitespace-only) strings."""
    _, slide = test_presentation

    sparse_md = "\n".join((
        "| Header | Empty |",
        "|--------|-------|",
        "| Data | |",
        "| | Data2 |",
    ))
    source = MarkdownTableElement(name="Empty Cells", content=sparse_md)

    element = PowerPointTableElement(pptx_element=create_test_table(slide, 3, 2))
    element.update_content(api_client, source)
    result = element.to_markdown_element("Empty Cells")

    assert result.content.headers == ["Header", "Empty"]
    assert len(result.content.rows) == 2

    top_row = result.content.rows[0]
    assert top_row[0] == "Data"
    # Blank cells are compared after stripping, so "" and " " both pass.
    assert top_row[1].strip() == ""

    bottom_row = result.content.rows[1]
    assert bottom_row[0].strip() == ""
    assert bottom_row[1] == "Data2"
def test_dimensions_metadata(self, api_client, test_presentation):
    """Extracted metadata reports the table's row and column counts."""
    _, slide = test_presentation

    grid_md = "\n".join((
        "| A | B | C |",
        "|---|---|---|",
        "| 1 | 2 | 3 |",
        "| 4 | 5 | 6 |",
    ))
    source = MarkdownTableElement(name="Dimensions Test", content=grid_md)

    element = PowerPointTableElement(pptx_element=create_test_table(slide, 3, 3))
    element.update_content(api_client, source)
    result = element.to_markdown_element("Dimensions Test")

    assert "rows" in result.metadata
    assert "columns" in result.metadata
    # The row count includes the header row: 1 header + 2 data rows.
    assert result.metadata["rows"] == 3
    assert result.metadata["columns"] == 3
def test_resize_table_does_not_crash(self, api_client, test_presentation):
    """Resizing a 2x2 table to 3x4 must not raise.

    Only the absence of an exception is checked, plus that the underlying
    table object is still reachable afterwards; the resulting dimensions
    are intentionally not asserted.
    """
    _, slide = test_presentation

    frame = create_test_table(slide, rows=2, cols=2)
    element = PowerPointTableElement(pptx_element=frame)

    # Any internal failure is expected to be handled inside resize itself.
    element.resize(api_client, rows=3, cols=4)

    assert frame.table is not None
def test_column_widths_preserved_when_dimensions_match(self, api_client, test_presentation):
    """Custom column widths stay untouched when new content has the same shape."""
    _, slide = test_presentation

    frame = create_test_table(slide, 2, 3)
    table = frame.table

    # Give the columns 1/2, 1/4 and 1/4 of the frame width respectively.
    frame_width = frame.width
    for column, divisor in zip(table.columns, (2, 4, 4)):
        column.width = frame_width // divisor

    widths_before = [column.width for column in table.columns]

    # Content with the same dimensions as the table (2 rows, 3 columns).
    same_shape_md = "\n".join((
        "| A | B | C |",
        "|---|---|---|",
        "| 1 | 2 | 3 |",
    ))
    source = MarkdownTableElement(name="Size Test", content=same_shape_md)
    element = PowerPointTableElement(pptx_element=frame)

    element.update_content(api_client, source, check_shape=False)

    widths_after = [column.width for column in table.columns]
    assert widths_after == widths_before, "Column widths should be preserved when dimensions match"
@pytest.fixture
def test_pptx_path():
    """Return the absolute path of the sample deck located next to this test module."""
    module_dir, _ = os.path.split(os.path.abspath(__file__))
    return os.path.join(module_dir, "Samplead Master Deck Template.pptx")
+ It should NOT: + - Escape curly brackets (\\{quarter\\}) + - Add extra underscores (__QBR__) + - Include HTML span tags for colors + """ + # Get the first slide + first_slide = pptx_presentation.slides[0] + + # Get text boxes (shapes with text frames) + text_boxes = [ + elem + for elem in first_slide.page_elements_flat + if hasattr(elem, "has_text") and elem.has_text + ] + + assert len(text_boxes) > 0, "First slide should have at least one text box" + + # Get the first NON-EMPTY text box (matches ingestion behavior) + first_text_box = None + for text_box in text_boxes: + text_content = text_box.read_text(as_markdown=True) + if text_content.strip(): + first_text_box = text_box + break + + assert first_text_box is not None, "First slide should have at least one non-empty text box" + + # Extract text as markdown + text_content = first_text_box.read_text(as_markdown=True) + + # Verify the text content + assert text_content is not None, "Text content should not be None" + + # Main assertions: verify correct extraction + assert ( + "{quarter}" in text_content + ), "Template variable {quarter} should be present without escaping" + assert "\\{" not in text_content, "Curly brackets should NOT be escaped" + assert "\\}" not in text_content, "Curly brackets should NOT be escaped" + assert "<span" not in text_content, "HTML span tags should NOT be in text content" + assert "style=" not in text_content, "Inline CSS should NOT be in text content" + + # Check for QBR text (may have markdown bold formatting but no extra underscores with spaces) + assert "QBR" in text_content, "Text 'QBR' should be present" + # The text should not have the pptx2md pattern of " __text__ " (spaces + underscores) + assert ( + " __QBR__ " not in text_content + ), "Should not have pptx2md-style bold formatting with spaces" + + # The text should be clean: either "QBR" or "**QBR**" (markdown bold) + # but not " __QBR__ " (pptx2md style) + assert "QBR" in text_content.replace("**", ""), "QBR should be in 
the text" + + def test_first_text_box_styles_separate_from_content(self, pptx_presentation): + """Test that style information is available separately from text content. + + Following the gslides-api pattern, styles should be extractable + via the styles() method rather than embedded in text. + """ + # Get the first slide + first_slide = pptx_presentation.slides[0] + + # Get the first NON-EMPTY text box + text_boxes = [ + elem + for elem in first_slide.page_elements_flat + if hasattr(elem, "has_text") and elem.has_text + ] + + first_text_box = None + for text_box in text_boxes: + text_content = text_box.read_text(as_markdown=True) + if text_content.strip(): + first_text_box = text_box + break + + assert first_text_box is not None, "First slide should have at least one non-empty text box" + + # Get styles separately + # Note: styles() method needs to be implemented in PowerPointShapeElement + try: + styles = first_text_box.styles() + + # Verify styles are returned + assert styles is not None, "Styles should be extractable" + + # The styles should contain information about bold and color + # (exact structure depends on implementation, but should be similar to gslides-api) + + except NotImplementedError: + pytest.skip("styles() method not yet implemented for PowerPoint elements") + + def test_template_variable_recognition(self, pptx_presentation): + """Test that template variables in curly brackets are preserved for parsing.""" + first_slide = pptx_presentation.slides[0] + + text_boxes = [ + elem + for elem in first_slide.page_elements_flat + if hasattr(elem, "has_text") and elem.has_text + ] + + # Check first NON-EMPTY text box for template variables + first_text_box = None + for text_box in text_boxes: + text_content = text_box.read_text(as_markdown=True) + if text_content.strip(): + first_text_box = text_box + break + + assert first_text_box is not None, "First slide should have at least one non-empty text box" + + text_content = 
first_text_box.read_text(as_markdown=True) + + # Verify template variables are unescaped and recognizable + template_vars = re.findall(r"\{([^}]+)\}", text_content) + + assert len(template_vars) > 0, "Should find at least one template variable" + assert "quarter" in template_vars, "Should recognize {quarter} as a template variable" + + def test_paragraph_newline_preservation(self, pptx_presentation): + """Test that newlines between paragraphs are preserved.""" + first_slide = pptx_presentation.slides[0] + + text_boxes = [ + elem + for elem in first_slide.page_elements_flat + if hasattr(elem, "has_text") and elem.has_text + ] + + # Find first NON-EMPTY text box + first_text_box = None + for text_box in text_boxes: + text_content = text_box.read_text(as_markdown=True) + if text_content.strip(): + first_text_box = text_box + break + + assert first_text_box is not None, "First slide should have at least one non-empty text box" + + text_content = first_text_box.read_text(as_markdown=True) + + # The first text box should contain a newline between "QBR" and "{quarter}" + assert "\n" in text_content, "Newlines should be preserved in text extraction" diff --git a/tests/test_pptx/test_pptx_text_frame_markdown.py b/tests/test_pptx/test_pptx_text_frame_markdown.py new file mode 100644 index 0000000..2389c87 --- /dev/null +++ b/tests/test_pptx/test_pptx_text_frame_markdown.py @@ -0,0 +1,445 @@ +""" +Test suite for PowerPointShapeElement text frame to markdown conversion using pptx2md. + +This module tests the enhanced text frame conversion capabilities that use +pptx2md for proper formatting, hyperlinks, colors, and special character handling. 
class TestPowerPointTextFrameMarkdown:
    """Test text frame to markdown conversion with pptx2md."""

    def create_mock_shape_with_text_frame(self, paragraphs_data):
        """Build a ``Mock`` shape whose text frame mirrors ``paragraphs_data``.

        Args:
            paragraphs_data: List of dicts, one per paragraph. Recognised keys
                are ``text``, ``level``, ``has_bullet`` and ``runs``. Each run
                is a dict with optional ``text``, ``bold``, ``italic``,
                ``underline``, ``color_rgb`` (an RGB triple) and ``hyperlink``.

        Returns:
            A ``Mock`` with a ``text_frame`` attribute holding the mocked
            paragraphs and runs.
        """

        def bullet_lookup(is_bullet):
            """Return a ``find`` stand-in that only resolves ``buChar`` tags."""

            def find(tag):
                # Something truthy for a bullet character, None for any other tag.
                return Mock() if is_bullet and "buChar" in tag else None

            return find

        def build_run(run_spec):
            """Create one mocked run with font, color and hyperlink attributes."""
            color = Mock()
            if "color_rgb" in run_spec:
                color.type = MSO_COLOR_TYPE.RGB
                color.rgb = RGBColor(*run_spec["color_rgb"])
            else:
                # Runs without an explicit color fall back to a theme color.
                color.type = MSO_COLOR_TYPE.SCHEME
                color.theme_color = MSO_THEME_COLOR.DARK_1

            font = Mock()
            font.bold = run_spec.get("bold", False)
            font.italic = run_spec.get("italic", False)
            font.underline = run_spec.get("underline", False)
            font.color = color

            link = Mock()
            link.address = run_spec.get("hyperlink", None)

            run = Mock()
            run.text = run_spec.get("text", "")
            run.font = font
            run.hyperlink = link
            return run

        def build_paragraph(para_spec):
            """Create one mocked paragraph, including its bullet-detection XML."""
            level = para_spec.get("level", 0)
            # An explicit has_bullet wins; otherwise any level above 0 implies a bullet.
            has_bullet = para_spec.get("has_bullet", level > 0)

            properties = Mock()
            properties.find = bullet_lookup(has_bullet)
            xml_element = Mock()
            xml_element.get_or_add_pPr = Mock(return_value=properties)

            paragraph = Mock()
            paragraph.level = level
            paragraph.text = para_spec.get("text", "")
            paragraph._element = xml_element
            paragraph.runs = [build_run(run_spec) for run_spec in para_spec.get("runs", [])]
            return paragraph

        text_frame = Mock()
        text_frame.paragraphs = [build_paragraph(para_spec) for para_spec in paragraphs_data]

        shape = Mock()
        shape.text_frame = text_frame
        return shape

    @staticmethod
    def _read_markdown(shape):
        """Wrap ``shape`` in a ``PowerPointShapeElement`` and read it as markdown."""
        element = PowerPointShapeElement(
            objectId="1",
            pptx_element=shape,
            alt_text=AbstractAltText(),
        )
        return element.read_text(as_markdown=True)

    def _markdown_for(self, paragraphs_data):
        """Mock a shape from ``paragraphs_data`` and return its markdown text."""
        return self._read_markdown(self.create_mock_shape_with_text_frame(paragraphs_data))

    def test_simple_text_conversion(self):
        """Plain text without formatting comes through unchanged."""
        result = self._markdown_for(
            [{"text": "Hello World", "runs": [{"text": "Hello World"}]}]
        )
        assert "Hello World" in result

    def test_bold_text_conversion(self):
        """A bold run is rendered with either markdown bold syntax."""
        result = self._markdown_for(
            [{"text": "Bold Text", "runs": [{"text": "Bold Text", "bold": True}]}]
        )
        assert "**Bold Text**" in result or "__Bold Text__" in result

    def test_italic_text_conversion(self):
        """An italic run is rendered with either markdown italic syntax."""
        result = self._markdown_for(
            [{"text": "Italic Text", "runs": [{"text": "Italic Text", "italic": True}]}]
        )
        assert "*Italic Text*" in result or "_Italic Text_" in result

    def test_hyperlink_conversion(self):
        """A run with a hyperlink address becomes a markdown link."""
        result = self._markdown_for(
            [
                {
                    "text": "Click here",
                    "runs": [{"text": "Click here", "hyperlink": "http://example.com"}],
                }
            ]
        )
        assert "[Click here](http://example.com)" in result

    def test_text_color_not_in_markdown(self):
        """RGB colors are ignored: the text appears without any HTML span."""
        result = self._markdown_for(
            [{"text": "Red Text", "runs": [{"text": "Red Text", "color_rgb": (255, 0, 0)}]}]
        )
        assert "Red Text" in result
        assert "<span" not in result  # No HTML tags in markdown output

    def test_combined_formatting(self):
        """Bold plus italic on one run still yields the run's text."""
        result = self._markdown_for(
            [
                {
                    "text": "Bold Italic",
                    "runs": [{"text": "Bold Italic", "bold": True, "italic": True}],
                }
            ]
        )
        # Only the text is checked; the marker combination is left unspecified.
        assert "Bold Italic" in result

    def test_bullet_points_single_level(self):
        """Two level-1 paragraphs are rendered as bullet items."""
        result = self._markdown_for(
            [
                {"text": "First item", "level": 1, "runs": [{"text": "First item"}]},
                {"text": "Second item", "level": 1, "runs": [{"text": "Second item"}]},
            ]
        )
        assert "First item" in result
        assert "Second item" in result
        assert " -" in result  # Should have bullet formatting with indentation

    def test_bullet_points_multi_level(self):
        """Paragraphs on levels 1 and 2 are all rendered as bullet items."""
        result = self._markdown_for(
            [
                {"text": "Level 1", "level": 1, "runs": [{"text": "Level 1"}]},
                {"text": "Level 2", "level": 2, "runs": [{"text": "Level 2"}]},
                {"text": "Level 2 again", "level": 2, "runs": [{"text": "Level 2 again"}]},
                {"text": "Back to Level 1", "level": 1, "runs": [{"text": "Back to Level 1"}]},
            ]
        )
        for expected_text in ("Level 1", "Level 2", "Level 2 again", "Back to Level 1"):
            assert expected_text in result
        # NOTE(review): both indentation checks use the same literal, so they
        # do not tell level 1 from level 2 - confirm the intended widths.
        assert " -" in result  # Level 1 indentation
        assert " -" in result  # Level 2 indentation

    def test_special_character_escaping(self):
        """Markdown metacharacters are either escaped or passed through verbatim."""
        raw_text = "Text with * and _ and [brackets]"
        result = self._markdown_for([{"text": raw_text, "runs": [{"text": raw_text}]}])
        assert "\\*" in result or "Text with * and _ and [brackets]" in result

    def test_mixed_paragraph_and_lists(self):
        """Level-0 paragraphs and level-1 bullets can be mixed in one frame."""
        result = self._markdown_for(
            [
                {"text": "Regular paragraph", "level": 0, "runs": [{"text": "Regular paragraph"}]},
                {"text": "First bullet", "level": 1, "runs": [{"text": "First bullet"}]},
                {"text": "Second bullet", "level": 1, "runs": [{"text": "Second bullet"}]},
                {"text": "Another paragraph", "level": 0, "runs": [{"text": "Another paragraph"}]},
            ]
        )
        assert "Regular paragraph" in result
        assert "First bullet" in result
        assert "Second bullet" in result
        assert "Another paragraph" in result
        assert " -" in result  # Should have bullet indentation

    def test_empty_text_frame(self):
        """A shape whose ``text_frame`` is ``None`` reads as an empty string."""
        frameless_shape = Mock()
        frameless_shape.text_frame = None

        assert self._read_markdown(frameless_shape) == ""

    def test_text_frame_with_empty_paragraphs(self):
        """An empty paragraph between two lines does not drop either line."""
        result = self._markdown_for(
            [
                {"text": "First line", "runs": [{"text": "First line"}]},
                {"text": "", "runs": []},  # Empty paragraph
                {"text": "Third line", "runs": [{"text": "Third line"}]},
            ]
        )
        assert "First line" in result
        assert "Third line" in result

    def test_complex_real_world_example(self):
        """A frame mixing a bold heading, bullets, color and a link keeps all its text."""
        result = self._markdown_for(
            [
                {
                    "text": "Project Overview",
                    "runs": [{"text": "Project Overview", "bold": True}],
                },
                {
                    "text": "Key objectives:",
                    "level": 0,
                    "runs": [{"text": "Key objectives:"}],
                },
                {
                    "text": "Implement new features",
                    "level": 1,
                    "runs": [{"text": "Implement new features"}],
                },
                {
                    "text": "Improve performance by 20%",
                    "level": 1,
                    "runs": [
                        {"text": "Improve performance by "},
                        {"text": "20%", "bold": True, "color_rgb": (255, 0, 0)},
                    ],
                },
                {
                    "text": "Documentation available at company.com",
                    "level": 1,
                    "runs": [
                        {"text": "Documentation available at "},
                        {"text": "company.com", "hyperlink": "https://company.com"},
                    ],
                },
            ]
        )

        # Only the presence of each text fragment is verified here.
        expected_fragments = (
            "Project Overview",
            "Key objectives:",
            "Implement new features",
            "Improve performance",
            "20%",
            "Documentation available",
            "company.com",
        )
        for fragment in expected_fragments:
            assert fragment in result
class TestPowerPointWriteMarkdown:
    """Test writing markdown to PowerPoint text frames."""

    @pytest.fixture
    def sample_pptx_path(self, tmp_path):
        """Return the path of the deck the tests write into.

        The Samplead template under ``playground/samplead`` (three directories
        above this file) is used when it exists. Otherwise a minimal deck is
        generated inside pytest's ``tmp_path``, so no stray file is left in
        the system temp directory.
        """
        sample_path = (
            Path(__file__).parent.parent.parent
            / "playground"
            / "samplead"
            / "Samplead Master Deck Template.pptx"
        )
        if sample_path.exists():
            return str(sample_path)
        return self._create_test_pptx(tmp_path / "generated_test_deck.pptx")

    def _create_test_pptx(self, path=None):
        """Create a one-slide deck containing a single text box.

        Args:
            path: Destination file. When omitted, a new ``.pptx`` file is
                created in the system temp directory and the caller is
                responsible for deleting it.

        Returns:
            The path of the saved deck as a string.
        """
        prs = Presentation()
        # Index 5 is "Title Only" in python-pptx's default template (6 is the
        # blank one), so the slide also carries a title placeholder.
        slide = prs.slides.add_slide(prs.slide_layouts[5])

        # add_textbox takes lengths in EMU; Inches() gives the box a usable
        # size and matches the geometry used by _get_text_shape.
        text_box = slide.shapes.add_textbox(
            left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2)
        )
        text_box.text_frame.text = "Initial text"

        if path is None:
            # Reserve a name and close the handle before saving: writing to a
            # file that is still open by name is not portable.
            handle, path = tempfile.mkstemp(suffix=".pptx")
            os.close(handle)

        prs.save(str(path))
        return str(path)

    def _get_text_shape(self, slide):
        """Return the first shape on ``slide`` with a text frame, adding one if needed."""
        for shape in slide.shapes:
            if getattr(shape, "text_frame", None) is not None:
                return shape

        # No text-bearing shape on the slide: create a text box to write into.
        text_box = slide.shapes.add_textbox(
            left=Inches(1), top=Inches(1), width=Inches(4), height=Inches(2)
        )
        text_box.text_frame.text = "Test"
        return text_box

    def _open_first_text_shape(self, pptx_path):
        """Load ``pptx_path`` and wrap the first slide's text shape in an element.

        Returns:
            ``(api_client, element, shape)`` where ``api_client`` holds the
            loaded presentation, ``element`` is the ``PowerPointShapeElement``
            under test and ``shape`` is the underlying python-pptx shape.
        """
        prs = Presentation(pptx_path)
        shape = self._get_text_shape(prs.slides[0])

        api_client = PowerPointAPIClient()
        api_client.prs = prs
        element = PowerPointShapeElement(
            objectId="test_1", pptx_element=shape, alt_text=AbstractAltText()
        )
        return api_client, element, shape

    @staticmethod
    def _runs(text_frame):
        """Yield every run of every paragraph in ``text_frame``."""
        for paragraph in text_frame.paragraphs:
            yield from paragraph.runs

    def test_simple_text_write(self, sample_pptx_path):
        """Test writing simple text without formatting."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(api_client=api_client, content="Hello, World!")

        assert shape.text_frame.text.strip() == "Hello, World!"

    def test_bold_text_write(self, sample_pptx_path):
        """Test writing bold text."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(api_client=api_client, content="This is **bold** text.")

        text_frame = shape.text_frame
        assert "This is" in text_frame.text
        assert "bold" in text_frame.text
        assert "text." in text_frame.text

        # The run holding the word "bold" must carry bold formatting.
        assert any(
            "bold" in run.text and run.font.bold for run in self._runs(text_frame)
        ), "Bold formatting not applied"

    def test_italic_text_write(self, sample_pptx_path):
        """Test writing italic text."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(api_client=api_client, content="This is *italic* text.")

        text_frame = shape.text_frame
        assert "italic" in text_frame.text

        # The run holding the word "italic" must carry italic formatting.
        assert any(
            "italic" in run.text and run.font.italic for run in self._runs(text_frame)
        ), "Italic formatting not applied"

    def test_combined_formatting_write(self, sample_pptx_path):
        """Test writing text with combined bold and italic."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(
            api_client=api_client, content="This is ***bold and italic*** text."
        )

        text_frame = shape.text_frame
        assert "bold and italic" in text_frame.text

        # Both flags must be set on the same run.
        assert any(
            "bold and italic" in run.text and run.font.bold and run.font.italic
            for run in self._runs(text_frame)
        ), "Combined bold+italic formatting not applied"

    def test_hyperlink_write(self, sample_pptx_path):
        """Test writing hyperlinks."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(
            api_client=api_client,
            content="Visit [our website](https://example.com) for more info.",
        )

        text_frame = shape.text_frame
        assert "our website" in text_frame.text

        # Collect the targets of every linked run that carries the link text.
        link_targets = [
            run.hyperlink.address
            for run in self._runs(text_frame)
            if "our website" in run.text and run.hyperlink.address
        ]
        assert link_targets, "Hyperlink not applied"
        assert all(target == "https://example.com" for target in link_targets)

    def test_bullet_list_write(self, sample_pptx_path):
        """Test writing bullet lists."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        markdown_text = "\n".join(
            [
                "Key points:",
                "* First item",
                "* Second item",
                "* Third item",
            ]
        )
        element.write_text(api_client=api_client, content=markdown_text)

        text_frame = shape.text_frame
        assert "First item" in text_frame.text
        assert "Second item" in text_frame.text
        assert "Third item" in text_frame.text

        # Count paragraphs with actual content: "Key points:" + 3 bullet items.
        non_empty_paragraphs = [p for p in text_frame.paragraphs if p.text.strip()]
        assert (
            len(non_empty_paragraphs) >= 4
        ), f"Expected at least 4 paragraphs, found {len(non_empty_paragraphs)}"
        assert any(
            "item" in p.text.lower() for p in text_frame.paragraphs
        ), "Bullet items should be present"

    @pytest.mark.skip(reason="Nested list support in markdown parser needs additional work")
    def test_nested_bullet_list_write(self, sample_pptx_path):
        """Test writing nested bullet lists."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        # NOTE(review): the nested items' indent is reconstructed as two
        # spaces - confirm against the intended markdown literal.
        markdown_text = "\n".join(
            [
                "Main points:",
                "* Top level item",
                "  * Nested item 1",
                "  * Nested item 2",
                "* Another top level",
            ]
        )
        element.write_text(api_client=api_client, content=markdown_text)

        text_frame = shape.text_frame
        assert "Top level item" in text_frame.text
        assert "Nested item 1" in text_frame.text

        # At least two distinct paragraph levels are expected (top and nested).
        levels_found = {p.level for p in text_frame.paragraphs if p.text.strip()}
        assert (
            len(levels_found) >= 2
        ), f"Expected at least 2 nesting levels, found {len(levels_found)}: {levels_found}"

    def test_autoscale_option(self, sample_pptx_path):
        """Test autoscale option applies fit_text() to shrink text to fit shape.

        Note: We use fit_text() instead of MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE because
        the latter only sets a flag that PowerPoint applies when you edit the text -
        it doesn't work when you just open the file. fit_text() directly calculates
        and sets the font size, working immediately.
        """
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        markdown_text = "This is a very long text that should trigger autoscaling if the text box is too small to fit all the content."
        element.write_text(api_client=api_client, content=markdown_text, autoscale=True)

        # Expected traces of fit_text(): auto_size NONE, word wrap on, and an
        # explicit font size on the first run.
        text_frame = shape.text_frame
        assert text_frame.auto_size == MSO_AUTO_SIZE.NONE  # fit_text sets NONE
        assert text_frame.word_wrap is True
        # The size check only applies when the frame has a first run to inspect.
        if text_frame.paragraphs and text_frame.paragraphs[0].runs:
            assert text_frame.paragraphs[0].runs[0].font.size is not None

    def test_word_wrap_enabled(self, sample_pptx_path):
        """Test that word wrap is enabled by default."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(
            api_client=api_client, content="Some text that should wrap to box width."
        )

        assert shape.text_frame.word_wrap is True

    def test_complex_markdown_write(self, sample_pptx_path):
        """Test writing complex markdown with multiple features."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        markdown_text = "\n".join(
            [
                "# Project Summary",
                "",
                "This is a **very important** project with *multiple* features:",
                "",
                "* Feature 1: Authentication",
                "* Feature 2: **Bold feature** with emphasis",
                "* Feature 3: Link to [documentation](https://docs.example.com)",
                "",
                "For more details, visit our ***bold and italic*** section.",
            ]
        )
        element.write_text(api_client=api_client, content=markdown_text)

        # Every piece of content must survive the conversion.
        full_text = shape.text_frame.text
        for expected in (
            "Project Summary",
            "very important",
            "Feature 1",
            "Feature 2",
            "Feature 3",
            "documentation",
            "bold and italic",
        ):
            assert expected in full_text

    def test_code_span_write(self, sample_pptx_path):
        """Test writing code spans."""
        api_client, element, shape = self._open_first_text_shape(sample_pptx_path)

        element.write_text(
            api_client=api_client,
            content="Use the `print()` function to display output.",
        )

        text_frame = shape.text_frame
        assert "print()" in text_frame.text

        # Code spans should be rendered with the Courier New monospace font.
        assert any(
            "print()" in run.text and run.font.name == "Courier New"
            for run in self._runs(text_frame)
        ), "Code span not formatted with monospace font"
prs.slides.add_slide(prs.slide_layouts[5]) # Blank layout + + left = Inches(1) + top = Inches(1) + width = Inches(8) + height = Inches(4) + + table_shape = slide.shapes.add_table(rows=3, cols=3, left=left, top=top, width=width, height=height) + return PowerPointTableElement(pptx_element=table_shape) + + +@pytest.fixture +def html_table_element(): + """Create an HTML table element for testing.""" + html_content = """ + <table> + <thead> + <tr><th>A</th><th>B</th><th>C</th></tr> + </thead> + <tbody> + <tr><td>1</td><td>2</td><td>3</td></tr> + <tr><td>4</td><td>5</td><td>6</td></tr> + </tbody> + </table> + """ + soup = BeautifulSoup(html_content, 'lxml') + table_tag = soup.find('table') + return HTMLTableElement(html_element=table_tag, objectId="test-html-table") + + +@pytest.fixture +def markdown_table_content(): + """Create a MarkdownTableElement for testing update_content.""" + markdown_input = """| A | B | C | +|---|---|---| +| 1 | 2 | 3 | +| 4 | 5 | 6 |""" + return MarkdownTableElement(name="Test Table", content=markdown_input) + + +# ============================================================================ +# PowerPointTableElement Tests +# ============================================================================ + +class TestPowerPointTableElementInterface: + """Test PowerPointTableElement conforms to AbstractTableElement interface.""" + + def test_resize_returns_float(self, pptx_api_client, pptx_table_element): + """Test that resize() returns a float (font scale factor).""" + result = pptx_table_element.resize( + api_client=pptx_api_client, + rows=4, + cols=3, + fix_width=True, + fix_height=True, + ) + + assert result is not None, "resize() should return a value, not None" + assert isinstance(result, float), f"resize() should return float, got {type(result)}" + assert result > 0, "Font scale factor should be positive" + + def test_update_content_accepts_font_scale_factor(self, pptx_api_client, pptx_table_element, markdown_table_content): + """Test that 
update_content() accepts font_scale_factor parameter.""" + # This should not raise TypeError about unexpected keyword argument + pptx_table_element.update_content( + api_client=pptx_api_client, + markdown_content=markdown_table_content, + check_shape=False, + font_scale_factor=0.8, + ) + + def test_get_column_count_returns_int(self, pptx_table_element): + """Test that get_column_count() exists and returns int.""" + result = pptx_table_element.get_column_count() + + assert isinstance(result, int), f"get_column_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 columns, got {result}" + + def test_get_row_count_returns_int(self, pptx_table_element): + """Test that get_row_count() exists and returns int (sanity check).""" + result = pptx_table_element.get_row_count() + + assert isinstance(result, int), f"get_row_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 rows, got {result}" + + +# ============================================================================ +# HTMLTableElement Tests +# ============================================================================ + +class TestHTMLTableElementInterface: + """Test HTMLTableElement conforms to AbstractTableElement interface.""" + + def test_resize_returns_float(self, html_api_client, html_table_element): + """Test that resize() returns a float (font scale factor).""" + result = html_table_element.resize( + api_client=html_api_client, + rows=4, + cols=3, + fix_width=True, + fix_height=True, + ) + + assert result is not None, "resize() should return a value, not None" + assert isinstance(result, float), f"resize() should return float, got {type(result)}" + assert result > 0, "Font scale factor should be positive" + + def test_update_content_accepts_font_scale_factor(self, html_api_client, html_table_element, markdown_table_content): + """Test that update_content() accepts font_scale_factor parameter.""" + # This should not raise TypeError about unexpected 
keyword argument + html_table_element.update_content( + api_client=html_api_client, + markdown_content=markdown_table_content, + check_shape=False, + font_scale_factor=0.8, + ) + + def test_get_column_count_returns_int(self, html_table_element): + """Test that get_column_count() exists and returns int.""" + result = html_table_element.get_column_count() + + assert isinstance(result, int), f"get_column_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 columns, got {result}" + + def test_get_row_count_returns_int(self, html_table_element): + """Test that get_row_count() exists and returns int (sanity check).""" + result = html_table_element.get_row_count() + + assert isinstance(result, int), f"get_row_count() should return int, got {type(result)}" + assert result == 2, f"Expected 2 rows (tbody only), got {result}" + + +# ============================================================================ +# Edge Cases +# ============================================================================ + +class TestTableAdapterEdgeCases: + """Test edge cases for table adapter interface.""" + + def test_pptx_resize_with_none_element(self, pptx_api_client): + """Test resize returns float even with invalid element.""" + elem = PowerPointTableElement() + elem.pptx_element = None + + result = elem.resize( + api_client=pptx_api_client, + rows=2, + cols=2, + ) + + assert isinstance(result, float), f"resize() should return float even with None element, got {type(result)}" + + def test_html_resize_with_none_element(self, html_api_client): + """Test resize returns float even with invalid element.""" + elem = HTMLTableElement(objectId="test-empty") + elem.html_element = None + + result = elem.resize( + api_client=html_api_client, + rows=2, + cols=2, + ) + + assert isinstance(result, float), f"resize() should return float even with None element, got {type(result)}" + + def test_pptx_get_column_count_with_none_element(self): + """Test get_column_count returns 0 
with invalid element.""" + elem = PowerPointTableElement() + elem.pptx_element = None + + result = elem.get_column_count() + + assert result == 0, f"get_column_count() should return 0 for None element, got {result}" + + def test_html_get_column_count_with_none_element(self): + """Test get_column_count returns 0 with invalid element.""" + elem = HTMLTableElement(objectId="test-empty") + elem.html_element = None + + result = elem.get_column_count() + + assert result == 0, f"get_column_count() should return 0 for None element, got {result}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_replace_image_preserves_alt_text.py b/tests/test_replace_image_preserves_alt_text.py new file mode 100644 index 0000000..6b7e948 --- /dev/null +++ b/tests/test_replace_image_preserves_alt_text.py @@ -0,0 +1,165 @@ +from unittest.mock import Mock, patch + +from gslides_api.domain.domain import Dimension, Image, ImageReplaceMethod, Size, Transform, Unit +from gslides_api.element.base import ElementKind +from gslides_api.element.image import ImageElement +from gslides_api.request.request import ReplaceImageRequest, UpdatePageElementAltTextRequest + + +class TestReplaceImagePreservesAltText: + """Test that replacing an image preserves the element's alt-text title and description.""" + + def _make_image_element(self, title=None, description=None): + image = Image( + contentUrl="https://example.com/old.png", + sourceUrl="https://example.com/old.png", + ) + transform = Transform(translateX=0, translateY=0, scaleX=1, scaleY=1) + size = Size( + width=Dimension(magnitude=914400, unit=Unit.EMU), + height=Dimension(magnitude=914400, unit=Unit.EMU), + ) + return ImageElement( + objectId="test-image-id", + image=image, + transform=transform, + size=size, + type=ElementKind.IMAGE, + title=title, + description=description, + presentation_id="test-pres-id", + slide_id="test-slide-id", + ) + + def 
test_replace_image_from_id_with_title_includes_alt_text_request(self): + """replace_image_from_id should include UpdatePageElementAltTextRequest when title is given.""" + mock_client = Mock() + mock_client.auto_flush = False + + # Capture the requests passed to batch_update + captured_requests = [] + mock_client.batch_update.side_effect = lambda reqs, pres_id: captured_requests.extend(reqs) + + ImageElement.replace_image_from_id( + image_id="test-image-id", + presentation_id="test-pres-id", + url="https://example.com/new.png", + api_client=mock_client, + title="my_chart", + description="Chart description", + ) + + assert len(captured_requests) == 2 + assert isinstance(captured_requests[0], ReplaceImageRequest) + assert isinstance(captured_requests[1], UpdatePageElementAltTextRequest) + + alt_text_req = captured_requests[1] + assert alt_text_req.objectId == "test-image-id" + assert alt_text_req.title == "my_chart" + assert alt_text_req.description == "Chart description" + + def test_replace_image_from_id_without_title_no_alt_text_request(self): + """replace_image_from_id should NOT include alt-text request when no title/description.""" + mock_client = Mock() + mock_client.auto_flush = False + + captured_requests = [] + mock_client.batch_update.side_effect = lambda reqs, pres_id: captured_requests.extend(reqs) + + ImageElement.replace_image_from_id( + image_id="test-image-id", + presentation_id="test-pres-id", + url="https://example.com/new.png", + api_client=mock_client, + ) + + assert len(captured_requests) == 1 + assert isinstance(captured_requests[0], ReplaceImageRequest) + + def test_replace_image_from_id_with_title_only(self): + """replace_image_from_id should handle title without description.""" + mock_client = Mock() + mock_client.auto_flush = False + + captured_requests = [] + mock_client.batch_update.side_effect = lambda reqs, pres_id: captured_requests.extend(reqs) + + ImageElement.replace_image_from_id( + image_id="test-image-id", + 
presentation_id="test-pres-id", + url="https://example.com/new.png", + api_client=mock_client, + title="my_chart", + ) + + assert len(captured_requests) == 2 + alt_text_req = captured_requests[1] + assert alt_text_req.title == "my_chart" + assert alt_text_req.description is None + + def test_replace_image_from_id_with_file_upload_preserves_title(self): + """replace_image_from_id should preserve title when using file upload.""" + mock_client = Mock() + mock_client.auto_flush = False + mock_client.upload_image_to_drive.return_value = "https://drive.google.com/uc?id=abc123" + + captured_requests = [] + mock_client.batch_update.side_effect = lambda reqs, pres_id: captured_requests.extend(reqs) + + ImageElement.replace_image_from_id( + image_id="test-image-id", + presentation_id="test-pres-id", + file="/path/to/chart.png", + api_client=mock_client, + title="chart_element", + ) + + mock_client.upload_image_to_drive.assert_called_once_with("/path/to/chart.png") + assert len(captured_requests) == 2 + assert isinstance(captured_requests[1], UpdatePageElementAltTextRequest) + assert captured_requests[1].title == "chart_element" + + def test_replace_image_instance_method_passes_title(self): + """ImageElement.replace_image() should pass self.title and self.description to replace_image_from_id.""" + element = self._make_image_element( + title="my_element_name", + description="my description", + ) + + mock_client = Mock() + mock_client.auto_flush = False + + captured_requests = [] + mock_client.batch_update.side_effect = lambda reqs, pres_id: captured_requests.extend(reqs) + + element.replace_image( + url="https://example.com/new.png", + api_client=mock_client, + enforce_size=False, + ) + + assert len(captured_requests) == 2 + alt_text_req = captured_requests[1] + assert isinstance(alt_text_req, UpdatePageElementAltTextRequest) + assert alt_text_req.objectId == "test-image-id" + assert alt_text_req.title == "my_element_name" + assert alt_text_req.description == "my description" 
def test_replace_image_instance_method_no_title(self):
    """Check that replace_image() sends no alt-text request when title and description are None.

    The element is built through ``_make_image_element`` with ``title=None``
    and ``description=None``. Every request list handed to the mocked
    client's ``batch_update`` is recorded, and the recording must end up
    holding exactly one entry, a ``ReplaceImageRequest``.
    """
    image_element = self._make_image_element(title=None, description=None)

    recorded = []

    def record_batch(reqs, pres_id):
        # Same parameter names as a caller may pass by keyword; returns None
        # implicitly, just like list.extend does.
        recorded.extend(reqs)

    client = Mock()
    client.auto_flush = False
    client.batch_update.side_effect = record_batch

    image_element.replace_image(
        url="https://example.com/new.png",
        api_client=client,
        enforce_size=False,
    )

    # Only the image replacement itself is expected; no alt-text update.
    assert len(recorded) == 1
    assert isinstance(recorded[0], ReplaceImageRequest)