From 05fa652bbd72a4c093def4275c2029d103f1b4bd Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Tue, 17 Mar 2026 17:39:21 +0100 Subject: [PATCH 1/3] Minor fixes and extend MCP server --- gslides_api/domain/domain.py | 17 +- gslides_api/element/base.py | 10 +- gslides_api/element/image.py | 17 + gslides_api/mcp/server.py | 336 +++++++++- tests/mcp_tests/test_copy_presentation.py | 136 +++++ tests/mcp_tests/test_new_tools.py | 572 ++++++++++++++++++ tests/test_absolute_size_position.py | 70 +++ tests/test_page_element.py | 5 +- .../test_replace_image_preserves_alt_text.py | 165 +++++ 9 files changed, 1296 insertions(+), 32 deletions(-) create mode 100644 tests/mcp_tests/test_copy_presentation.py create mode 100644 tests/mcp_tests/test_new_tools.py create mode 100644 tests/test_absolute_size_position.py create mode 100644 tests/test_replace_image_preserves_alt_text.py diff --git a/gslides_api/domain/domain.py b/gslides_api/domain/domain.py index fc10c55..9ce36d0 100644 --- a/gslides_api/domain/domain.py +++ b/gslides_api/domain/domain.py @@ -716,7 +716,7 @@ def _convert_emu_to_units(self, value_emu: float, units: OutputUnit) -> float: return from_emu(value_emu, units) - def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: + def absolute_size(self, units: OutputUnit) -> Optional[Tuple[float, float]]: """Calculate the absolute size of the element in the specified units. This method calculates the actual rendered size of the element, taking into @@ -728,18 +728,18 @@ def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: Returns: A tuple of (width, height) representing the element's dimensions - in the specified units. + in the specified units, or None if size or transform is not available + (e.g. for group container elements). Raises: ValueError: If units is not "cm" or "in". - ValueError: If element size is not available. 
""" if self.size is None: - raise ValueError("Element size is not available") + return None if self.transform is None: - raise ValueError("Element transform is not available") + return None # Extract width and height from size # Size can have width/height as either float or Dimension objects @@ -763,7 +763,7 @@ def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: return width_result, height_result - def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, float]: + def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Optional[Tuple[float, float]]: """Calculate the absolute position of the element on the page in the specified units. Position represents the distance of the top-left corner of the element @@ -775,11 +775,12 @@ def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, f Returns: A tuple of (x, y) representing the position in the specified units, where x is the horizontal distance from the left edge and y is the - vertical distance from the top edge of the slide. + vertical distance from the top edge of the slide, or None if + transform is not available (e.g. for group container elements). """ if self.transform is None: - raise ValueError("Element transform is not available") + return None # Extract position from transform (translateX, translateY are in EMUs) x_emu = self.transform.translateX diff --git a/gslides_api/element/base.py b/gslides_api/element/base.py index d0097e8..fa7c893 100644 --- a/gslides_api/element/base.py +++ b/gslides_api/element/base.py @@ -290,7 +290,7 @@ def to_markdown(self) -> str | None: """ raise NotImplementedError("Subclasses must implement to_markdown method") - def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: + def absolute_size(self, units: OutputUnit) -> Optional[Tuple[float, float]]: """Calculate the absolute size of the element in the specified units. 
This method calculates the actual rendered size of the element, taking into @@ -302,16 +302,15 @@ def absolute_size(self, units: OutputUnit) -> Tuple[float, float]: Returns: A tuple of (width, height) representing the element's dimensions - in the specified units. + in the specified units, or None if size/transform is not available. Raises: ValueError: If units is not "cm" or "in". - ValueError: If element size is not available. """ element_props = self.element_properties() return element_props.absolute_size(units) - def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, float]: + def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Optional[Tuple[float, float]]: """Calculate the absolute position of the element on the page in the specified units. Position represents the distance of the top-left corner of the element @@ -323,7 +322,8 @@ def absolute_position(self, units: OutputUnit = OutputUnit.CM) -> Tuple[float, f Returns: A tuple of (x, y) representing the position in the specified units, where x is the horizontal distance from the left edge and y is the - vertical distance from the top edge of the slide. + vertical distance from the top edge of the slide, or None if + transform is not available. 
""" element_props = self.element_properties() return element_props.absolute_position(units) diff --git a/gslides_api/element/image.py b/gslides_api/element/image.py index 7c0a566..e219efb 100644 --- a/gslides_api/element/image.py +++ b/gslides_api/element/image.py @@ -17,6 +17,7 @@ CreateImageRequest, ReplaceImageRequest, UpdateImagePropertiesRequest, + UpdatePageElementAltTextRequest, ) from gslides_api.utils import dict_to_dot_separated_field_list @@ -142,6 +143,8 @@ def replace_image( file=file, method=method, api_client=api_client, + title=image.title, + description=image.description, ) """ @@ -169,6 +172,8 @@ def replace_image_from_id( file: str | None = None, method: ImageReplaceMethod | None = None, api_client: Optional[GoogleAPIClient] = None, + title: str | None = None, + description: str | None = None, ): if url is None and file is None: raise ValueError("Must specify either url or file") @@ -180,6 +185,18 @@ def replace_image_from_id( url = client.upload_image_to_drive(file) requests = ImageElement._replace_image_requests(image_id, url, method) + + # Google Slides API replaceImage clears the alt-text title/description, + # so we restore them after the replacement. 
+ if title is not None or description is not None: + requests.append( + UpdatePageElementAltTextRequest( + objectId=image_id, + title=title, + description=description, + ) + ) + return client.batch_update(requests, presentation_id) def get_image_data(self) -> ImageData: diff --git a/gslides_api/mcp/server.py b/gslides_api/mcp/server.py index d163892..4d048a0 100644 --- a/gslides_api/mcp/server.py +++ b/gslides_api/mcp/server.py @@ -28,9 +28,11 @@ ThumbnailSize, Weight, ) +from gslides_api.agnostic.element import MarkdownTableElement from gslides_api.element.base import ElementKind from gslides_api.element.element import ImageElement from gslides_api.element.shape import ShapeElement +from gslides_api.element.table import TableElement from gslides_api.presentation import Presentation from gslides_api.request.request import UpdateShapePropertiesRequest @@ -546,25 +548,20 @@ def write_element_markdown( return _format_response(None, presentation_error(pres_id, e)) -# ============================================================================= -# IMAGE TOOLS -# ============================================================================= - - @mcp.tool() -def replace_element_image( +def write_table_markdown( presentation_id_or_url: str, slide_name: str, element_name: str, - image_url: str, + markdown_table: str, ) -> str: - """Replace an image element with a new image from URL. + """Write a markdown-formatted table to a table element, resizing if needed. 
Args: presentation_id_or_url: Google Slides URL or presentation ID slide_name: Slide name (first line of speaker notes) - element_name: Element name (image alt-title) - image_url: URL of new image + element_name: Element name (table alt-title) + markdown_table: Markdown table string (with | delimiters and --- separator) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -589,6 +586,257 @@ def replace_element_image( client.flush_batch_update() return _format_response(None, element_not_found_error(pres_id, slide_name, element_name, available)) + # Check if it's a table element + if not isinstance(element, TableElement): + client.flush_batch_update() + return _format_response( + None, + validation_error( + "element_name", + f"Element '{element_name}' is not a table element (type: {element.type.value})", + element_name, + ), + ) + + # Parse the markdown table + markdown_elem = MarkdownTableElement.from_markdown(element_name, markdown_table) + + # Compare shapes and resize if needed + current_shape = (element.table.rows, element.table.columns) + target_shape = markdown_elem.shape + font_scale_factor = 1.0 + + if current_shape != target_shape: + font_scale_factor = element.resize( + target_shape[0], target_shape[1], api_client=client + ) + client.flush_batch_update() + + # Re-fetch presentation to get updated table structure after resize + presentation = Presentation.from_id(pres_id, api_client=client) + slide = find_slide_by_name(presentation, slide_name) + element = find_element_by_name(slide, element_name) + + # Generate and execute content update requests + requests = element.content_update_requests( + markdown_elem, check_shape=False, font_scale_factor=font_scale_factor + ) + client.batch_update(requests, pres_id) + client.flush_batch_update() + + result = SuccessResponse( + message=f"Successfully wrote table to element '{element_name}'", + details={ + "element_id": element.objectId, + "slide_name": slide_name, + "table_shape": list(target_shape), + 
"resized": current_shape != target_shape, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error writing table markdown: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + +@mcp.tool() +def bulk_write_element_markdown( + presentation_id_or_url: str, + writes: str, +) -> str: + """Write markdown content to multiple shape elements in a single batch operation. + + Args: + presentation_id_or_url: Google Slides URL or presentation ID + writes: JSON string containing a list of write operations. + Each entry: {"slide_name": str, "element_name": str, "markdown": str} + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + # Parse writes JSON + try: + write_list = json.loads(writes) + except json.JSONDecodeError as e: + return _format_response( + None, + validation_error("writes", f"Invalid JSON: {e}", writes[:200]), + ) + + if not isinstance(write_list, list): + return _format_response( + None, + validation_error("writes", "Expected a JSON array of write operations", type(write_list).__name__), + ) + + # Validate each entry has required keys + required_keys = {"slide_name", "element_name", "markdown"} + for i, entry in enumerate(write_list): + if not isinstance(entry, dict): + return _format_response( + None, + validation_error("writes", f"Entry {i} is not an object", str(entry)[:200]), + ) + missing = required_keys - set(entry.keys()) + if missing: + return _format_response( + None, + validation_error("writes", f"Entry {i} missing keys: {missing}", str(entry)[:200]), + ) + + client = get_api_client() + + try: + presentation = Presentation.from_id(pres_id, api_client=client) + + # Cache slides by name for efficient lookup + slides_by_name = {} + for slide in presentation.slides: + name = get_slide_name(slide) + if name: + 
slides_by_name[name] = slide + + successes = [] + failures = [] + + for entry in write_list: + slide_name = entry["slide_name"] + element_name = entry["element_name"] + markdown = entry["markdown"] + + try: + slide = slides_by_name.get(slide_name) + if slide is None: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Slide '{slide_name}' not found", + }) + continue + + element = find_element_by_name(slide, element_name) + if element is None: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Element '{element_name}' not found", + }) + continue + + if not isinstance(element, ShapeElement): + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": f"Element '{element_name}' is not a text element (type: {element.type.value})", + }) + continue + + element.write_text(markdown, as_markdown=True, api_client=client) + successes.append({ + "slide_name": slide_name, + "element_name": element_name, + "element_id": element.objectId, + }) + except Exception as entry_error: + failures.append({ + "slide_name": slide_name, + "element_name": element_name, + "error": str(entry_error), + }) + + client.flush_batch_update() + + result = SuccessResponse( + message=f"Bulk write completed: {len(successes)} succeeded, {len(failures)} failed", + details={ + "total": len(write_list), + "succeeded": len(successes), + "failed": len(failures), + "successes": successes, + "failures": failures, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error in bulk write: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + +# ============================================================================= +# IMAGE TOOLS +# ============================================================================= + + +@mcp.tool() +def replace_element_image( + presentation_id_or_url: str, + slide_name: str, + element_name: 
str = None, + image_source: str = "", + element_id: str = None, +) -> str: + """Replace an image element with a new image from a URL or local file path. + + Args: + presentation_id_or_url: Google Slides URL or presentation ID + slide_name: Slide name (first line of speaker notes) + element_name: Element name (image alt-title). Either this or element_id must be provided. + image_source: URL (http/https) or local file path of the new image + element_id: Element object ID (alternative to element_name, for unnamed elements) + """ + if element_name is None and element_id is None: + return _format_response( + None, + validation_error("element_name", "Either element_name or element_id must be provided", None), + ) + + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + presentation = Presentation.from_id(pres_id, api_client=client) + slide = find_slide_by_name(presentation, slide_name) + + if slide is None: + available = get_available_slide_names(presentation) + client.flush_batch_update() + return _format_response(None, slide_not_found_error(pres_id, slide_name, available)) + + # Find element by name or by ID + element = None + if element_id is not None: + for el in slide.page_elements_flat: + if el.objectId == element_id: + element = el + break + if element is None: + available = get_available_element_names(slide) + client.flush_batch_update() + return _format_response( + None, + validation_error( + "element_id", + f"No element found with ID '{element_id}' on slide '{slide_name}'", + element_id, + ), + ) + else: + element = find_element_by_name(slide, element_name) + if element is None: + available = get_available_element_names(slide) + client.flush_batch_update() + return _format_response(None, element_not_found_error(pres_id, slide_name, element_name, available)) + + display_name = 
element_name or element_id + # Check if it's an image element if not isinstance(element, ImageElement): client.flush_batch_update() @@ -596,21 +844,24 @@ def replace_element_image( None, validation_error( "element_name", - f"Element '{element_name}' is not an image element (type: {element.type.value})", - element_name, + f"Element '{display_name}' is not an image element (type: {element.type.value})", + display_name, ), ) - # Replace the image - element.replace_image(url=image_url, api_client=client) + # Replace the image - route to url= or file= based on source + if image_source.startswith(("http://", "https://")): + element.replace_image(url=image_source, api_client=client) + else: + element.replace_image(file=image_source, api_client=client) client.flush_batch_update() result = SuccessResponse( - message=f"Successfully replaced image in element '{element_name}'", + message=f"Successfully replaced image in element '{display_name}'", details={ "element_id": element.objectId, "slide_name": slide_name, - "new_image_url": image_url, + "image_source": image_source, }, ) return _format_response(result) @@ -779,6 +1030,59 @@ def delete_slide( return _format_response(None, presentation_error(pres_id, e)) +# ============================================================================= +# PRESENTATION MANIPULATION TOOLS +# ============================================================================= + + +@mcp.tool() +def copy_presentation( + presentation_id_or_url: str, + copy_title: str = None, + folder_id: str = None, +) -> str: + """Copy an entire presentation to create a new one. 
+ + Args: + presentation_id_or_url: Google Slides URL or presentation ID + copy_title: Title for the copy (defaults to "Copy of {original title}") + folder_id: Google Drive folder ID to place the copy in (optional) + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + # Load presentation to get its title for the default copy name + presentation = Presentation.from_id(pres_id, api_client=client) + original_title = presentation.title or "Untitled" + + if copy_title is None: + copy_title = f"Copy of {original_title}" + + # Copy the presentation + copy_result = client.copy_presentation(pres_id, copy_title, folder_id) + new_pres_id = copy_result["id"] + + result = SuccessResponse( + message=f"Successfully copied presentation '{original_title}'", + details={ + "original_presentation_id": pres_id, + "new_presentation_id": new_pres_id, + "new_presentation_url": f"https://docs.google.com/presentation/d/{new_pres_id}/edit", + "new_title": copy_title, + }, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error copying presentation: {e}\n{traceback.format_exc()}") + return _format_response(None, presentation_error(pres_id, e)) + + # ============================================================================= # MAIN ENTRY POINT # ============================================================================= diff --git a/tests/mcp_tests/test_copy_presentation.py b/tests/mcp_tests/test_copy_presentation.py new file mode 100644 index 0000000..6e1e228 --- /dev/null +++ b/tests/mcp_tests/test_copy_presentation.py @@ -0,0 +1,136 @@ +"""Tests for the copy_presentation MCP tool.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from gslides_api.mcp.server import copy_presentation + + +@pytest.fixture +def mock_api_client(): + """Create a 
mock API client.""" + client = Mock() + client.copy_presentation.return_value = {"id": "new_pres_id_123"} + client.flush_batch_update.return_value = None + return client + + +@pytest.fixture +def mock_presentation(): + """Create a mock Presentation object.""" + pres = Mock() + pres.title = "My Presentation" + pres.presentationId = "original_pres_id" + return pres + + +class TestCopyPresentation: + """Tests for the copy_presentation tool.""" + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_default_title(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation with default title.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id")) + + assert result["success"] is True + assert result["message"] == "Successfully copied presentation 'My Presentation'" + assert result["details"]["original_presentation_id"] == "original_pres_id" + assert result["details"]["new_presentation_id"] == "new_pres_id_123" + assert result["details"]["new_title"] == "Copy of My Presentation" + assert "docs.google.com/presentation/d/new_pres_id_123/edit" in result["details"]["new_presentation_url"] + + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "Copy of My Presentation", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_custom_title(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation with a custom title.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id", copy_title="Custom Title")) + + assert result["success"] is True + assert result["details"]["new_title"] == "Custom 
Title" + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "Custom Title", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_folder_id(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation into a specific folder.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + result = json.loads(copy_presentation("original_pres_id", copy_title="In Folder", folder_id="folder_abc")) + + assert result["success"] is True + mock_api_client.copy_presentation.assert_called_once_with("original_pres_id", "In Folder", "folder_abc") + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_url_input(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test copying a presentation using a Google Slides URL.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + + url = "https://docs.google.com/presentation/d/abc123_xyz/edit" + result = json.loads(copy_presentation(url, copy_title="From URL")) + + assert result["success"] is True + mock_pres_class.from_id.assert_called_once_with("abc123_xyz", api_client=mock_api_client) + mock_api_client.copy_presentation.assert_called_once_with("abc123_xyz", "From URL", None) + + def test_copy_with_invalid_url(self): + """Test copying with an invalid Google Slides URL.""" + result = json.loads(copy_presentation("https://example.com/not-a-slides-url")) + + assert result["error"] is True + assert result["error_type"] == "ValidationError" + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_with_untitled_presentation(self, mock_get_client, mock_pres_class, mock_api_client): + """Test copying a presentation with no title defaults 
correctly.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.title = None + mock_pres.presentationId = "pres_no_title" + mock_pres_class.from_id.return_value = mock_pres + + result = json.loads(copy_presentation("pres_no_title")) + + assert result["success"] is True + assert result["details"]["new_title"] == "Copy of Untitled" + mock_api_client.copy_presentation.assert_called_once_with("pres_no_title", "Copy of Untitled", None) + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_presentation_api_error(self, mock_get_client, mock_pres_class, mock_api_client, mock_presentation): + """Test error handling when the API call fails.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = mock_presentation + mock_api_client.copy_presentation.side_effect = Exception("Drive API quota exceeded") + + result = json.loads(copy_presentation("original_pres_id", copy_title="Will Fail")) + + assert result["error"] is True + assert "PresentationError" in result["error_type"] + assert "Drive API quota exceeded" in result["message"] + + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_copy_presentation_load_error(self, mock_get_client, mock_pres_class, mock_api_client): + """Test error handling when loading the original presentation fails.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.side_effect = Exception("Presentation not found") + + result = json.loads(copy_presentation("nonexistent_id")) + + assert result["error"] is True + assert "Presentation not found" in result["message"] diff --git a/tests/mcp_tests/test_new_tools.py b/tests/mcp_tests/test_new_tools.py new file mode 100644 index 0000000..1981a70 --- /dev/null +++ b/tests/mcp_tests/test_new_tools.py @@ -0,0 +1,572 @@ +"""Tests for new MCP tools: replace_element_image (file paths), write_table_markdown, 
bulk_write_element_markdown.""" + +import json +from unittest.mock import Mock, patch, call + +import pytest + +from gslides_api.element.base import ElementKind +from gslides_api.mcp.server import ( + bulk_write_element_markdown, + replace_element_image, + write_table_markdown, +) + + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def mock_api_client(): + """Create a mock API client.""" + client = Mock() + client.flush_batch_update.return_value = None + client.batch_update.return_value = None + return client + + +@pytest.fixture +def mock_slide(): + """Create a mock slide with page_elements_flat.""" + slide = Mock() + slide.objectId = "slide_001" + return slide + + +@pytest.fixture +def mock_image_element(): + """Create a mock ImageElement.""" + from gslides_api.element.element import ImageElement + + element = Mock(spec=ImageElement) + element.objectId = "img_001" + element.type = ElementKind.IMAGE + element.replace_image = Mock() + return element + + +@pytest.fixture +def mock_shape_element(): + """Create a mock ShapeElement.""" + from gslides_api.element.shape import ShapeElement + + element = Mock(spec=ShapeElement) + element.objectId = "shape_001" + element.type = ElementKind.SHAPE + element.write_text = Mock() + return element + + +@pytest.fixture +def mock_table_element(): + """Create a mock TableElement.""" + from gslides_api.element.table import TableElement + + element = Mock(spec=TableElement) + element.objectId = "table_001" + element.type = ElementKind.TABLE + element.table = Mock() + element.table.rows = 3 + element.table.columns = 2 + element.resize = Mock(return_value=1.0) + element.content_update_requests = Mock(return_value=[]) + return element + + +@pytest.fixture +def mock_presentation(mock_slide): + """Create a mock presentation with one slide.""" + pres = Mock() + pres.slides = [mock_slide] + 
pres.presentationId = "pres_123" + return pres + + +# ============================================================================= +# Tests for replace_element_image (URL vs file path routing) +# ============================================================================= + + +class TestReplaceElementImageRouting: + """Test that replace_element_image routes URL vs file path correctly.""" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_url_routed_to_url_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that http/https URLs are passed as url= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "https://example.com/image.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + url="https://example.com/image.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_http_url_routed_to_url_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that http:// URLs are also routed to url= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = 
mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "http://example.com/image.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + url="http://example.com/image.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_local_file_routed_to_file_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that local file paths are passed as file= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "/tmp/chart.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + file="/tmp/chart.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_relative_file_routed_to_file_param( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that relative file paths are passed as file= parameter.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", 
"images/chart.png" + )) + + assert result["success"] is True + mock_image_element.replace_image.assert_called_once_with( + file="images/chart.png", api_client=mock_api_client + ) + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_response_contains_image_source( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_image_element, + ): + """Test that the response includes image_source field.""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_image_element + + result = json.loads(replace_element_image( + "pres_123", "slide1", "my_image", "/tmp/chart.png" + )) + + assert result["details"]["image_source"] == "/tmp/chart.png" + + +# ============================================================================= +# Tests for write_table_markdown +# ============================================================================= + + +class TestWriteTableMarkdown: + """Tests for the write_table_markdown tool.""" + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.find_slide_by_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_write_table_same_shape( + self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element, + mock_api_client, mock_slide, mock_table_element, + ): + """Test writing a table that matches the existing table shape (no resize).""" + mock_get_client.return_value = mock_api_client + mock_pres_class.from_id.return_value = Mock(slides=[mock_slide]) + mock_find_slide.return_value = mock_slide + mock_find_element.return_value = mock_table_element + + # Table has 3 rows, 2 
@patch("gslides_api.mcp.server.find_element_by_name")
@patch("gslides_api.mcp.server.find_slide_by_name")
@patch("gslides_api.mcp.server.Presentation")
@patch("gslides_api.mcp.server.get_api_client")
def test_write_table_with_resize(
    self, mock_get_client, mock_pres_class, mock_find_slide, mock_find_element,
    mock_api_client, mock_slide, mock_table_element,
):
    """A markdown table of a different shape triggers a resize before the write."""
    mock_get_client.return_value = mock_api_client

    # Element handed back by the second lookup, i.e. after the resize.
    resized_table_element = Mock(
        objectId="table_001",
        content_update_requests=Mock(return_value=[]),
    )

    # Two presentation loads: one before the resize, one after it.
    mock_pres_class.from_id.side_effect = [
        Mock(slides=[mock_slide]),
        Mock(slides=[mock_slide]),
    ]
    mock_find_slide.return_value = mock_slide
    # First lookup yields the original table, second the resized one.
    mock_find_element.side_effect = [mock_table_element, resized_table_element]

    # Existing table is 3x2; the markdown below is 4x3.
    md_table = "| A | B | C |\n|---|---|---|\n| 1 | 2 | 3 |\n| 4 | 5 | 6 |\n| 7 | 8 | 9 |"
    mock_table_element.resize.return_value = 0.8

    with patch("gslides_api.mcp.server.MarkdownTableElement") as mock_mte:
        mock_md_elem = Mock(shape=(4, 3))
        mock_mte.from_markdown.return_value = mock_md_elem

        raw_response = write_table_markdown("pres_123", "slide1", "my_table", md_table)
        response = json.loads(raw_response)

    details = response["details"]
    assert response["success"] is True
    assert details["resized"] is True
    assert details["table_shape"] == [4, 3]
    mock_table_element.resize.assert_called_once_with(4, 3, api_client=mock_api_client)
    # The scale factor returned by resize() is applied to the re-fetched element.
    resized_table_element.content_update_requests.assert_called_once_with(
        mock_md_elem, check_shape=False, font_scale_factor=0.8
    )
@patch("gslides_api.mcp.server.find_element_by_name")
@patch("gslides_api.mcp.server.get_slide_name")
@patch("gslides_api.mcp.server.Presentation")
@patch("gslides_api.mcp.server.get_api_client")
def test_successful_bulk_write(
    self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element,
    mock_api_client, mock_slide, mock_shape_element,
):
    """Every entry of a well-formed batch is written and counted as a success."""
    mock_get_client.return_value = mock_api_client
    mock_pres_class.from_id.return_value = Mock(slides=[mock_slide])
    mock_get_slide_name.return_value = "slide1"
    mock_find_element.return_value = mock_shape_element

    # (element name, markdown) pairs, all targeting the same slide.
    entries = [("title", "# Hello"), ("body", "World")]
    payload = json.dumps([
        {"slide_name": "slide1", "element_name": element_name, "markdown": markdown}
        for element_name, markdown in entries
    ])

    response = json.loads(bulk_write_element_markdown("pres_123", payload))

    details = response["details"]
    assert response["success"] is True
    assert details["succeeded"] == 2
    assert details["failed"] == 0
    assert mock_shape_element.write_text.call_count == 2
@patch("gslides_api.mcp.server.find_element_by_name")
@patch("gslides_api.mcp.server.get_slide_name")
@patch("gslides_api.mcp.server.Presentation")
@patch("gslides_api.mcp.server.get_api_client")
def test_partial_failure_slide_not_found(
    self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element,
    mock_api_client, mock_slide, mock_shape_element,
):
    """An unknown slide name fails only its own entry; the rest still succeed."""
    mock_get_client.return_value = mock_api_client
    mock_pres_class.from_id.return_value = Mock(slides=[mock_slide])
    # The only slide in the deck is reported under the name "slide1".
    mock_get_slide_name.return_value = "slide1"
    mock_find_element.return_value = mock_shape_element

    known_slide_write = {
        "slide_name": "slide1", "element_name": "title", "markdown": "# Hello",
    }
    unknown_slide_write = {
        "slide_name": "nonexistent", "element_name": "body", "markdown": "World",
    }
    payload = json.dumps([known_slide_write, unknown_slide_write])

    response = json.loads(bulk_write_element_markdown("pres_123", payload))

    details = response["details"]
    assert response["success"] is True
    assert details["succeeded"] == 1
    assert details["failed"] == 1
    assert "not found" in details["failures"][0]["error"]
mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + + # First call returns shape, second returns None (not found) + mock_find_element.side_effect = [mock_shape_element, None] + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "title", "markdown": "# Hello"}, + {"slide_name": "slide1", "element_name": "missing_elem", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "not found" in result["details"]["failures"][0]["error"] + + @patch("gslides_api.mcp.server.find_element_by_name") + @patch("gslides_api.mcp.server.get_slide_name") + @patch("gslides_api.mcp.server.Presentation") + @patch("gslides_api.mcp.server.get_api_client") + def test_partial_failure_wrong_element_type( + self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element, + mock_api_client, mock_slide, mock_table_element, mock_shape_element, + ): + """Test that non-shape elements are reported as failures.""" + mock_get_client.return_value = mock_api_client + mock_pres = Mock() + mock_pres.slides = [mock_slide] + mock_pres_class.from_id.return_value = mock_pres + mock_get_slide_name.return_value = "slide1" + + # First returns table (wrong type), second returns shape (correct) + mock_find_element.side_effect = [mock_table_element, mock_shape_element] + + writes = json.dumps([ + {"slide_name": "slide1", "element_name": "my_table", "markdown": "# Hello"}, + {"slide_name": "slide1", "element_name": "title", "markdown": "World"}, + ]) + + result = json.loads(bulk_write_element_markdown("pres_123", writes)) + + assert result["success"] is True + assert result["details"]["succeeded"] == 1 + assert result["details"]["failed"] == 1 + assert "not a text element" in 
@patch("gslides_api.mcp.server.find_element_by_name")
@patch("gslides_api.mcp.server.get_slide_name")
@patch("gslides_api.mcp.server.Presentation")
@patch("gslides_api.mcp.server.get_api_client")
def test_write_text_exception_captured(
    self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element,
    mock_api_client, mock_slide, mock_shape_element,
):
    """An exception raised by one ``write_text`` call is recorded for that entry only."""
    from gslides_api.element.shape import ShapeElement

    def make_shape(object_id, **write_text_config):
        # Spec'd mock so the tool's shape-type check sees a ShapeElement.
        shape = Mock(spec=ShapeElement)
        shape.objectId = object_id
        shape.type = ElementKind.SHAPE
        shape.write_text = Mock(**write_text_config)
        return shape

    mock_get_client.return_value = mock_api_client
    mock_pres_class.from_id.return_value = Mock(slides=[mock_slide])
    mock_get_slide_name.return_value = "slide1"

    good_element = make_shape("shape_good")
    bad_element = make_shape("shape_bad", side_effect=RuntimeError("API error"))

    # Lookups are served in request order: the failing element comes first.
    mock_find_element.side_effect = [bad_element, good_element]

    payload = json.dumps([
        {"slide_name": "slide1", "element_name": "bad", "markdown": "fail"},
        {"slide_name": "slide1", "element_name": "good", "markdown": "succeed"},
    ])

    response = json.loads(bulk_write_element_markdown("pres_123", payload))

    details = response["details"]
    assert response["success"] is True
    assert details["succeeded"] == 1
    assert details["failed"] == 1
    assert "API error" in details["failures"][0]["error"]
@patch("gslides_api.mcp.server.find_element_by_name")
@patch("gslides_api.mcp.server.get_slide_name")
@patch("gslides_api.mcp.server.Presentation")
@patch("gslides_api.mcp.server.get_api_client")
def test_markdown_with_escaped_newlines(
    self, mock_get_client, mock_pres_class, mock_get_slide_name, mock_find_element,
    mock_api_client, mock_slide, mock_shape_element,
):
    r"""JSON ``\n`` escapes in the payload arrive at ``write_text`` as real newlines."""
    mock_get_client.return_value = mock_api_client
    mock_pres_class.from_id.return_value = Mock(slides=[mock_slide])
    mock_get_slide_name.return_value = "slide1"
    mock_find_element.return_value = mock_shape_element

    # Hand-written JSON: the Python "\\n" below is a backslash followed by "n",
    # i.e. a JSON newline escape rather than a literal line break.
    writes = '[{"slide_name": "slide1", "element_name": "title", "markdown": "line1\\nline2\\nline3"}]'

    response = json.loads(bulk_write_element_markdown("pres_123", writes))

    assert response["success"] is True
    # After JSON decoding the markdown holds actual newline characters.
    expected_markdown = "line1\nline2\nline3"
    mock_shape_element.write_text.assert_called_once_with(
        expected_markdown, as_markdown=True, api_client=mock_api_client
    )
class TestAbsoluteSizeNone:
    """``absolute_size`` yields ``None`` unless both size and transform are set."""

    @staticmethod
    def _identity_transform():
        """Build an untranslated, unscaled transform."""
        return Transform(translateX=0, translateY=0, scaleX=1, scaleY=1)

    @staticmethod
    def _square_size():
        """Build a 914400 x 914400 size (asserted below to be ~1.0 in inches)."""
        return Size(width=914400, height=914400)

    def test_returns_none_when_size_is_none(self):
        element_props = PageElementProperties(
            size=None, transform=self._identity_transform()
        )
        assert element_props.absolute_size(units=OutputUnit.CM) is None

    def test_returns_none_when_transform_is_none(self):
        element_props = PageElementProperties(
            size=self._square_size(), transform=None
        )
        assert element_props.absolute_size(units=OutputUnit.CM) is None

    def test_returns_none_when_both_missing(self):
        element_props = PageElementProperties(size=None, transform=None)
        assert element_props.absolute_size(units=OutputUnit.CM) is None

    def test_returns_tuple_when_both_present(self):
        element_props = PageElementProperties(
            size=self._square_size(), transform=self._identity_transform()
        )

        measured = element_props.absolute_size(units=OutputUnit.IN)

        assert measured is not None
        assert isinstance(measured, tuple)
        assert len(measured) == 2
        assert measured[0] == pytest.approx(1.0, abs=0.01)
        assert measured[1] == pytest.approx(1.0, abs=0.01)
class TestReplaceImagePreservesAltText:
    """Replacing an image must keep the element's alt-text title and description."""

    @staticmethod
    def _recording_client():
        """Return ``(client, sent)``; ``sent`` collects every request given to batch_update."""
        client = Mock()
        client.auto_flush = False
        sent = []
        client.batch_update.side_effect = lambda reqs, pres_id: sent.extend(reqs)
        return client, sent

    def _make_image_element(self, title=None, description=None):
        """Build an image element carrying the given alt-text fields."""
        return ImageElement(
            objectId="test-image-id",
            image=Image(
                contentUrl="https://example.com/old.png",
                sourceUrl="https://example.com/old.png",
            ),
            transform=Transform(translateX=0, translateY=0, scaleX=1, scaleY=1),
            size=Size(
                width=Dimension(magnitude=914400, unit=Unit.EMU),
                height=Dimension(magnitude=914400, unit=Unit.EMU),
            ),
            type=ElementKind.IMAGE,
            title=title,
            description=description,
            presentation_id="test-pres-id",
            slide_id="test-slide-id",
        )

    def test_replace_image_from_id_with_title_includes_alt_text_request(self):
        """A title/description pair adds an UpdatePageElementAltTextRequest after the replace."""
        client, sent = self._recording_client()

        ImageElement.replace_image_from_id(
            image_id="test-image-id",
            presentation_id="test-pres-id",
            url="https://example.com/new.png",
            api_client=client,
            title="my_chart",
            description="Chart description",
        )

        assert len(sent) == 2
        replace_req, alt_text_req = sent
        assert isinstance(replace_req, ReplaceImageRequest)
        assert isinstance(alt_text_req, UpdatePageElementAltTextRequest)

        assert alt_text_req.objectId == "test-image-id"
        assert alt_text_req.title == "my_chart"
        assert alt_text_req.description == "Chart description"

    def test_replace_image_from_id_without_title_no_alt_text_request(self):
        """With neither title nor description only the replace request is sent."""
        client, sent = self._recording_client()

        ImageElement.replace_image_from_id(
            image_id="test-image-id",
            presentation_id="test-pres-id",
            url="https://example.com/new.png",
            api_client=client,
        )

        assert len(sent) == 1
        assert isinstance(sent[0], ReplaceImageRequest)

    def test_replace_image_from_id_with_title_only(self):
        """A title without a description still yields an alt-text request."""
        client, sent = self._recording_client()

        ImageElement.replace_image_from_id(
            image_id="test-image-id",
            presentation_id="test-pres-id",
            url="https://example.com/new.png",
            api_client=client,
            title="my_chart",
        )

        assert len(sent) == 2
        alt_text_req = sent[1]
        assert alt_text_req.title == "my_chart"
        assert alt_text_req.description is None

    def test_replace_image_from_id_with_file_upload_preserves_title(self):
        """The title is kept when the image comes from a local file upload."""
        client, sent = self._recording_client()
        client.upload_image_to_drive.return_value = "https://drive.google.com/uc?id=abc123"

        ImageElement.replace_image_from_id(
            image_id="test-image-id",
            presentation_id="test-pres-id",
            file="/path/to/chart.png",
            api_client=client,
            title="chart_element",
        )

        client.upload_image_to_drive.assert_called_once_with("/path/to/chart.png")
        assert len(sent) == 2
        assert isinstance(sent[1], UpdatePageElementAltTextRequest)
        assert sent[1].title == "chart_element"

    def test_replace_image_instance_method_passes_title(self):
        """``replace_image()`` forwards the element's own title and description."""
        element = self._make_image_element(
            title="my_element_name",
            description="my description",
        )
        client, sent = self._recording_client()

        element.replace_image(
            url="https://example.com/new.png",
            api_client=client,
            enforce_size=False,
        )

        assert len(sent) == 2
        alt_text_req = sent[1]
        assert isinstance(alt_text_req, UpdatePageElementAltTextRequest)
        assert alt_text_req.objectId == "test-image-id"
        assert alt_text_req.title == "my_element_name"
        assert alt_text_req.description == "my description"

    def test_replace_image_instance_method_no_title(self):
        """``replace_image()`` on an element without alt text sends no alt-text request."""
        element = self._make_image_element(title=None, description=None)
        client, sent = self._recording_client()

        element.replace_image(
            url="https://example.com/new.png",
            api_client=client,
            enforce_size=False,
        )

        assert len(sent) == 1
        assert isinstance(sent[0], ReplaceImageRequest)
gslides_api/pptx/markdown_to_pptx.py | 730 +++++ gslides_api/pptx/relationship_copier.py | 442 ++++ gslides_api/pptx/shape_copier.py | 1071 ++++++++ gslides_api/pptx/slide_copier.py | 456 ++++ gslides_api/pptx/slide_deleter.py | 263 ++ gslides_api/pptx/xml_utils.py | 373 +++ poetry.lock | 280 +- pyproject.toml | 7 +- tests/test_adapters/__init__.py | 0 .../test_abstract_slide_markdown.py | 317 +++ ...est_gslides_adapter_discriminated_union.py | 410 +++ .../test_gslides_adapter_write_text.py | 93 + .../Samplead Master Deck Template.pptx | Bin 0 -> 3089394 bytes tests/test_pptx/__init__.py | 0 tests/test_pptx/test_markdown_to_pptx.py | 641 +++++ tests/test_pptx/test_pptx_adapter.py | 656 +++++ tests/test_pptx/test_pptx_adapter_slides.py | 315 +++ tests/test_pptx/test_pptx_autoscale.py | 246 ++ tests/test_pptx/test_pptx_bullets.py | 1183 +++++++++ tests/test_pptx/test_pptx_converters.py | 913 +++++++ tests/test_pptx/test_pptx_integration.py | 491 ++++ tests/test_pptx/test_pptx_roundtrip.py | 345 +++ tests/test_pptx/test_pptx_shape_styling.py | 1128 ++++++++ tests/test_pptx/test_pptx_slide_copying.py | 1390 ++++++++++ tests/test_pptx/test_pptx_table_roundtrip.py | 610 +++++ tests/test_pptx/test_pptx_text_extraction.py | 176 ++ .../test_pptx_text_frame_markdown.py | 445 ++++ tests/test_pptx/test_pptx_write_markdown.py | 398 +++ .../test_pptx/test_table_adapter_interface.py | 232 ++ 45 files changed, 20707 insertions(+), 10 deletions(-) create mode 100644 gslides_api/adapters/__init__.py create mode 100644 gslides_api/adapters/abstract_slides.py create mode 100644 gslides_api/adapters/gslides_adapter.py create mode 100644 gslides_api/adapters/html_adapter.py create mode 100644 gslides_api/adapters/markdown_to_html.py create mode 100644 gslides_api/adapters/pptx_adapter.py create mode 100644 gslides_api/agnostic/element_size.py create mode 100644 gslides_api/common/__init__.py create mode 100644 gslides_api/common/download.py create mode 100644 
gslides_api/common/google_errors.py create mode 100644 gslides_api/common/log_time.py create mode 100644 gslides_api/common/retry.py create mode 100644 gslides_api/pptx/__init__.py create mode 100644 gslides_api/pptx/chart_renderer.py create mode 100644 gslides_api/pptx/converters.py create mode 100644 gslides_api/pptx/id_manager.py create mode 100644 gslides_api/pptx/markdown_to_pptx.py create mode 100644 gslides_api/pptx/relationship_copier.py create mode 100644 gslides_api/pptx/shape_copier.py create mode 100644 gslides_api/pptx/slide_copier.py create mode 100644 gslides_api/pptx/slide_deleter.py create mode 100644 gslides_api/pptx/xml_utils.py create mode 100644 tests/test_adapters/__init__.py create mode 100644 tests/test_adapters/test_abstract_slide_markdown.py create mode 100644 tests/test_adapters/test_gslides_adapter_discriminated_union.py create mode 100644 tests/test_adapters/test_gslides_adapter_write_text.py create mode 100644 tests/test_pptx/Samplead Master Deck Template.pptx create mode 100644 tests/test_pptx/__init__.py create mode 100644 tests/test_pptx/test_markdown_to_pptx.py create mode 100644 tests/test_pptx/test_pptx_adapter.py create mode 100644 tests/test_pptx/test_pptx_adapter_slides.py create mode 100644 tests/test_pptx/test_pptx_autoscale.py create mode 100644 tests/test_pptx/test_pptx_bullets.py create mode 100644 tests/test_pptx/test_pptx_converters.py create mode 100644 tests/test_pptx/test_pptx_integration.py create mode 100644 tests/test_pptx/test_pptx_roundtrip.py create mode 100644 tests/test_pptx/test_pptx_shape_styling.py create mode 100644 tests/test_pptx/test_pptx_slide_copying.py create mode 100644 tests/test_pptx/test_pptx_table_roundtrip.py create mode 100644 tests/test_pptx/test_pptx_text_extraction.py create mode 100644 tests/test_pptx/test_pptx_text_frame_markdown.py create mode 100644 tests/test_pptx/test_pptx_write_markdown.py create mode 100644 tests/test_pptx/test_table_adapter_interface.py diff --git 
a/gslides_api/adapters/__init__.py b/gslides_api/adapters/__init__.py new file mode 100644 index 0000000..a8a8fcf --- /dev/null +++ b/gslides_api/adapters/__init__.py @@ -0,0 +1 @@ +"""Slide adapters for various presentation formats (Google Slides, PPTX, HTML).""" diff --git a/gslides_api/adapters/abstract_slides.py b/gslides_api/adapters/abstract_slides.py new file mode 100644 index 0000000..9ef5831 --- /dev/null +++ b/gslides_api/adapters/abstract_slides.py @@ -0,0 +1,670 @@ +import logging +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Literal, Optional, Union + +if TYPE_CHECKING: + pass # Forward reference types handled via string annotations + +from pydantic import BaseModel, Field, PrivateAttr, model_validator + +from gslides_api.agnostic.domain import ImageData +from gslides_api.agnostic.element import MarkdownTableElement +from gslides_api.agnostic.units import OutputUnit + +from gslides_api.agnostic.element_size import ElementSizeMeta + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + pass + + +def _extract_font_size_pt(styles: list[Any] | None) -> float: + """Extract the dominant font size (in points) from a shape element's text styles. + + Handles both GSlides (RichStyle with font_size_pt) and PPTX + (StyleInfo dict with font_size that has a .pt attribute). + + Returns: + Font size in points, or 12.0 as fallback. + """ + if not styles: + return 12.0 + + font_sizes = [] + for style in styles: + if isinstance(style, dict): + # PPTX StyleInfo dict + fs = style.get("font_size") + if fs is not None and hasattr(fs, "pt"): + font_sizes.append(fs.pt) + else: + # GSlides RichStyle object + if hasattr(style, "font_size_pt") and style.font_size_pt is not None: + font_sizes.append(style.font_size_pt) + + return max(font_sizes) if font_sizes else 12.0 + + +def _extract_font_size_from_table(element: "AbstractTableElement") -> float: + """Extract the dominant font size (in points) from a table element's first cell. 
+ + Handles both GSlides (TableElement with tableRows) and PPTX + (GraphicFrame with .table accessor). + + Returns: + Font size in points, or 10.0 as fallback. + """ + try: + if hasattr(element, "pptx_element") and element.pptx_element is not None: + # PPTX path + table = element.pptx_element.table + cell = table.cell(0, 0) + for para in cell.text_frame.paragraphs: + for run in para.runs: + if run.font.size is not None and hasattr(run.font.size, "pt"): + return run.font.size.pt + elif hasattr(element, "gslides_element") and element.gslides_element is not None: + # GSlides path — access table rows from the underlying gslides-api element + gslides_table = element.gslides_element + if hasattr(gslides_table, "table") and gslides_table.table is not None: + table_data = gslides_table.table + if table_data.tableRows: + first_row = table_data.tableRows[0] + if first_row.tableCells: + cell = first_row.tableCells[0] + # Cell text content has styles + if hasattr(cell, "text") and hasattr(cell.text, "textElements"): + for te in cell.text.textElements: + if hasattr(te, "textRun") and te.textRun is not None: + ts = te.textRun.style + if ts and hasattr(ts, "fontSize") and ts.fontSize: + return ts.fontSize.magnitude + except Exception: + pass + return 10.0 + + +# Supporting data classes +class AbstractThumbnail(BaseModel): + contentUrl: str + width: int + height: int + mime_type: str + content: Optional[bytes] = None + file_size: Optional[int] = None + + +class AbstractSlideProperties(BaseModel): + isSkipped: bool = False + + +class AbstractAltText(BaseModel): + title: str | None = None + description: str | None = None + + +class AbstractSpeakerNotes(BaseModel, ABC): + @abstractmethod + def read_text(self, as_markdown: bool = True) -> str: + pass + + @abstractmethod + def write_text(self, api_client: "AbstractSlidesAPIClient", content: str): + pass + + +class AbstractCredentials(BaseModel): + token: Optional[str] = None + refresh_token: Optional[str] = None + client_id: 
class AbstractElementKind:
    """String constants naming the element kinds handled by the adapters.

    This is a plain class used as a namespace, not an ``enum.Enum``: each
    attribute is an ordinary ``str``.
    """

    IMAGE = "IMAGE"
    SHAPE = "SHAPE"
    TABLE = "TABLE"
class AbstractElement(BaseModel, ABC):
    """Backend-agnostic base for a single element placed on a slide.

    Concrete adapters subclass this and implement the abstract geometry,
    image-conversion and alt-text operations declared below.
    """

    objectId: str = ""
    presentation_id: str = ""
    slide_id: str = ""
    alt_text: AbstractAltText = Field(default_factory=AbstractAltText)
    type: str = ""

    # Parent references - not serialized, populated by parent validators
    _parent_slide: Optional["AbstractSlide"] = PrivateAttr(default=None)
    _parent_presentation: Optional["AbstractPresentation"] = PrivateAttr(default=None)

    def __eq__(self, other: object) -> bool:
        """Compare by public fields only, so parent references cannot recurse.

        Returns ``NotImplemented`` for operands that are not elements, which
        lets Python try the other operand's reflected comparison before
        falling back to "not equal".
        """
        if not isinstance(other, AbstractElement):
            return NotImplemented
        # Compare only the public fields, not parent references
        return self.model_dump() == other.model_dump()

    def __hash__(self) -> int:
        """Hash based on objectId for use in sets/dicts."""
        return hash(self.objectId)

    @abstractmethod
    def absolute_size(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]:
        """Return the element's size as a 2-tuple in ``units``."""
        pass

    @abstractmethod
    def absolute_position(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]:
        """Return the element's position as a 2-tuple in ``units``."""
        pass

    @abstractmethod
    def create_image_element_like(
        self, api_client: AbstractSlidesAPIClient
    ) -> "AbstractImageElement":
        """Create an image element modelled on this element."""
        pass

    @abstractmethod
    def set_alt_text(
        self,
        api_client: AbstractSlidesAPIClient,
        title: str | None = None,
        description: str | None = None,
    ):
        """Set the element's alt-text title and/or description."""
        pass
+ + Returns: + Font scale factor (1.0 if no scaling, < 1.0 if rows were added with fix_height) + """ + pass + + @abstractmethod + def update_content( + self, + api_client: AbstractSlidesAPIClient, + markdown_content: MarkdownTableElement, + check_shape: bool = True, + font_scale_factor: float = 1.0, + ): + pass + + @abstractmethod + def to_markdown_element(self, name: str | None = None) -> MarkdownTableElement: + pass + + @abstractmethod + def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float: + """Get weight of a single horizontal border in specified units.""" + pass + + @abstractmethod + def get_row_count(self) -> int: + """Get current number of rows.""" + pass + + @abstractmethod + def get_column_count(self) -> int: + """Get current number of columns.""" + pass + + def get_total_height_including_borders(self, units: OutputUnit = OutputUnit.IN) -> float: + """Get total table height including borders. + + Returns: + Total height: sum of row heights + all horizontal border heights. + """ + _, row_heights_total = self.absolute_size(units=units) + border_weight = self.get_horizontal_border_weight(units=units) + num_borders = self.get_row_count() + 1 + return row_heights_total + (border_weight * num_borders) + + def get_max_height(self, units: OutputUnit = OutputUnit.IN) -> float: + """Calculate max allowed height based on elements below this table. + + Returns: + Max height in specified units. + + Raises: + RuntimeError: If parent references are not set (programming error). + """ + if self._parent_slide is None or self._parent_presentation is None: + raise RuntimeError( + f"Element {self.objectId} missing parent references. " + f"_parent_slide={self._parent_slide}, _parent_presentation={self._parent_presentation}. " + "This is a programming error - parent references should be set during slide creation." 
+ ) + + slide = self._parent_slide + presentation = self._parent_presentation + + # Get this table's position and size + table_x, table_top_y = self.absolute_position(units=units) + table_width, table_height = self.absolute_size(units=units) + table_bottom_y = table_top_y + table_height + + # Find minimum Y of elements below the table + slide_height = presentation.slide_height(units=units) + min_y_below = slide_height + + for element in slide.page_elements_flat: + if element.objectId == self.objectId: + continue + + elem_x, elem_y = element.absolute_position(units=units) + + # Element is "below" if its top is at or below table's bottom + if elem_y >= table_bottom_y: + min_y_below = min(min_y_below, elem_y) + + return min_y_below - table_top_y + + +class AbstractSlide(BaseModel, ABC): + elements: list[AbstractElement] = Field( + description="The elements of the slide", default_factory=list + ) + objectId: str = "" + slideProperties: AbstractSlideProperties = Field(default_factory=AbstractSlideProperties) + speaker_notes: Optional[AbstractSpeakerNotes] = None + + # Parent reference for this slide + _parent_presentation: Optional["AbstractPresentation"] = PrivateAttr(default=None) + + def __eq__(self, other: object) -> bool: + """Custom equality that excludes parent references to avoid circular comparison.""" + if not isinstance(other, AbstractSlide): + return False + # Compare only the public fields, not parent references + return self.model_dump() == other.model_dump() + + def __hash__(self) -> int: + """Hash based on objectId for use in sets/dicts.""" + return hash(self.objectId) + + @model_validator(mode="after") + def _populate_element_parent_refs(self) -> "AbstractSlide": + """Populate parent references on elements after creation/deserialization.""" + for element in self.elements: + element._parent_slide = self + # _parent_presentation is set by presentation validator + return self + + @property + def name(self) -> str: + """Get the slide name from the speaker 
notes.""" + if not self.speaker_notes: + return "" + try: + return self.speaker_notes.read_text() + except Exception: + return "" + + @property + def page_elements_flat(self) -> list[AbstractElement]: + """Flatten the elements tree into a list.""" + return self.elements + + def markdown(self) -> str: + """Return a markdown representation of this slide's layout and content. + + Metadata (element type, position, size, char capacity) is embedded in + HTML comments following the gslides-api MarkdownSlideElement convention. + Text and table content appears as regular markdown below each comment. + """ + parts = [] + for element in self.page_elements_flat: + name = element.alt_text.title or element.objectId + x, y = element.absolute_position() + w, h = element.absolute_size() + + if isinstance(element, AbstractShapeElement) and element.has_text: + text = element.read_text(as_markdown=True) + try: + font_pt = _extract_font_size_pt(element.styles(skip_whitespace=True)) + except Exception: + font_pt = 12.0 + meta = ElementSizeMeta( + box_width_inches=w, box_height_inches=h, font_size_pt=font_pt, + ) + parts.append( + f"\n{text}" + ) + elif isinstance(element, AbstractTableElement): + md_elem = element.to_markdown_element(name=name) + table_md = md_elem.content.to_markdown() if md_elem and md_elem.content else "" + _rows, cols = md_elem.shape if md_elem else (0, 0) + # Estimate per-column char capacity (equal-width approximation) + col_chars_str = "" + if cols > 0: + font_pt = _extract_font_size_from_table(element) + col_width = w / cols + chars_per_col = int(col_width / (font_pt * 0.5 / 72)) + col_chars_str = f" | ~{chars_per_col} chars/col" + parts.append( + f"\n{table_md}" + ) + elif isinstance(element, AbstractImageElement): + parts.append( + f"" + ) + else: + parts.append( + f"" + ) + + return "\n\n".join(parts) + + @abstractmethod + def thumbnail( + self, api_client: AbstractSlidesAPIClient, size: str, include_data: bool = False + ) -> AbstractThumbnail: + pass + + def 
get_elements_by_alt_title(self, title: str) -> list[AbstractElement]: + return [e for e in self.page_elements_flat if e.alt_text.title == title] + + def __getitem__(self, item: str): + """Get element by alt title.""" + elements = self.get_elements_by_alt_title(item) + if not elements: + raise KeyError(f"Element with alt title {item} not found") + if len(elements) > 1: + raise KeyError(f"Multiple elements with alt title {item} found") + return elements[0] + + +class AbstractPresentation(BaseModel, ABC): + slides: list[AbstractSlide] = Field(default_factory=list) + presentationId: str | None = None + revisionId: str | None = None + title: str | None = None + + @model_validator(mode="after") + def _populate_slide_parent_refs(self) -> "AbstractPresentation": + """Populate parent references on slides/elements after creation/deserialization.""" + for slide in self.slides: + slide._parent_presentation = self + for element in slide.elements: + element._parent_presentation = self + return self + + @property + @abstractmethod + def url(self) -> str: + pass + + @abstractmethod + def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float: + """Return slide height in specified units.""" + pass + + @abstractmethod + def save(self, api_client: "AbstractSlidesAPIClient") -> None: + """Save/persist all changes made to this presentation.""" + pass + + def __getitem__(self, item: str): + """Get slide by name.""" + for slide in self.slides: + if slide.name == item: + return slide + raise KeyError(f"Slide with name {item} not found") + + @classmethod + @abstractmethod + def from_id( + cls, api_client: AbstractSlidesAPIClient, presentation_id: str + ) -> "AbstractPresentation": + from gslides_api.adapters.gslides_adapter import GSlidesAPIClient, GSlidesPresentation + from gslides_api.adapters.html_adapter import HTMLAPIClient, HTMLPresentation + from gslides_api.adapters.pptx_adapter import PowerPointAPIClient, PowerPointPresentation + + if isinstance(api_client, 
GSlidesAPIClient): + return GSlidesPresentation.from_id(api_client, presentation_id) + elif isinstance(api_client, PowerPointAPIClient): + return PowerPointPresentation.from_id(api_client, presentation_id) + elif isinstance(api_client, HTMLAPIClient): + return HTMLPresentation.from_id(api_client, presentation_id) + else: + raise NotImplementedError("Only gslides, pptx, and html clients are supported") + + @abstractmethod + def copy_via_drive( + self, + api_client: AbstractSlidesAPIClient, + copy_title: str, + folder_id: Optional[str] = None, + ) -> "AbstractPresentation": + pass + + @abstractmethod + def sync_from_cloud(self, api_client: AbstractSlidesAPIClient): + pass + + @abstractmethod + def insert_copy( + self, + source_slide: "AbstractSlide", + api_client: AbstractSlidesAPIClient, + insertion_index: int | None = None, + ) -> "AbstractSlide": + pass + + @abstractmethod + def delete_slide(self, slide: Union["AbstractSlide", int], api_client: AbstractSlidesAPIClient): + """Delete a slide from the presentation by reference or index.""" + pass + + @abstractmethod + def delete_slides( + self, slides: list[Union["AbstractSlide", int]], api_client: AbstractSlidesAPIClient + ): + """Delete multiple slides from the presentation by reference or index.""" + pass + + @abstractmethod + def move_slide( + self, + slide: Union["AbstractSlide", int], + insertion_index: int, + api_client: AbstractSlidesAPIClient, + ): + """Move a slide to a new position within the presentation.""" + pass + + @abstractmethod + def duplicate_slide( + self, slide: Union["AbstractSlide", int], api_client: AbstractSlidesAPIClient + ) -> "AbstractSlide": + """Duplicate a slide within the presentation.""" + pass + + async def get_slide_thumbnails( + self, + api_client: "AbstractSlidesAPIClient", + slides: Optional[list["AbstractSlide"]] = None, + ) -> list["AbstractThumbnail"]: + """Get thumbnails for slides in this presentation. 
+ + Default implementation loops through each slide's thumbnail() method. + Subclasses can override for more efficient batch implementations + (e.g., HTML using single Playwright session, PPTX using single PDF conversion). + + Args: + api_client: The API client for slide operations + slides: Optional list of slides to get thumbnails for. If None, uses all slides. + + Returns: + List of AbstractThumbnail objects with image data + """ + target_slides = slides if slides is not None else self.slides + thumbnails = [] + + for slide in target_slides: + thumb = slide.thumbnail( + api_client=api_client, + size=AbstractThumbnailSize.MEDIUM, + include_data=True, + ) + + # Ensure file_size is set if content is available + if thumb.content and thumb.file_size is None: + thumb = AbstractThumbnail( + contentUrl=thumb.contentUrl, + width=thumb.width, + height=thumb.height, + mime_type=thumb.mime_type, + content=thumb.content, + file_size=len(thumb.content), + ) + + thumbnails.append(thumb) + + return thumbnails + + +# class AbstractLayoutMatcher(ABC): +# """Abstract matcher for finding slide layouts that match given criteria.""" +# +# @abstractmethod +# def __init__(self, presentation: AbstractPresentation, matching_rule: Optional[str] = None): +# pass +# +# @abstractmethod +# def match(self, layout, matching_rule: Optional[str] = None) -> list[AbstractPreprocessedSlide]: +# pass diff --git a/gslides_api/adapters/gslides_adapter.py b/gslides_api/adapters/gslides_adapter.py new file mode 100644 index 0000000..2382240 --- /dev/null +++ b/gslides_api/adapters/gslides_adapter.py @@ -0,0 +1,693 @@ +""" +Concrete implementation of abstract slides using gslides-api. +This module provides the actual implementation that maps abstract slide operations to gslides-api calls. 
+""" + +import io +import logging +from typing import Annotated, Any, Optional, Union + +import httpx +from pydantic import BaseModel, Discriminator, Field, Tag, TypeAdapter, model_validator + +import gslides_api +from gslides_api import GoogleAPIClient, Presentation +from gslides_api import Slide as GSlide +from gslides_api.agnostic.element import MarkdownTableElement +from gslides_api.agnostic.units import EMU_PER_CM, EMU_PER_INCH, OutputUnit, from_emu +from gslides_api.domain.domain import ThumbnailSize +from gslides_api.domain.request import SubstringMatchCriteria +from gslides_api.domain.text import TextStyle +from gslides_api.element.base import ElementKind, PageElementBase +from gslides_api.element.element import ImageElement, PageElement, TableElement +from gslides_api.element.shape import ShapeElement +from gslides_api.page.notes import Notes +from gslides_api.request.parent import GSlidesAPIRequest +from gslides_api.request.request import ReplaceAllTextRequest + +from gslides_api.common.log_time import log_time +from gslides_api.common.retry import retry +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractElement, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSlide, + AbstractSlideProperties, + AbstractSlidesAPIClient, + AbstractSpeakerNotes, + AbstractTableElement, + AbstractThumbnail, +) + +logger = logging.getLogger(__name__) + + +def concrete_element_discriminator(v: Any) -> str: + """Discriminator to determine which ConcreteElement subclass based on type field.""" + if hasattr(v, "type"): + # Handle ElementKind enum + element_type = v.type + if hasattr(element_type, "value"): + element_type = element_type.value + + if element_type in ["SHAPE", "shape"]: + return "shape" + elif element_type in ["IMAGE", "image"]: + return "image" + elif element_type in ["TABLE", "table"]: + return "table" + else: + # Fallback for other element types (LINE, VIDEO, WORD_ART, etc.) 
+ return "generic" + + raise ValueError(f"Cannot determine element type from: {v}") + + +class GSlidesAPIClient(AbstractSlidesAPIClient): + def __init__(self, gslides_client: GoogleAPIClient | None = None): + if gslides_client is None: + gslides_client = gslides_api.client.api_client + self.gslides_client = gslides_client + + @property + def auto_flush(self): + return self.gslides_client.auto_flush + + @auto_flush.setter + def auto_flush(self, value: bool): + self.gslides_client.auto_flush = value + + def flush_batch_update(self): + pending_count = len(self.gslides_client.pending_batch_requests) + pending_presentation = self.gslides_client.pending_presentation_id + logger.info( + f"FLUSH_BATCH_UPDATE: {pending_count} pending requests " + f"for presentation {pending_presentation}" + ) + result = self.gslides_client.flush_batch_update() + replies = result.get("replies", []) if result else [] + logger.info( + f"FLUSH_BATCH_UPDATE: completed, {len(replies)} replies, " + f"result keys: {result.keys() if result else 'None'}" + ) + # Log any non-empty replies (errors or meaningful responses) + for i, reply in enumerate(replies): + if reply: # Non-empty reply + logger.debug(f"FLUSH_BATCH_UPDATE reply[{i}]: {reply}") + return result + + def copy_presentation( + self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None + ) -> dict: + return self.gslides_client.copy_presentation( + presentation_id=presentation_id, copy_title=copy_title, folder_id=folder_id + ) + + def create_folder( + self, name: str, ignore_existing: bool = True, parent_folder_id: Optional[str] = None + ) -> dict: + return self.gslides_client.create_folder( + name, ignore_existing=ignore_existing, parent_folder_id=parent_folder_id + ) + + def delete_file(self, file_id: str): + self.gslides_client.delete_file(file_id) + + def trash_file(self, file_id: str): + self.gslides_client.trash_file(file_id) + + def upload_image_to_drive(self, image_path: str) -> str: + return 
self.gslides_client.upload_image_to_drive(image_path) + + def set_credentials(self, credentials): + from google.oauth2.credentials import Credentials + + # Store the abstract credentials for later retrieval + self._abstract_credentials = credentials + # Convert abstract credentials to concrete Google credentials + google_creds = Credentials( + token=credentials.token, + refresh_token=credentials.refresh_token, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + token_uri=credentials.token_uri, + ) + self.gslides_client.set_credentials(google_creds) + + def get_credentials(self): + return getattr(self, "_abstract_credentials", None) + + def initialize_credentials(self, credential_location: str) -> None: + gslides_api.initialize_credentials(credential_location) + + def replace_text( + self, slide_ids: list[str], match_text: str, replace_text: str, presentation_id: str + ): + requests = [ + ReplaceAllTextRequest( + pageObjectIds=slide_ids, + containsText=SubstringMatchCriteria(text=match_text), + replaceText=replace_text, + ) + ] + self.gslides_client.batch_update(requests, presentation_id) + + # Factory functions + @classmethod + def get_default_api_client(cls) -> "GSlidesAPIClient": + """Get the default API client wrapped in concrete implementation.""" + return cls(gslides_api.client.api_client) + + @log_time + async def get_presentation_as_pdf(self, presentation_id: str) -> io.BytesIO: + api_client = self.gslides_client + request = api_client.drive_service.files().export_media( + fileId=presentation_id, mimeType="application/pdf" + ) + + async with httpx.AsyncClient() as client: + response = await retry( + client.get, + args=(request.uri,), + kwargs=dict(headers={"Authorization": f"Bearer {api_client.crdtls.token}"}), + max_attempts=3, + initial_delay=1.0, + max_delay=30.0, + ) + response.raise_for_status() + return io.BytesIO(response.content) + + +class GSlidesSpeakerNotes(AbstractSpeakerNotes): + def __init__(self, 
gslides_speaker_notes: Notes): + super().__init__() + self._gslides_speaker_notes = gslides_speaker_notes + + def read_text(self, as_markdown: bool = True) -> str: + return self._gslides_speaker_notes.read_text(as_markdown=as_markdown) + + def write_text(self, api_client: GSlidesAPIClient, content: str): + self._gslides_speaker_notes.write_text(content, api_client=api_client.gslides_client) + + +class GSlidesElementParent(AbstractElement): + """Generic concrete element for unsupported element types (LINE, VIDEO, etc.).""" + + gslides_element: Any = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_page_element(cls, data: Any) -> dict: + # Accept any PageElement + gslides_element = data + + return { + "objectId": gslides_element.objectId, + "presentation_id": gslides_element.presentation_id, + "slide_id": gslides_element.slide_id, + "alt_text": AbstractAltText( + title=gslides_element.alt_text.title, + description=gslides_element.alt_text.description, + ), + "type": ( + str(gslides_element.type.value) + if hasattr(gslides_element.type, "value") + else str(gslides_element.type) + ), + "gslides_element": gslides_element, + } + + def absolute_size(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + return self.gslides_element.absolute_size(units=units) + + # def element_properties(self) -> dict: + # return self.gslides_element.element_properties() + + def absolute_position(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + return self.gslides_element.absolute_position(units=units) + + def to_markdown_element(self, name: str | None = None) -> Any: + if name is None: + # Different element types have different default names + return self.gslides_element.to_markdown_element() + else: + return self.gslides_element.to_markdown_element(name=name) + + def create_image_element_like(self, api_client: GSlidesAPIClient) -> "GSlidesImageElement": + gslides_element = 
self.gslides_element.create_image_element_like( + api_client=api_client.gslides_client + ) + return GSlidesImageElement(gslides_element=gslides_element) + + def set_alt_text( + self, + api_client: GSlidesAPIClient, + title: str | None = None, + description: str | None = None, + ): + self.gslides_element.set_alt_text( + title=title, description=description, api_client=api_client.gslides_client + ) + + +class GSlidesShapeElement(AbstractShapeElement, GSlidesElementParent): + gslides_element: ShapeElement = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_page_element(cls, data: Any) -> dict: + if isinstance(data, ShapeElement): + gslides_shape = data + elif hasattr(data, "shape"): # It's a PageElement with shape + gslides_shape = data + else: + raise ValueError(f"Expected ShapeElement or PageElement with shape, got {type(data)}") + + return { + "objectId": gslides_shape.objectId, + "presentation_id": gslides_shape.presentation_id, + "slide_id": gslides_shape.slide_id, + "alt_text": AbstractAltText( + title=gslides_shape.alt_text.title, + description=gslides_shape.alt_text.description, + ), + "gslides_element": gslides_shape, + } + + @property + def has_text(self) -> bool: + return self.gslides_element.has_text + + def write_text(self, api_client: GSlidesAPIClient, content: str, autoscale: bool = False): + # Extract styles BEFORE writing to preserve original element styling. + # skip_whitespace=True avoids picking up invisible spacer styles (e.g. white theme colors). + # If no non-whitespace styles exist, TextContent.styles() falls back to including whitespace. 
+ styles = self.gslides_element.styles(skip_whitespace=True) + logger.debug( + f"GSlidesShapeElement.write_text: objectId={self.objectId}, " + f"content={repr(content[:100] if content else None)}, autoscale={autoscale}, " + f"styles_count={len(styles) if styles else 0}" + ) + result = self.gslides_element.write_text( + content, + autoscale=autoscale, + styles=styles, + api_client=api_client.gslides_client, + ) + logger.debug(f"GSlidesShapeElement.write_text: result={result}") + + def read_text(self, as_markdown: bool = True) -> str: + return self.gslides_element.read_text(as_markdown=as_markdown) + + def styles(self, skip_whitespace: bool = True) -> list[TextStyle] | None: + return self.gslides_element.styles(skip_whitespace=skip_whitespace) + + +class GSlidesImageElement(AbstractImageElement, GSlidesElementParent): + gslides_element: ImageElement = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_page_element(cls, data: Any) -> dict: + if isinstance(data, dict): + if "gslides_element" in data: + data = data["gslides_element"] + else: + raise ValueError(f"Need to supply gslides_element in dict, got {data}") + + if isinstance(data, ImageElement): + gslides_image = data + elif hasattr(data, "image"): # It's a PageElement with image + gslides_image = data + else: + raise ValueError(f"Expected ImageElement or PageElement with image, got {type(data)}") + + return { + "objectId": gslides_image.objectId, + "presentation_id": gslides_image.presentation_id, + "slide_id": gslides_image.slide_id, + "alt_text": AbstractAltText( + title=gslides_image.alt_text.title, + description=gslides_image.alt_text.description, + ), + "gslides_element": gslides_image, + } + + def replace_image( + self, + api_client: GSlidesAPIClient, + file: str | None = None, + url: str | None = None, + ): + # Clear cropProperties to avoid Google Slides API error: + # "CropProperties offsets cannot be updated individually" + # This happens when 
recreating elements that have partial crop settings. + if ( + self.gslides_element.image + and self.gslides_element.image.imageProperties + and hasattr(self.gslides_element.image.imageProperties, "cropProperties") + ): + self.gslides_element.image.imageProperties.cropProperties = None + + new_element = self.gslides_element.replace_image( + file=file, + url=url, + api_client=api_client.gslides_client, + enforce_size="auto", + recreate_element=True, + ) + if new_element is not None: + self.gslides_element = new_element + + def get_image_data(self): + # Get image data from the gslides image and return it directly + return self.gslides_element.get_image_data() + + +class GSlidesTableElement(AbstractTableElement, GSlidesElementParent): + gslides_element: TableElement = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_page_element(cls, data: Any) -> dict: + if isinstance(data, TableElement): + gslides_table = data + elif hasattr(data, "table"): # It's a PageElement with table + gslides_table = data + else: + raise ValueError(f"Expected TableElement or PageElement with table, got {type(data)}") + + return { + "objectId": gslides_table.objectId, + "presentation_id": gslides_table.presentation_id, + "slide_id": gslides_table.slide_id, + "alt_text": AbstractAltText( + title=gslides_table.alt_text.title, + description=gslides_table.alt_text.description, + ), + "gslides_element": gslides_table, + } + + def resize( + self, + api_client: GSlidesAPIClient, + rows: int, + cols: int, + fix_width: bool = True, + fix_height: bool = True, + target_height_in: float | None = None, + ) -> float: + """Resize the table. + + Args: + target_height_in: If provided, constrain total table height (rows + borders) + to this value in inches. Scales both row heights and border + weights proportionally. 
+ + Returns: + Font scale factor (1.0 if no scaling, < 1.0 if rows were added with fix_height) + """ + target_height_emu = None + if target_height_in is not None: + target_height_emu = target_height_in * EMU_PER_INCH + + requests, font_scale_factor = self.gslides_element.resize_requests( + rows, + cols, + fix_width=fix_width, + fix_height=fix_height, + target_height_emu=target_height_emu, + ) + api_client.gslides_client.batch_update(requests, self.presentation_id) + return font_scale_factor + + def get_horizontal_border_weight(self, units: OutputUnit = OutputUnit.IN) -> float: + """Get weight of horizontal borders in specified units.""" + return self.gslides_element.get_horizontal_border_weight(units=units) + + def get_row_count(self) -> int: + """Get current number of rows.""" + return self.gslides_element.table.rows + + def get_column_count(self) -> int: + """Get current number of columns.""" + return self.gslides_element.table.columns + + def update_content( + self, + api_client: GSlidesAPIClient, + markdown_content: MarkdownTableElement, + check_shape: bool = True, + font_scale_factor: float = 1.0, + ): + requests = self.gslides_element.content_update_requests( + markdown_content, check_shape=check_shape, font_scale_factor=font_scale_factor + ) + api_client.gslides_client.batch_update(requests, self.presentation_id) + + def to_markdown_element(self, name: str | None = None) -> Any: + return self.gslides_element.to_markdown_element(name=name) + + +# Discriminated union type for concrete elements +GSlidesElement = Annotated[ + Union[ + Annotated[GSlidesShapeElement, Tag("shape")], + Annotated[GSlidesImageElement, Tag("image")], + Annotated[GSlidesTableElement, Tag("table")], + Annotated[GSlidesElementParent, Tag("generic")], + ], + Discriminator(concrete_element_discriminator), +] + +# TypeAdapter for validating the discriminated union +_concrete_element_adapter = TypeAdapter(GSlidesElement) + + +def validate_concrete_element(page_element: PageElement) -> 
GSlidesElement: + """Create the appropriate concrete element from a PageElement.""" + return _concrete_element_adapter.validate_python(page_element) + + +class GSlidesSlide(AbstractSlide): + def __init__(self, gslides_slide: GSlide): + # Convert gslides elements to abstract elements, skipping group containers. + # unroll_group_elements includes both the group wrapper and its children; + # children are real elements while the group container has no renderable + # content and may lack size/transform, causing downstream crashes. + elements = [] + for element in gslides_slide.page_elements_flat: + if element.type == ElementKind.GROUP: + continue + concrete_element = validate_concrete_element(element) + elements.append(concrete_element) + + super().__init__( + elements=elements, + objectId=gslides_slide.objectId, + slideProperties=AbstractSlideProperties( + isSkipped=gslides_slide.slideProperties.isSkipped or False + ), + speaker_notes=GSlidesSpeakerNotes(gslides_slide.speaker_notes), + ) + self._gslides_slide = gslides_slide + + def thumbnail( + self, api_client: GSlidesAPIClient, size: str, include_data: bool = False + ) -> AbstractThumbnail: + """Get thumbnail for a Google Slides slide. 
+ + Args: + api_client: The Google Slides API client + size: The thumbnail size (e.g., "MEDIUM") + include_data: If True, downloads the thumbnail image data with retry logic + + Returns: + AbstractThumbnail with metadata and optionally the image content + """ + import http + import ssl + + from gslides_api.common.google_errors import detect_file_access_denied_error + from gslides_api.common.download import download_binary_file + + # Map size string to ThumbnailSize enum + thumbnail_size = getattr(ThumbnailSize, size, ThumbnailSize.MEDIUM) + + # Fetch thumbnail metadata with retry + @retry( + max_attempts=6, + initial_delay=1.0, + max_delay=15.0, + exceptions=( + TimeoutError, + httpx.TimeoutException, + httpx.RequestError, + ConnectionError, + ssl.SSLError, + http.client.ResponseNotReady, + http.client.IncompleteRead, + ), + ) + def fetch_thumbnail(): + return self._gslides_slide.thumbnail( + size=thumbnail_size, api_client=api_client.gslides_client + ) + + try: + gslides_thumbnail = fetch_thumbnail() + except Exception as e: + # Check if this is a file access denied error (drive.file scope) + detect_file_access_denied_error(error=e, file_id=self.presentation_id) + raise + + # Handle mime_type format: Google Slides returns "png", we need "image/png" + mime_type = ( + gslides_thumbnail.mime_type + if gslides_thumbnail.mime_type.startswith("image/") + else f"image/{gslides_thumbnail.mime_type}" + ) + + content = gslides_thumbnail.payload if include_data else None + file_size = len(content) if content else None + + return AbstractThumbnail( + contentUrl=gslides_thumbnail.contentUrl, + width=gslides_thumbnail.width, + height=gslides_thumbnail.height, + mime_type=mime_type, + content=content, + file_size=file_size, + ) + + +class GSlidesPresentation(AbstractPresentation): + def __init__(self, gslides_presentation: Presentation): + # Convert gslides slides to abstract slides + slides = [GSlidesSlide(slide) for slide in gslides_presentation.slides] + + super().__init__( + 
slides=slides, + url=gslides_presentation.url, + presentationId=getattr(gslides_presentation, "presentationId", ""), + revisionId=getattr(gslides_presentation, "revisionId", ""), + title=gslides_presentation.title, + ) + self._gslides_presentation = gslides_presentation + + @property + def url(self) -> str: + return self._gslides_presentation.url + + def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float: + """Return slide height in specified units.""" + height_dim = self._gslides_presentation.pageSize.height + height_emu = height_dim.magnitude if hasattr(height_dim, "magnitude") else float(height_dim) + return from_emu(height_emu, units) + + @classmethod + def from_id(cls, api_client: GSlidesAPIClient, presentation_id: str) -> "GSlidesPresentation": + from gslides_api.common.google_errors import detect_file_access_denied_error + + try: + gslides_presentation = Presentation.from_id( + presentation_id, api_client=api_client.gslides_client + ) + return cls(gslides_presentation) + except Exception as e: + # Check if this is a file access denied error (drive.file scope) + detect_file_access_denied_error(error=e, file_id=presentation_id) + # If not a file access denied error, re-raise the original exception + raise + + def copy_via_drive( + self, + api_client: GSlidesAPIClient, + copy_title: str, + folder_id: Optional[str] = None, + ) -> "GSlidesPresentation": + from gslides_api.common.google_errors import detect_file_access_denied_error + + try: + copied_presentation = self._gslides_presentation.copy_via_drive( + copy_title=copy_title, api_client=api_client.gslides_client, folder_id=folder_id + ) + return GSlidesPresentation(copied_presentation) + except Exception as e: + # Check if this is a file access denied error (drive.file scope) + detect_file_access_denied_error(error=e, file_id=self.presentationId) + raise + + def insert_copy( + self, + source_slide: GSlidesSlide, + api_client: GSlidesAPIClient, + insertion_index: int | None = None, + ): + # Use the 
def delete_slides(self, slides: list[Union[GSlidesSlide, int]], api_client: GSlidesAPIClient):
    """Delete several slides, each given either by reference or by index.

    Integer entries are resolved against ``self.slides`` *before* anything is
    removed, so every index refers to the slide order at the time of the call.
    Resolving them one at a time would be wrong: each deletion shifts the
    list, so ``[0, 1]`` would remove the original slides 0 and 2.

    A slide that is referenced more than once is deleted only once.

    Args:
        slides: Slide wrappers and/or integer positions in ``self.slides``.
        api_client: Client forwarded unchanged to ``self.delete_slide``.

    Raises:
        IndexError: If an integer entry is out of range for ``self.slides``.
    """
    targets = []
    seen_ids = set()
    for ref in slides:
        # Freeze index -> object while the list is still intact.
        target = self.slides[ref] if isinstance(ref, int) else ref
        if id(target) in seen_ids:
            continue
        seen_ids.add(id(target))
        targets.append(target)

    for target in targets:
        self.delete_slide(target, api_client)
def duplicate_slide(
    self, slide: Union[GSlidesSlide, int], api_client: GSlidesAPIClient
) -> GSlidesSlide:
    """Duplicate one slide and register the copy at the end of ``self.slides``.

    Args:
        slide: The slide wrapper to copy, or its position in ``self.slides``.
        api_client: Client whose ``gslides_client`` performs the duplication.

    Returns:
        The wrapper around the newly created slide.

    Raises:
        ValueError: If ``slide`` is neither an int nor a ``GSlidesSlide``.
    """
    source = self.slides[slide] if isinstance(slide, int) else slide
    if not isinstance(source, GSlidesSlide):
        raise ValueError("slide must be a GSlidesSlide or int")

    raw_copy = source._gslides_slide.duplicate(api_client=api_client.gslides_client)
    copy_wrapper = GSlidesSlide(raw_copy)
    self.slides.append(copy_wrapper)

    # Parent back-references are assigned by hand here: the wrapper was built
    # outside of the presentation's own construction path.
    copy_wrapper._parent_presentation = self
    for child in copy_wrapper.elements:
        child._parent_presentation = self
    return copy_wrapper
+""" + +import copy +import io +import logging +import os +import shutil +import tempfile +import uuid +from typing import Annotated, Any, List, Optional, Union + +from bs4 import BeautifulSoup +from bs4.element import Tag +from pydantic import BaseModel, ConfigDict, Discriminator, Field +from pydantic import Tag as PydanticTag +from pydantic import TypeAdapter, model_validator + +from gslides_api.agnostic.domain import ImageData +from gslides_api.agnostic.element import MarkdownTableElement, TableData +from gslides_api.agnostic.units import OutputUnit, from_emu, to_emu + +from gslides_api.common.download import download_binary_file + +from gslides_api.adapters.abstract_slides import ( + AbstractAltText, + AbstractCredentials, + AbstractElement, + AbstractElementKind, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSlide, + AbstractSlideProperties, + AbstractSlidesAPIClient, + AbstractSpeakerNotes, + AbstractTableElement, + AbstractThumbnail, +) + +logger = logging.getLogger(__name__) + +# Type alias for HTML element inputs +HTMLElementInput = Union[Tag, dict, "HTMLElementParent"] + +# Inline formatting tags to EXCLUDE from element parsing +# These are text styling elements that should not be separate blocks +INLINE_FORMATTING_TAGS = { + "strong", + "b", + "em", + "i", + "u", + "s", + "strike", + "del", + "span", + "br", + "a", + "sub", + "sup", + "code", + "small", + "mark", +} + +# Tags that are internal to other structures (skip these) +INTERNAL_STRUCTURE_TAGS = { + "li", + "thead", + "tbody", + "tr", + "td", + "th", + "figcaption", +} + +# Selectors for common consent/overlay widgets that should not appear in thumbnails +THUMBNAIL_OVERLAY_BLOCKLIST_CSS = """ +[id*="cookie" i], +[class*="cookie" i], +[id*="consent" i], +[class*="consent" i], +[id*="gdpr" i], +[class*="gdpr" i], +[id*="onetrust" i], +[class*="onetrust" i], +[id*="usercentrics" i], +[class*="usercentrics" i], +[id*="didomi" i], +[class*="didomi" i], 
+[id*="qc-cmp2" i], +[class*="qc-cmp2" i] { + display: none !important; + visibility: hidden !important; + opacity: 0 !important; + pointer-events: none !important; +} +""" + +THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT = """ +(selector) => { + const slide = document.querySelector(selector); + if (!slide) { + return { hidden: 0, reason: "slide_not_found" }; + } + + const keywords = [ + "cookie", + "consent", + "gdpr", + "onetrust", + "usercentrics", + "didomi", + "privacy", + "trustarc", + "qc-cmp2", + ]; + let hiddenCount = 0; + + for (const element of document.querySelectorAll("body *")) { + if (!(element instanceof HTMLElement)) { + continue; + } + if (slide.contains(element)) { + continue; + } + + const className = typeof element.className === "string" + ? element.className + : (element.getAttribute("class") || ""); + const signature = `${element.id || ""} ${className}`.toLowerCase(); + const hasKeyword = keywords.some((word) => signature.includes(word)); + + const computed = window.getComputedStyle(element); + const position = computed.position; + const zIndex = Number.parseInt(computed.zIndex || "0", 10); + const rect = element.getBoundingClientRect(); + const role = (element.getAttribute("role") || "").toLowerCase(); + const isDialogLike = element.getAttribute("aria-modal") === "true" || role === "dialog"; + const isFixedLike = position === "fixed" || position === "sticky"; + const isLargeBar = + rect.width >= window.innerWidth * 0.6 && + rect.height <= window.innerHeight * 0.35 && + (rect.top <= 120 || rect.bottom >= window.innerHeight - 120); + const isModalSized = + rect.width >= window.innerWidth * 0.3 && + rect.height >= window.innerHeight * 0.2; + const isOverlayCandidate = + hasKeyword || + (isFixedLike && (zIndex >= 10 || isLargeBar || isDialogLike || isModalSized)) || + (isDialogLike && zIndex >= 10); + + if (!isOverlayCandidate) { + continue; + } + + element.setAttribute("data-storyline-thumbnail-hidden", "true"); + element.style.setProperty("display", 
"none", "important"); + element.style.setProperty("visibility", "hidden", "important"); + element.style.setProperty("pointer-events", "none", "important"); + hiddenCount += 1; + } + + return { hidden: hiddenCount }; +} +""" + + +def _has_includable_child(element: Tag) -> bool: + """Check if element has child elements that would be included as blocks. + + Used to determine if this element is the "innermost" text-containing element. + If it has children that would be included, skip this element (not innermost). + """ + for child in element.children: + if not isinstance(child, Tag): + continue + child_name = child.name.lower() if child.name else "" + # Skip inline formatting - these don't count + if child_name in INLINE_FORMATTING_TAGS: + continue + # Skip internal structures - these don't count + if child_name in INTERNAL_STRUCTURE_TAGS: + continue + # If child is an image, parent has includable child + if child_name == "img": + return True + # If child has text content, parent has includable child + if child.get_text(strip=True): + return True + return False + + +def _hide_overlay_elements_for_thumbnail_sync(page: Any, slide_selector: str) -> None: + """Hide fixed overlays (cookie bars, consent dialogs, sticky headers) before screenshot.""" + try: + page.add_style_tag(content=THUMBNAIL_OVERLAY_BLOCKLIST_CSS) + result = page.evaluate(THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT, slide_selector) + hidden_count = result.get("hidden", 0) if isinstance(result, dict) else 0 + logger.debug("Suppressed %d overlay element(s) before thumbnail capture", hidden_count) + except Exception as e: + logger.debug("Overlay suppression failed during thumbnail capture: %s", e) + + +async def _hide_overlay_elements_for_thumbnail_async(page: Any, slide_selector: str) -> None: + """Async variant of overlay suppression for batch thumbnail capture.""" + try: + await page.add_style_tag(content=THUMBNAIL_OVERLAY_BLOCKLIST_CSS) + result = await page.evaluate(THUMBNAIL_OVERLAY_SUPPRESSION_SCRIPT, 
# Tag names that are modelled as text-bearing "shape" elements.
_SHAPE_TAG_NAMES = frozenset(
    {
        "div",
        "p",
        "span",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "section",
        "article",
        "ul",
        "ol",
    }
)


def _discriminate_by_tag(tag: Tag) -> str:
    """Map a BeautifulSoup tag to a discriminator label using its tag name."""
    tag_name = tag.name.lower() if tag.name else ""
    if tag_name == "img":
        return "image"
    if tag_name == "table":
        return "table"
    if tag_name in _SHAPE_TAG_NAMES:
        return "shape"
    return "generic"


def _discriminate_by_type(element_type: Any) -> str:
    """Map a ``type`` field value (enum member or string) to a discriminator label."""
    if element_type in [AbstractElementKind.SHAPE, "SHAPE", "shape"]:
        return "shape"
    if element_type in [AbstractElementKind.IMAGE, "IMAGE", "image"]:
        return "image"
    if element_type in [AbstractElementKind.TABLE, "TABLE", "table"]:
        return "table"
    return "generic"


def html_element_discriminator(v: HTMLElementInput) -> str:
    """Pick the HTMLElement union member ("shape", "image", "table", "generic").

    Resolution order:

    1. A raw BeautifulSoup ``Tag`` is classified by its tag name.
    2. A ``dict`` is classified by the tag stored under ``"html_element"`` when
       that is a ``Tag``, otherwise by its ``"type"`` entry. Dicts need their
       own branch because attribute lookups (``getattr(v, "type")``) never see
       dictionary keys and would label every dict "generic".
    3. An object exposing ``html_element`` is classified by that tag, or is
       "generic" when the attribute is not a ``Tag``.
    4. Anything else is classified by its ``type`` attribute.
    """
    if isinstance(v, Tag):
        return _discriminate_by_tag(v)

    if isinstance(v, dict):
        wrapped = v.get("html_element")
        if isinstance(wrapped, Tag):
            return _discriminate_by_tag(wrapped)
        return _discriminate_by_type(v.get("type"))

    if hasattr(v, "html_element"):
        wrapped = v.html_element
        if isinstance(wrapped, Tag):
            return _discriminate_by_tag(wrapped)
        return "generic"

    return _discriminate_by_type(getattr(v, "type", None))
def copy_presentation(
    self, presentation_id: str, copy_title: str, folder_id: Optional[str] = None
) -> dict:
    """Copy a presentation directory to ``<destination>/<copy_title>``.

    Args:
        presentation_id: Path of the presentation directory to copy.
        copy_title: Directory name of the copy.
        folder_id: Existing directory that receives the copy. When ``None``
            the copy is created next to the source directory.

    Returns:
        A dict with ``id`` (path of the copy), ``name`` (``copy_title``) and
        ``parents`` (a one-element list holding the destination folder).

    Raises:
        FileNotFoundError: If the source, or an explicitly given destination
            folder, is not an existing directory.
    """
    if not os.path.isdir(presentation_id):
        raise FileNotFoundError(f"Presentation directory not found: {presentation_id}")

    if folder_id is None:
        # Drop trailing separators first: dirname("deck/") is "deck" itself,
        # which would place the copy inside the directory being copied.
        trimmed = presentation_id.rstrip(os.sep + (os.altsep or ""))
        dest_folder = os.path.dirname(trimmed or presentation_id)
    else:
        if not os.path.isdir(folder_id):
            raise FileNotFoundError(f"Destination folder not found: {folder_id}")
        dest_folder = folder_id

    dest_path = os.path.join(dest_folder, copy_title)

    # An existing destination is merged into / overwritten, not rejected.
    shutil.copytree(presentation_id, dest_path, dirs_exist_ok=True)

    return {
        "id": dest_path,
        "name": copy_title,
        "parents": [dest_folder],
    }
class HTMLSpeakerNotes(AbstractSpeakerNotes):
    """Speaker notes kept in the ``data-notes`` attribute of a slide's section tag."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Tag carrying the data-notes attribute; excluded from serialisation.
    html_section: Any = Field(exclude=True, default=None)

    def __init__(self, html_section: Tag, **kwargs):
        super().__init__(**kwargs)
        self.html_section = html_section

    def read_text(self, as_markdown: bool = True) -> str:
        """Return the stored notes, or an empty string when there are none.

        ``as_markdown`` is accepted but not used: the attribute value is
        returned as stored.
        """
        section = self.html_section
        if not section:
            return ""
        return section.get("data-notes", "") or ""

    def write_text(self, api_client: "HTMLAPIClient", content: str):
        """Store ``content`` in ``data-notes``; a no-op without a section tag."""
        if self.html_section:
            self.html_section["data-notes"] = content
ConfigDict(arbitrary_types_allowed=True) + + html_element: Any = Field(exclude=True, default=None) + html_section: Any = Field(exclude=True, default=None) + directory_path: Optional[str] = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_html_element(cls, data: HTMLElementInput) -> dict: + """Convert from BeautifulSoup element to our abstract representation.""" + if isinstance(data, dict): + # Already converted + return data + elif isinstance(data, Tag): + html_element = data + elif hasattr(data, "html_element"): + # Already wrapped element + return data.__dict__ + else: + raise ValueError(f"Expected BeautifulSoup Tag, got {type(data)}") + + # Extract basic properties + object_id = html_element.get("id", "") or html_element.get("data-element-id", "") + + # Get alt text from data attributes + alt_text_title = html_element.get("data-alt-title", None) + alt_text_descr = html_element.get("data-alt-description", None) + + return { + "objectId": object_id, + "presentation_id": "", # Will be set by parent + "slide_id": "", + "alt_text": AbstractAltText(title=alt_text_title, description=alt_text_descr), + "type": "generic", + "html_element": html_element, + } + + def absolute_size(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + """Get the absolute size of the element by parsing CSS width/height from style attribute.""" + if not self.html_element: + return (0.0, 0.0) + + # Parse CSS style attribute + style = self.html_element.get("style", "") + width_px = self._parse_css_dimension(style, "width") + height_px = self._parse_css_dimension(style, "height") + + # Convert pixels to requested units (assuming 96 DPI for px to inches) + if units == OutputUnit.IN or units == "in": + return (width_px / 96.0, height_px / 96.0) + elif units == OutputUnit.PX or units == "px": + return (width_px, height_px) + else: + return (width_px / 96.0, height_px / 96.0) + + def _parse_css_dimension(self, style: str, 
property_name: str) -> float: + """Parse a CSS dimension value from a style string, returning value in pixels.""" + import re + + # Look for the property in the style string + pattern = rf"{property_name}\s*:\s*([^;]+)" + match = re.search(pattern, style, re.IGNORECASE) + if not match: + return 0.0 + + value_str = match.group(1).strip() + + # Parse numeric value and unit + num_match = re.match(r"([\d.]+)\s*(px|in|pt|em|rem|%|)", value_str, re.IGNORECASE) + if not num_match: + return 0.0 + + value = float(num_match.group(1)) + unit = num_match.group(2).lower() if num_match.group(2) else "px" + + # Convert to pixels + if unit == "px" or unit == "": + return value + elif unit == "in": + return value * 96 # 96 DPI + elif unit == "pt": + return value * 96 / 72 # 72 points per inch + elif unit in ("em", "rem"): + return value * 16 # Assume 16px base font + elif unit == "%": + # Percentage values need parent context, return 0 for now + return 0.0 + else: + return value + + def absolute_position(self, units: OutputUnit = OutputUnit.IN) -> tuple[float, float]: + """Get the absolute position of the element (simplified - returns 0,0 for HTML).""" + if not self.html_element: + return (0.0, 0.0) + + # HTML uses CSS layout, not absolute positioning + # For now, return simplified values + # Future: parse CSS left/top from style attribute + return (0.0, 0.0) + + def create_image_element_like(self, api_client: HTMLAPIClient) -> "HTMLImageElement": + """Create an image element with the same properties as this element.""" + if not self.html_element: + logger.warning("Cannot create image element: missing html_element reference") + raise ValueError("Cannot create image element without html_element reference") + + # Create a new tag - need to find the document root for new_tag + # Navigate up to find the root BeautifulSoup object which has new_tag method + parent = self.html_element + while parent is not None: + if hasattr(parent, "new_tag") and callable(getattr(parent, "new_tag", 
def set_alt_text(
    self,
    api_client: HTMLAPIClient,
    title: str | None = None,
    description: str | None = None,
):
    """Update alt-text title and/or description on the element.

    Each argument that is not ``None`` is written both to the tag
    (``data-alt-title`` / ``data-alt-description``) and to the matching field
    of ``self.alt_text``. Nothing happens when ``self.html_element`` is falsy.
    ``api_client`` is not used.
    """
    element = self.html_element
    if not element:
        return

    pending = (
        ("title", "data-alt-title", title),
        ("description", "data-alt-description", description),
    )
    for field_name, attribute, text in pending:
        if text is None:
            continue
        element[attribute] = text
        setattr(self.alt_text, field_name, text)
def styles(self, skip_whitespace: bool = True) -> Optional[List[dict]]:
    """Return a minimal style description for the element's text.

    The result is a single-entry list ``[{"text": <element text>}]``; no CSS
    is inspected. ``None`` is returned when there is no element, or when
    ``skip_whitespace`` is set and the text is empty or whitespace only.
    """
    element = self.html_element
    if not element:
        return None

    content = element.get_text()
    if skip_whitespace and not content.strip():
        return None
    return [{"text": content}]
def replace_image(
    self,
    api_client: HTMLAPIClient,
    file: str | None = None,
    url: str | None = None,
):
    """Point this img element at a new picture.

    A local ``file`` that exists is copied into ``<directory_path>/images/``
    and the tag's ``src`` is set to the relative path ``images/<basename>``.
    Otherwise, when ``url`` is given, the content is downloaded into a
    temporary file, which then goes through the same local-file path and is
    removed afterwards.

    Failures do not raise. A warning is logged and the element is left
    untouched when the wrapped tag is not an img, when ``directory_path`` is
    unset, or when neither an existing ``file`` nor a ``url`` was supplied.

    Args:
        api_client: Unused here apart from being passed to the recursive call.
        file: Path of a local image file.
        url: Location to download the image from when ``file`` is unusable.
    """
    if (
        not self.html_element
        or not isinstance(self.html_element.name, str)
        or self.html_element.name.lower() != "img"
    ):
        logger.warning("Cannot replace image: element is not an img tag")
        return

    if file and os.path.exists(file):
        if not self.directory_path:
            logger.warning("Cannot replace image: no directory_path set")
            return

        images_dir = os.path.join(self.directory_path, "images")
        os.makedirs(images_dir, exist_ok=True)

        filename = os.path.basename(file)
        shutil.copy2(file, os.path.join(images_dir, filename))

        # src is stored relative to the presentation directory.
        self.html_element["src"] = f"images/{filename}"
        logger.info(f"Replaced image with {file}, src set to images/{filename}")
        return

    if url:
        from urllib.parse import urlparse

        content, _ = download_binary_file(url)

        # Keep a recognised image extension from the URL: the stored file name
        # is what get_image_data later uses to pick the MIME type.
        suffix = os.path.splitext(urlparse(url).path)[1].lower()
        if suffix not in {".png", ".jpg", ".jpeg", ".gif", ".svg"}:
            suffix = ".png"

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(content)
            temp_file_path = temp_file.name

        try:
            self.replace_image(api_client, file=temp_file_path)
        finally:
            os.unlink(temp_file_path)
        return

    # Nothing usable was supplied; say so instead of silently doing nothing.
    logger.warning("Cannot replace image: no existing file or url given (file=%r)", file)
if not src: + return None + + # Determine if it's a URL or local file + if src.startswith("http://") or src.startswith("https://"): + # Download from URL + content, _ = download_binary_file(src) + mime_type = "image/png" # Default, could be detected from content + return ImageData(content=content, mime_type=mime_type) + else: + # Local file (relative to directory) + if not self.directory_path: + return None + + file_path = os.path.join(self.directory_path, src) + if not os.path.exists(file_path): + return None + + with open(file_path, "rb") as f: + content = f.read() + + # Detect mime type from extension + ext = os.path.splitext(file_path)[1].lower() + mime_type_map = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".svg": "image/svg+xml", + } + mime_type = mime_type_map.get(ext, "image/png") + + return ImageData(content=content, mime_type=mime_type) + + except Exception as e: + logger.error(f"Error getting image data: {e}") + return None + + +class HTMLTableElement(AbstractTableElement, HTMLElementParent): + """HTML table element implementation for tags.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + html_element: Any = Field(exclude=True, default=None) + + @model_validator(mode="before") + @classmethod + def convert_from_html_element(cls, data: HTMLElementInput) -> dict: + """Convert from BeautifulSoup
element.""" + base_data = HTMLElementParent.convert_from_html_element(data) + base_data["type"] = AbstractElementKind.TABLE + return base_data + + def _get_soup(self) -> BeautifulSoup: + """Get a BeautifulSoup object for creating new tags.""" + # Create a temporary soup for tag creation + return BeautifulSoup("", "lxml") + + def resize( + self, + api_client: HTMLAPIClient, + rows: int, + cols: int, + fix_width: bool = True, + fix_height: bool = True, + target_height_in: float | None = None, + ) -> float: + """Resize the table to the specified dimensions. + + Args: + target_height_in: Ignored for HTML tables (they auto-size). + + Returns: + Font scale factor (1.0 since HTML doesn't support font scaling during resize). + """ + if ( + not self.html_element + or not isinstance(self.html_element.name, str) + or self.html_element.name.lower() != "table" + ): + return 1.0 + + try: + soup = self._get_soup() + + # Get or create tbody + tbody = self.html_element.find("tbody") + if not tbody: + tbody = soup.new_tag("tbody") + self.html_element.append(tbody) + + current_rows = tbody.find_all("tr", recursive=False) + current_row_count = len(current_rows) + + # Adjust rows + if rows > current_row_count: + # Add rows + for _ in range(rows - current_row_count): + new_row = soup.new_tag("tr") + for _ in range(cols): + new_cell = soup.new_tag("td") + new_row.append(new_cell) + tbody.append(new_row) + elif rows < current_row_count: + # Remove rows + for row in current_rows[rows:]: + row.decompose() + + # Adjust columns in all rows + for row in tbody.find_all("tr", recursive=False): + cells = row.find_all(["td", "th"], recursive=False) + current_col_count = len(cells) + + if cols > current_col_count: + # Add cells + for _ in range(cols - current_col_count): + new_cell = soup.new_tag("td") + row.append(new_cell) + elif cols < current_col_count: + # Remove cells + for cell in cells[cols:]: + cell.decompose() + + except Exception as e: + logger.error(f"Error resizing table: {e}") + + 
return 1.0 + + def update_content( + self, + api_client: HTMLAPIClient, + markdown_content: MarkdownTableElement, + check_shape: bool = True, + font_scale_factor: float = 1.0, + ): + """Update the table content with markdown data. + + Args: + font_scale_factor: Font scale factor (currently unused for HTML, but kept for interface conformance). + """ + if ( + not self.html_element + or not isinstance(self.html_element.name, str) + or self.html_element.name.lower() != "table" + ): + return + + try: + soup = self._get_soup() + + # Get table data from markdown content + if hasattr(markdown_content, "content") and hasattr( + markdown_content.content, "headers" + ): + headers = markdown_content.content.headers + data_rows = markdown_content.content.rows + else: + # Fallback for old interface + headers = ( + markdown_content.rows[0] + if hasattr(markdown_content, "rows") and markdown_content.rows + else [] + ) + data_rows = ( + markdown_content.rows[1:] + if hasattr(markdown_content, "rows") and len(markdown_content.rows) > 1 + else [] + ) + + if not headers: + return + + # Ensure table has thead and tbody + thead = self.html_element.find("thead") + if not thead: + thead = soup.new_tag("thead") + self.html_element.insert(0, thead) + + tbody = self.html_element.find("tbody") + if not tbody: + tbody = soup.new_tag("tbody") + self.html_element.append(tbody) + + # Clear existing content + thead.clear() + tbody.clear() + + # Add header row + header_row = soup.new_tag("tr") + for header in headers: + th = soup.new_tag("th") + th.string = str(header) if header is not None else "" + header_row.append(th) + thead.append(header_row) + + # Add data rows + for row_data in data_rows: + tr = soup.new_tag("tr") + for cell_data in row_data: + td = soup.new_tag("td") + td.string = str(cell_data) if cell_data is not None else "" + tr.append(td) + tbody.append(tr) + + except Exception as e: + logger.error(f"Error updating table content: {e}") + + def get_horizontal_border_weight(self, units: 
def get_row_count(self) -> int:
    """Count the ``tr`` rows that are direct children of the table's ``tbody``.

    Returns 0 when there is no element or no ``tbody``; rows outside the
    ``tbody`` are not counted.
    """
    table = self.html_element
    body = table.find("tbody") if table else None
    if not body:
        return 0
    return len(body.find_all("tr", recursive=False))
def validate_html_element(html_element: Tag) -> HTMLElement:
    """Wrap a BeautifulSoup tag in the matching concrete element class.

    The label returned by ``html_element_discriminator`` selects the class
    ("shape", "image" or "table"); any other label falls back to
    ``HTMLElementParent``. The chosen class converts the tag into constructor
    data with its own ``convert_from_html_element`` and is then instantiated
    from that data.
    """
    wrappers = {
        "shape": HTMLShapeElement,
        "image": HTMLImageElement,
        "table": HTMLTableElement,
    }
    label = html_element_discriminator(html_element)
    wrapper_cls = wrappers.get(label, HTMLElementParent)
    init_data = wrapper_cls.convert_from_html_element(html_element)
    return wrapper_cls(**init_data)
HTMLSlide(AbstractSlide): + """HTML slide implementation representing a
element.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + html_section: Any = Field(exclude=True, default=None) + html_soup: Any = Field(exclude=True, default=None) + directory_path: Optional[str] = Field(exclude=True, default=None) + + def __init__(self, html_section: Tag, html_soup: BeautifulSoup, directory_path: str, **kwargs): + # Extract all content elements (innermost text-containing elements and images) + elements = [] + + for child in html_section.descendants: + if isinstance(child, Tag): + # Skip nested sections + if child.name == "section": + continue + + tag_name = child.name.lower() if child.name else "" + + # Skip inline formatting elements + if tag_name in INLINE_FORMATTING_TAGS: + continue + + # Skip internal structure elements + if tag_name in INTERNAL_STRUCTURE_TAGS: + continue + + # For images, always include + if tag_name == "img": + try: + html_elem = validate_html_element(child) + html_elem.directory_path = directory_path + html_elem.html_section = html_section + elements.append(html_elem) + except Exception as e: + logger.warning(f"Could not convert image element: {e}") + continue + + # For other elements, only include if: + # 1. They have text content + # 2. 
They are the innermost (no includable children) + text_content = child.get_text(strip=True) + if not text_content: + continue + + # Skip if this element has includable children (not innermost) + if _has_includable_child(child): + continue + + try: + html_elem = validate_html_element(child) + html_elem.directory_path = directory_path + html_elem.html_section = html_section + elements.append(html_elem) + except Exception as e: + logger.warning(f"Could not convert element {tag_name}: {e}") + + # Get speaker notes + speaker_notes = HTMLSpeakerNotes(html_section) + + # Get slide properties + is_skipped = html_section.get("data-skip", "").lower() == "true" + slide_properties = AbstractSlideProperties(isSkipped=is_skipped) + + # Get object ID + object_id = html_section.get("id", "") or html_section.get("data-slide-id", "") + + super().__init__( + elements=elements, + objectId=object_id, + slideProperties=slide_properties, + speaker_notes=speaker_notes, + ) + + self.html_section = html_section + self.html_soup = html_soup + self.directory_path = directory_path + + @property + def page_elements_flat(self) -> list[HTMLElementParent]: + """Flatten the elements tree into a list.""" + return self.elements + + def thumbnail( + self, api_client: HTMLAPIClient, size: str, include_data: bool = False + ) -> AbstractThumbnail: + """Generate a thumbnail of the slide using Playwright.""" + if not self.directory_path: + logger.warning("Cannot generate thumbnail: no directory_path set") + return AbstractThumbnail( + contentUrl="placeholder_thumbnail.png", + width=320, + height=240, + mime_type="image/png", + content=None, + ) + + html_file = os.path.join(self.directory_path, "index.html") + if not os.path.exists(html_file): + logger.warning(f"Cannot generate thumbnail: index.html not found at {html_file}") + return AbstractThumbnail( + contentUrl="placeholder_thumbnail.png", + width=320, + height=240, + mime_type="image/png", + content=None, + ) + + file_url = 
f"file://{os.path.abspath(html_file)}" + + # Use Playwright to capture the slide + from playwright.sync_api import sync_playwright + + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + try: + context = browser.new_context( + device_scale_factor=2, + viewport={"width": 1280, "height": 720}, + ) + page = context.new_page() + page.goto(file_url, wait_until="networkidle") + page.wait_for_timeout(1000) + + # Find the slide section by ID or position + slide_id = self.objectId + if slide_id: + safe_id = slide_id.replace(":", "\\:") + selector = f"section#{safe_id}" + else: + # Fallback: try to find the section element directly + selector = "section" + + _hide_overlay_elements_for_thumbnail_sync(page, selector) + slide_element = page.query_selector(selector) + if slide_element: + png_bytes = slide_element.screenshot(type="png") + + # Get dimensions from the image + from PIL import Image + import io as _io + + img = Image.open(_io.BytesIO(png_bytes)) + img_width, img_height = img.size + img.close() + + return AbstractThumbnail( + contentUrl=file_url, + width=img_width, + height=img_height, + mime_type="image/png", + content=png_bytes if include_data else None, + ) + else: + logger.warning(f"Could not find slide element with selector: {selector}") + finally: + browser.close() + except Exception as e: + logger.error(f"Error generating HTML thumbnail: {e}") + + # Return placeholder on failure + return AbstractThumbnail( + contentUrl="placeholder_thumbnail.png", + width=320, + height=240, + mime_type="image/png", + content=None, + ) + + +class HTMLPresentation(AbstractPresentation): + """HTML presentation implementation representing a directory with index.html.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + html_soup: Any = Field(exclude=True, default=None) + directory_path: Optional[str] = None + # Storage URLs set after upload (for API adapter layer) + uploaded_html_url: Optional[str] = None + uploaded_zip_url: 
Optional[str] = None + + def __init__( + self, + html_soup: BeautifulSoup, + directory_path: str, + ): + # Extract all top-level
elements as slides + slides = [] + for section in html_soup.find_all("section", recursive=True): + # Only process top-level sections (not nested) + if section.find_parent("section") is None: + try: + slide = HTMLSlide(section, html_soup, directory_path) + slides.append(slide) + except Exception as e: + logger.warning(f"Could not convert section to slide: {e}") + continue + + # Extract presentation metadata + presentation_id = directory_path + title_tag = html_soup.find("title") + title = title_tag.get_text(strip=True) if title_tag else os.path.basename(directory_path) + + super().__init__( + slides=slides, + presentationId=presentation_id, + revisionId=None, # HTML doesn't have revision IDs + title=title, + ) + + self.html_soup = html_soup + self.directory_path = directory_path + + @property + def url(self) -> str: + """Return the file path as URL (file system based).""" + if self.directory_path: + html_file = os.path.join(self.directory_path, "index.html") + return f"file://{os.path.abspath(html_file)}" + else: + raise ValueError("No directory path specified for presentation") + + def slide_height(self, units: OutputUnit = OutputUnit.IN) -> float: + """Return slide height in specified units. + + HTML slides don't have a fixed height - returns default presentation height. 
+ """ + # Default to standard presentation height (7.5 inches for 4:3 aspect ratio) + default_height_in = 7.5 + default_height_emu = to_emu(default_height_in, OutputUnit.IN) + return from_emu(default_height_emu, units) + + @classmethod + def from_id( + cls, + api_client: HTMLAPIClient, + presentation_id: str, + ) -> "HTMLPresentation": + """Load presentation from directory path.""" + # presentation_id is the directory path + if not os.path.exists(presentation_id) or not os.path.isdir(presentation_id): + raise FileNotFoundError(f"Presentation directory not found: {presentation_id}") + + # Load index.html + html_file = os.path.join(presentation_id, "index.html") + if not os.path.exists(html_file): + raise FileNotFoundError(f"index.html not found in {presentation_id}") + + try: + with open(html_file, "r", encoding="utf-8") as f: + html_content = f.read() + + html_soup = BeautifulSoup(html_content, "lxml") + return cls(html_soup, presentation_id) + except Exception as e: + raise ValueError(f"Could not load presentation from {presentation_id}: {e}") + + def copy_via_drive( + self, + api_client: HTMLAPIClient, + copy_title: str, + folder_id: Optional[str] = None, + ) -> "HTMLPresentation": + """Copy presentation to another location.""" + if not self.directory_path: + raise ValueError("Cannot copy presentation without a directory path") + + # Use the API client to copy the directory + copy_result = api_client.copy_presentation(self.directory_path, copy_title, folder_id) + + # Load the copied presentation + copied_presentation = HTMLPresentation.from_id(api_client, copy_result["id"]) + + return copied_presentation + + def sync_from_cloud(self, api_client: HTMLAPIClient): + """Re-read presentation from filesystem.""" + if not self.directory_path or not os.path.exists(self.directory_path): + return + + # Reload from file + html_file = os.path.join(self.directory_path, "index.html") + with open(html_file, "r", encoding="utf-8") as f: + html_content = f.read() + + # Update our 
internal representation + html_soup = BeautifulSoup(html_content, "lxml") + self.html_soup = html_soup + + # Rebuild slides + slides = [] + for section in html_soup.find_all("section", recursive=True): + if section.find_parent("section") is None: + try: + slide = HTMLSlide(section, html_soup, self.directory_path) + slides.append(slide) + except Exception as e: + logger.warning(f"Could not convert section during sync: {e}") + continue + + self.slides = slides + + # Update metadata + title_tag = html_soup.find("title") + self.title = ( + title_tag.get_text(strip=True) if title_tag else os.path.basename(self.directory_path) + ) + + def save(self, api_client: HTMLAPIClient) -> None: + """Save/persist all changes made to this presentation.""" + if not self.directory_path: + raise ValueError("No directory path specified for saving") + + html_file = os.path.join(self.directory_path, "index.html") + + # Ensure directory exists + os.makedirs(self.directory_path, exist_ok=True) + + # Save the HTML + with open(html_file, "w", encoding="utf-8") as f: + f.write(self.html_soup.prettify()) + + def insert_copy( + self, + source_slide: AbstractSlide, + api_client: HTMLAPIClient, + insertion_index: int | None = None, + ) -> AbstractSlide: + """Insert a copy of a slide into this presentation.""" + if not isinstance(source_slide, HTMLSlide): + raise ValueError("Can only copy HTMLSlide instances") + + # Deep copy the section + new_section = copy.deepcopy(source_slide.html_section) + + # Generate a unique ID for the copied section to avoid ID collisions + unique_id = f"slide-{uuid.uuid4().hex[:8]}" + new_section["id"] = unique_id + + # Insert into soup at the specified index + if insertion_index is None: + self.html_soup.body.append(new_section) + else: + sections = [ + s + for s in self.html_soup.find_all("section", recursive=True) + if s.find_parent("section") is None + ] + if insertion_index < len(sections): + sections[insertion_index].insert_before(new_section) + else: + 
self.html_soup.body.append(new_section) + + # Create new slide wrapper + new_slide = HTMLSlide(new_section, self.html_soup, self.directory_path) + + # Update our slides list + if insertion_index is None: + self.slides.append(new_slide) + else: + self.slides.insert(insertion_index, new_slide) + + return new_slide + + def delete_slide(self, slide: Union[HTMLSlide, int], api_client: HTMLAPIClient): + """Delete a slide from the presentation.""" + if isinstance(slide, int): + slide = self.slides[slide] + + if isinstance(slide, HTMLSlide): + # Remove from DOM + slide.html_section.decompose() + # Remove from our slides list + self.slides.remove(slide) + + def delete_slides(self, slides: List[Union[HTMLSlide, int]], api_client: HTMLAPIClient): + """Delete multiple slides from the presentation.""" + # Convert all indices to slide objects first to avoid index shifting issues + slides_to_delete = [] + for slide in slides: + if isinstance(slide, int): + slides_to_delete.append(self.slides[slide]) + else: + slides_to_delete.append(slide) + + # Now delete all slides + for slide in slides_to_delete: + self.delete_slide(slide, api_client) + + def move_slide( + self, + slide: Union[HTMLSlide, int], + insertion_index: int, + api_client: HTMLAPIClient, + ): + """Move a slide to a new position within the presentation.""" + if isinstance(slide, int): + slide = self.slides[slide] + + if isinstance(slide, HTMLSlide): + # Extract the section from the DOM + section = slide.html_section.extract() + + # Insert at new position + sections = [ + s + for s in self.html_soup.find_all("section", recursive=True) + if s.find_parent("section") is None + ] + if insertion_index < len(sections): + sections[insertion_index].insert_before(section) + else: + self.html_soup.body.append(section) + + # Update local slides list order + self.slides.remove(slide) + self.slides.insert(insertion_index, slide) + + def duplicate_slide(self, slide: Union[HTMLSlide, int], api_client: HTMLAPIClient) -> HTMLSlide: + 
"""Duplicate a slide within the presentation.""" + if isinstance(slide, int): + slide = self.slides[slide] + + if isinstance(slide, HTMLSlide): + # Deep copy the section + new_section = copy.deepcopy(slide.html_section) + + # Append to DOM + self.html_soup.body.append(new_section) + + # Create new slide wrapper + new_slide = HTMLSlide(new_section, self.html_soup, self.directory_path) + self.slides.append(new_slide) + + return new_slide + else: + raise ValueError("slide must be an HTMLSlide or int") + + async def get_slide_thumbnails( + self, + api_client: "HTMLAPIClient", + slides: Optional[List["AbstractSlide"]] = None, + ) -> List[AbstractThumbnail]: + """Get thumbnails for slides using a single Playwright browser session. + + This is more efficient than calling thumbnail() for each slide individually + because it opens the browser once and captures all slides in one session. + + Args: + api_client: The HTML API client + slides: Optional list of slides to get thumbnails for. If None, uses all slides. 
+ + Returns: + List of AbstractThumbnail objects with image data + """ + import io as _io + + from PIL import Image + from playwright.async_api import async_playwright + + target_slides = slides if slides is not None else self.slides + thumbnails = [] + + if not target_slides: + return thumbnails + + if not self.directory_path: + logger.warning("Cannot generate thumbnails: no directory_path set") + return [self._create_placeholder_thumbnail() for _ in target_slides] + + html_file = os.path.join(self.directory_path, "index.html") + if not os.path.exists(html_file): + logger.warning(f"Cannot generate thumbnails: index.html not found at {html_file}") + return [self._create_placeholder_thumbnail() for _ in target_slides] + + html_file_url = f"file://{os.path.abspath(html_file)}" + + logger.info( + "Generating HTML thumbnails for %d slides from %s", len(target_slides), html_file_url + ) + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + try: + context = await browser.new_context( + device_scale_factor=2, # 2x for good quality thumbnails + viewport={"width": 1280, "height": 720}, + ) + page = await context.new_page() + + # Navigate to the HTML file + await page.goto(html_file_url, wait_until="networkidle") + await page.wait_for_timeout(1000) # Wait for any JS rendering + + # Generate thumbnail for each slide + for i, slide in enumerate(target_slides): + slide_id = slide.objectId + + # Build selector - prefer ID-based, fallback to index + if slide_id: + # CSS ID selectors need escaping for special characters + safe_id = slide_id.replace(":", "\\:") + selector = f"section#{safe_id}" + else: + # Fallback: use nth-of-type selector + selector = f"section:nth-of-type({i + 1})" + + try: + await _hide_overlay_elements_for_thumbnail_async(page, selector) + slide_element = await page.query_selector(selector) + if slide_element: + # Take screenshot of the slide section + png_bytes = await slide_element.screenshot(type="png") + + # Get 
dimensions from image + img = Image.open(_io.BytesIO(png_bytes)) + img_width, img_height = img.size + img.close() + + thumbnail = AbstractThumbnail( + contentUrl=html_file_url, + width=img_width, + height=img_height, + mime_type="image/png", + content=png_bytes, + file_size=len(png_bytes), + ) + thumbnails.append(thumbnail) + logger.debug( + "Generated thumbnail for slide %s (%dx%d)", + slide_id, + img_width, + img_height, + ) + else: + logger.warning( + "Could not find slide element with selector: %s", selector + ) + thumbnails.append(self._create_placeholder_thumbnail()) + except Exception as e: + logger.error("Failed to capture thumbnail for slide %s: %s", slide_id, e) + thumbnails.append(self._create_placeholder_thumbnail()) + finally: + await browser.close() + + logger.info("Generated %d HTML thumbnails", len(thumbnails)) + return thumbnails + + def _create_placeholder_thumbnail(self) -> AbstractThumbnail: + """Create a placeholder thumbnail for failed captures.""" + import io as _io + + from PIL import Image + + # Create a simple gray placeholder image + img = Image.new("RGB", (320, 240), color=(200, 200, 200)) + buffer = _io.BytesIO() + img.save(buffer, format="PNG") + png_bytes = buffer.getvalue() + img.close() + + return AbstractThumbnail( + contentUrl="", + width=320, + height=240, + mime_type="image/png", + content=png_bytes, + file_size=len(png_bytes), + ) diff --git a/gslides_api/adapters/markdown_to_html.py b/gslides_api/adapters/markdown_to_html.py new file mode 100644 index 0000000..ad12264 --- /dev/null +++ b/gslides_api/adapters/markdown_to_html.py @@ -0,0 +1,236 @@ +"""Convert markdown to HTML formatted text using BeautifulSoup. + +This module uses the shared markdown parser from gslides-api to convert markdown text +to HTML elements with proper formatting tags. 
+""" + +import logging +from typing import Optional + +from bs4 import BeautifulSoup, Tag + +from gslides_api.agnostic.ir import ( + FormattedDocument, + FormattedList, + FormattedParagraph, + FormattedTextRun, +) +from gslides_api.agnostic.markdown_parser import parse_markdown_to_ir +from gslides_api.agnostic.text import FullTextStyle + +logger = logging.getLogger(__name__) + + +def apply_markdown_to_html_element( + markdown_text: str, + html_element: Tag, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Apply markdown formatting to an HTML element. + + Args: + markdown_text: The markdown text to convert + html_element: The BeautifulSoup Tag to write to (e.g.,
,

) + base_style: Optional base text style (from gslides-api TextStyle). + NOTE: Only RichStyle properties (font_family, font_size, color, underline) + are applied from base_style. Markdown-renderable properties (bold, italic) + should come from the markdown content itself (e.g., **bold**, *italic*). + + Note: + This function clears the existing content of the element before writing. + """ + # Parse markdown to IR using shared parser + ir_doc = parse_markdown_to_ir(markdown_text, base_style=base_style) + + # Clear existing content + html_element.clear() + + # Get soup for creating new tags + soup = BeautifulSoup("", "lxml") + + # Convert IR to HTML + _apply_ir_to_html_element(ir_doc, html_element, soup, base_style) + + +def _apply_ir_to_html_element( + ir_doc: FormattedDocument, + html_element: Tag, + soup: BeautifulSoup, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Convert IR document to HTML content.""" + # Process each paragraph/list in the document + for item in ir_doc.elements: + if isinstance(item, FormattedParagraph): + _add_paragraph_to_html(item, html_element, soup, base_style) + elif isinstance(item, FormattedList): + _add_list_to_html(item, html_element, soup, base_style) + + +def _add_paragraph_to_html( + paragraph: FormattedParagraph, + parent: Tag, + soup: BeautifulSoup, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Add a paragraph to HTML element.""" + # For HTML, we'll add runs inline without creating separate

tags + # unless it's the first paragraph (to avoid extra spacing) + + for run in paragraph.runs: + _add_run_to_html(run, parent, soup, base_style) + + # Add line break after paragraph (except if it's the last one) + # We'll just add the content inline for now and let HTML handle spacing + + +def _add_run_to_html( + run: FormattedTextRun, + parent: Tag, + soup: BeautifulSoup, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Add a text run with formatting to HTML element.""" + text = run.content + + # Apply formatting by wrapping in HTML tags + # FullTextStyle has markdown (bold, italic, strikethrough) and rich (underline, color, etc.) + bold = run.style.markdown.bold if run.style.markdown else False + italic = run.style.markdown.italic if run.style.markdown else False + strikethrough = run.style.markdown.strikethrough if run.style.markdown else False + underline = run.style.rich.underline if run.style.rich else False + + if bold and italic: + # Bold + Italic + wrapper = soup.new_tag("strong") + inner = soup.new_tag("em") + inner.string = text + wrapper.append(inner) + parent.append(wrapper) + elif bold: + # Bold only + wrapper = soup.new_tag("strong") + wrapper.string = text + parent.append(wrapper) + elif italic: + # Italic only + wrapper = soup.new_tag("em") + wrapper.string = text + parent.append(wrapper) + elif underline: + # Underline + wrapper = soup.new_tag("u") + wrapper.string = text + parent.append(wrapper) + elif strikethrough: + # Strikethrough + wrapper = soup.new_tag("s") + wrapper.string = text + parent.append(wrapper) + else: + # Plain text - always append (don't use .string which replaces) + parent.append(text) + + +def _add_list_to_html( + formatted_list: FormattedList, + parent: Tag, + soup: BeautifulSoup, + base_style: Optional[FullTextStyle] = None, +) -> None: + """Add a formatted list to HTML element.""" + # Create

+ + + + + + + +
ABC
123
456
+ """ + soup = BeautifulSoup(html_content, 'lxml') + table_tag = soup.find('table') + return HTMLTableElement(html_element=table_tag, objectId="test-html-table") + + +@pytest.fixture +def markdown_table_content(): + """Create a MarkdownTableElement for testing update_content.""" + markdown_input = """| A | B | C | +|---|---|---| +| 1 | 2 | 3 | +| 4 | 5 | 6 |""" + return MarkdownTableElement(name="Test Table", content=markdown_input) + + +# ============================================================================ +# PowerPointTableElement Tests +# ============================================================================ + +class TestPowerPointTableElementInterface: + """Test PowerPointTableElement conforms to AbstractTableElement interface.""" + + def test_resize_returns_float(self, pptx_api_client, pptx_table_element): + """Test that resize() returns a float (font scale factor).""" + result = pptx_table_element.resize( + api_client=pptx_api_client, + rows=4, + cols=3, + fix_width=True, + fix_height=True, + ) + + assert result is not None, "resize() should return a value, not None" + assert isinstance(result, float), f"resize() should return float, got {type(result)}" + assert result > 0, "Font scale factor should be positive" + + def test_update_content_accepts_font_scale_factor(self, pptx_api_client, pptx_table_element, markdown_table_content): + """Test that update_content() accepts font_scale_factor parameter.""" + # This should not raise TypeError about unexpected keyword argument + pptx_table_element.update_content( + api_client=pptx_api_client, + markdown_content=markdown_table_content, + check_shape=False, + font_scale_factor=0.8, + ) + + def test_get_column_count_returns_int(self, pptx_table_element): + """Test that get_column_count() exists and returns int.""" + result = pptx_table_element.get_column_count() + + assert isinstance(result, int), f"get_column_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 columns, got 
{result}" + + def test_get_row_count_returns_int(self, pptx_table_element): + """Test that get_row_count() exists and returns int (sanity check).""" + result = pptx_table_element.get_row_count() + + assert isinstance(result, int), f"get_row_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 rows, got {result}" + + +# ============================================================================ +# HTMLTableElement Tests +# ============================================================================ + +class TestHTMLTableElementInterface: + """Test HTMLTableElement conforms to AbstractTableElement interface.""" + + def test_resize_returns_float(self, html_api_client, html_table_element): + """Test that resize() returns a float (font scale factor).""" + result = html_table_element.resize( + api_client=html_api_client, + rows=4, + cols=3, + fix_width=True, + fix_height=True, + ) + + assert result is not None, "resize() should return a value, not None" + assert isinstance(result, float), f"resize() should return float, got {type(result)}" + assert result > 0, "Font scale factor should be positive" + + def test_update_content_accepts_font_scale_factor(self, html_api_client, html_table_element, markdown_table_content): + """Test that update_content() accepts font_scale_factor parameter.""" + # This should not raise TypeError about unexpected keyword argument + html_table_element.update_content( + api_client=html_api_client, + markdown_content=markdown_table_content, + check_shape=False, + font_scale_factor=0.8, + ) + + def test_get_column_count_returns_int(self, html_table_element): + """Test that get_column_count() exists and returns int.""" + result = html_table_element.get_column_count() + + assert isinstance(result, int), f"get_column_count() should return int, got {type(result)}" + assert result == 3, f"Expected 3 columns, got {result}" + + def test_get_row_count_returns_int(self, html_table_element): + """Test that get_row_count() 
exists and returns int (sanity check).""" + result = html_table_element.get_row_count() + + assert isinstance(result, int), f"get_row_count() should return int, got {type(result)}" + assert result == 2, f"Expected 2 rows (tbody only), got {result}" + + +# ============================================================================ +# Edge Cases +# ============================================================================ + +class TestTableAdapterEdgeCases: + """Test edge cases for table adapter interface.""" + + def test_pptx_resize_with_none_element(self, pptx_api_client): + """Test resize returns float even with invalid element.""" + elem = PowerPointTableElement() + elem.pptx_element = None + + result = elem.resize( + api_client=pptx_api_client, + rows=2, + cols=2, + ) + + assert isinstance(result, float), f"resize() should return float even with None element, got {type(result)}" + + def test_html_resize_with_none_element(self, html_api_client): + """Test resize returns float even with invalid element.""" + elem = HTMLTableElement(objectId="test-empty") + elem.html_element = None + + result = elem.resize( + api_client=html_api_client, + rows=2, + cols=2, + ) + + assert isinstance(result, float), f"resize() should return float even with None element, got {type(result)}" + + def test_pptx_get_column_count_with_none_element(self): + """Test get_column_count returns 0 with invalid element.""" + elem = PowerPointTableElement() + elem.pptx_element = None + + result = elem.get_column_count() + + assert result == 0, f"get_column_count() should return 0 for None element, got {result}" + + def test_html_get_column_count_with_none_element(self): + """Test get_column_count returns 0 with invalid element.""" + elem = HTMLTableElement(objectId="test-empty") + elem.html_element = None + + result = elem.get_column_count() + + assert result == 0, f"get_column_count() should return 0 for None element, got {result}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) 
From c57700be8af03764a8ca45dbf922262f14a1402e Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sat, 21 Mar 2026 11:37:52 +0100 Subject: [PATCH 3/3] Move all deck-related machinery to gslides-api --- .claude/skills/prepare-presentation/SKILL.md | 99 +++++ .../prepare-presentation/placeholder.png | Bin 0 -> 10016 bytes .../skills/prepare-presentation/prepare.py | 273 +++++++++++++ gslides_api/adapters/abstract_slides.py | 21 +- gslides_api/adapters/add_names.py | 368 ++++++++++++++++++ gslides_api/common/presentation_id.py | 30 ++ gslides_api/mcp/__init__.py | 6 - gslides_api/mcp/models.py | 32 +- gslides_api/mcp/server.py | 207 +++++++--- gslides_api/mcp/utils.py | 132 +++---- playground/table_games.py | 3 +- tests/mcp_tests/test_add_element_names.py | 132 +++++++ tests/mcp_tests/test_models.py | 131 +------ .../test_abstract_slide_markdown.py | 58 ++- 14 files changed, 1187 insertions(+), 305 deletions(-) create mode 100644 .claude/skills/prepare-presentation/SKILL.md create mode 100644 .claude/skills/prepare-presentation/placeholder.png create mode 100644 .claude/skills/prepare-presentation/prepare.py create mode 100644 gslides_api/adapters/add_names.py create mode 100644 gslides_api/common/presentation_id.py create mode 100644 tests/mcp_tests/test_add_element_names.py diff --git a/.claude/skills/prepare-presentation/SKILL.md b/.claude/skills/prepare-presentation/SKILL.md new file mode 100644 index 0000000..f02e2b9 --- /dev/null +++ b/.claude/skills/prepare-presentation/SKILL.md @@ -0,0 +1,99 @@ +--- +name: prepare-presentation +description: Convert an existing presentation (Google Slides or PPTX) into a prepared template by copying it, naming elements by layout position, and replacing all content with placeholders. +--- + +# Prepare Presentation Skill + +Convert an existing presentation (Google Slides or PPTX) into a prepared template by copying it, +naming elements by layout position, and replacing all content with placeholders. 
+ +## Workflow + +### Step 1: Copy the source presentation + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py copy --source [--title ] +``` + +Creates a working copy of the presentation so the original is not modified. +Prints the new presentation's ID or file path. + +### Step 2: Inspect slides and decide on names + +For each slide, run: + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py inspect --source <copied_id_or_path> [--slide <index>] +``` + +This prints: +- The path to a saved thumbnail image (read it to see the visual layout) +- The slide's `markdown()` output with all element metadata in HTML comments + +Examine each slide's thumbnail and markdown to determine position-based names for elements and slides. + +### Step 3: Apply names + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py name --source <copied_id_or_path> --mapping '<json>' +``` + +The `--mapping` JSON has this structure: + +```json +{ + "slides": [ + { + "index": 0, + "slide_name": "title_slide", + "elements": {"Title": "title", "Text_1": "subtitle"} + } + ] +} +``` + +The element keys are the **display names from the inspect output** (alt_text title or objectId), and the values are the new names to assign. Display names are unique per slide. + +### Step 4: Replace content with placeholders + +```bash +poetry run python .claude/skills/prepare-presentation/prepare.py templatize --source <copied_id_or_path> +``` + +This replaces: +- **Named images** (min dimension >= 4 cm): replaced with a gray placeholder image +- **Named text shapes**: replaced with "Example Text" (or "# Example Header Style\n\nExample body style" for multi-style elements) +- **Named tables**: all cells replaced with "Text" + +## Naming Guidelines + +Names must describe **layout geometry and relative position only** — never content, topic, or semantic meaning. 
Imagine the slide with all text/images blanked out; names should still make sense from position alone. + +### Element names (purely positional) +- Use position on the slide: "top_left", "top_right", "center", "bottom_left", etc. +- For multiple text elements in the same region, number them by reading order: "left_text_1", "left_text_2" +- All images/charts regardless of type -> "chart" (or "chart_1", "chart_2" if multiple) +- Tables -> "table" (or "table_1", "table_2" if multiple) +- The topmost/largest text element is typically "title" +- A text element directly below the title -> "subtitle" +- Small decorative elements (< 1cm) can be skipped + +### Slide names +- Name by layout structure, not content: "title_fullwidth", "two_column", "chart_right", "grid_2x3", etc. +- First slide -> "title_slide" +- Last slide (if simple/closing) -> "closing" +- For repeated layouts, number them: "chart_right_1", "chart_right_2" + +### What NOT to do +- Do NOT use content-derived names like "customer_name", "insights", "performance", "weekly" +- Do NOT name elements after what they currently say (e.g., "sidebar" because it says "Key Achievements") +- DO describe where the element sits: "left_text", "right_chart", "top_bar", "bottom_row_1" + +## Notes + +- Minimum image size for replacement: 4 cm in the smallest dimension +- Tables get "Text" in every cell, preserving the table shape (rows x cols) +- `write_text` handles markdown-to-style mapping automatically +- For Google Slides, credentials must be initialized before running +- For PPTX files, just pass the file path as `--source` diff --git a/.claude/skills/prepare-presentation/placeholder.png b/.claude/skills/prepare-presentation/placeholder.png new file mode 100644 index 0000000000000000000000000000000000000000..61f5ebd854241124a2b98e2703d0703e54e4e5de GIT binary patch literal 10016 zcmYLPdpy(M|DUzhcuT9;eAdNuL0Kavy1157y6A$bl_5eGt94Z_Tcli~i(7I@QX$P} zA2EeRl*}c$ltgk3B_X+NzOVOs{2sseKRn*&yl&6)I<Iry&)2Tm?c6e3iK2wTU}kT% 
z*|Z0PnL)*1uuK9L-l(anX=5;2JzF<f?mK_?`{&P*^`oDshxCwvf|bbEMN}&_tan&0 z|M-LSss{B0tsV837c+)Z0-^%~zq#Dn|32aTEy|PQ4b&8e)KtEXAhboNxAp44AbVoL z>-di!2PKz$%#v%yhD`EL9YpAEJLt)N#U4e^`-Wa?)L<|=)tf%+VK7vqqXn|Ti-U!E zF--~>V(#TxbDm9)O&=6AA8Z&uiotZgv$t#88AL%aZ04*vi;v~)eV$%4yf~}6{H7BI zbNNiVea~c8L0f0*cqKY#ZDB~}Gg<PIdL{-_`E5ONcK`3yv%9S@frl!q_=z)3XH~KY zm|Cx&?HR|rW<_|@Fo9}EUhWhnb)KN(Fn4G*v=1u=Bj2tjV=+~?r#TI4T53>L7F=Z` z<Sb<(#54g>2-H$POB!IDX1jx4>Y4kCvYR(6W@Vf2a#O%ms9%5~-@ikvIwbtWa-}Q@ zm`rn>Dz*QV&ubW6#~@*diB=vjcb4HCJizMLef6{tj|Jkll3<L*u~1W1uG&A$RS*op zxn0P9_;rS^A-LnB>qZ*bx9zXyeO+Mmtx^^#0*?v0U+*N?E8<1RgKPS?O_8%JO$7#A zO^jWhw~FI&dz?d9ITn-Q_z{d0{56uBaTmYdp2Cb55iyUpjRImBGcv8g{jSIEU}Trv zNXo)9fS5NTcVG2MYpMN#L|#K`4kmD@!3;T@VJ<hKpG1U6EdNHbS*^M~W7Tc3_d9^h z-x9=2{zl(9zZrndms=NaE7`!x4OW+j*sC@G11*0C&INBk&MK{w>-{+$9FXsgm<sj- zTnr~w0Kx=|0%5VY%m6tXswJPr*u?_p<FKgn7V|e5!+@TAAdOxQpxl;&LOnRNIJ?<( zwj8Y$*L+C>s^{bp+E@>O+Fm)n7E^U5tlS4gxmT{YCE!(}+$&<jZ3u8H9$l)y^J(Yu zy=_;?oeH8=fQdVQP3(JI%}=~JTaH!x)=5r7%qe!RCk;ce`~@aP{+jrFM;|%sy-9BS z(Jn45&JIPnQ}*6y1~|!8rL5eMfM0J!mxpotX*|TIH4|OJKm!J(UHxCI?EcfL;U`iS z$(Jy+gVWGBB1c_cMaU-eM$I8J)gzn)vEqpsSmP^QoQ7C&UwRIPI!E{ggUy@+>7ndF zTDYA(dIZAiGGRTjVb6xOO}XWyug%NF!uWr2_~H3X6!O5*20NU?Aqo=cVSrq>V9$QZ zB3QhS!*5wCxZ*~~rL0|;y;)1J`CbmjCc%lb9eZ>WnBU`0T9_=RB#MZEBPJ|0D|Upf zn?lq~A)AMDP=HK-RZU~kc=pO*Y3Nk`%Ddu~H_Ne9y2s0#ge*R!^3`A`!3J#Rc~Di- z!?9n<vxj82h?DTuh|J{%WGs;)WE<feM4&3Dv^~QBo2dj2`o`mRwFE0&!7EVJr<m0j zhbTNsDz#sQb7+7}YS2d-ZP*QB2;ifHKRS;&s!zt+Ee^?SR?J%GMZ<);lazzR8L%+| zQ{(WZnu0#ShkZ%K(Q+>HUM&+7UQSb|^3;gnb*TYzWh*<l3{?56IKG_6)U0Eopeo-L zE8Pb9E|Aw2l(>T{v8DE^IEP-azslp~RYDd4oYF_it=VOOWbI)U$9Hp?t6|UVa3h7^ z7Ket5h$(9&{GoZwAt*!)Ua5UO&LN9IvdEC|6^YEbuq0`(X>l`oYQ|hmi){<DZ)ytm zq~u^+kC3*9i?;=dh}1(KFGmPjr@@YECntItFPaj8Cn7?23C@8Jh_xFc={wjDDp&-f zK}cq&V%BXhT3~G)et{Kxc{RL*EaDQfMqnANzHl0Ju$e2s`aubQ_A&wG6^meJDr6ff zW_f^j`zqQqa&QjwLH^%({DPfqBZxg^g9lUuE{MPV!m-v80C3i;kvc5_fUPQ~*=X~u zV_>kngJVtRmE|!=Vu6G|vRv>-1>*7du#8zM=!Na}rnEg&0Y{R8ZUEEihS=4q+bZyo 
zhvhFKQyG@Hx07SN6B`x{>n5aboXhNlcXuSTHv$~#LOjCrIF;{;!-lnc(oi(Rx3gC> zxSF*%56f~*fqJlrXp>N?<~ENRzSB+N+iGMFfmg<q1Y2yA@KvqZtLa?LsF@+UO3MT~ zSIV(p-AQR2Y*<Mg!c&jOuedBuwduB^(p4O9Z({G>5P=V>q1imfksd&Nnuf@pH9Tu4 zP!+c@`%$1cl|?}aAx=?m3AAmHOjc}C+BR(1N>JsymbH5|&-x651b|X=iOhX(S%iHR zG@FAsl07VP11CRqGd9c@0suh0wFQqNL`0p-PEkYxt?w`$7uv<Krs7DW@0sF15<bqF zJr~E-gbF~tH<uYy!Xkv1w})aBX(g#Sfm1MU%bvRsq5@Z<6lqo<Ka-ok1ft@2(Nbca zqI3u}K7hV19)B%ZyySGZRZ3ZV=v4ykYf4T^g@nIp3w!Qtt|sG_Q<S<Q&DE72lzuXQ z$sTOjA5WSmR!H4FpScP0$;%L#x1ML6B?*pN8KOInD!7_Z&c7Xp-+D#7#JAhZ2E4yb zpzUGPgFdWf-B9OQ!*TfP4bA2zj<j_(*@`9Mr)^{3pl~%`m9&QjDbniFa;S$rEN^NH zY@m{r944hH;z%nD$yS~Vvpoqs>j?%a2r}wEfkuJ7G-`m<F|c9j`edtvFiz!J3q5I% zZaPJk&to#Ki-;L6q_hegX~$uD(7$V0?;!Z@I;P!BA@wje><TE&k??Dm2t3zuHC4;n zm6~y+!%zvz-AF-nEFyc&BHUk%Sn2WbZ3g5L!NTkm`1dn|)agPBf;1g*p$EN`@b%-w ze3>LzurNfIc|**%WROl9AXYoEhy%n-5>h+01?U05$>-lA(B9a(z1!|#sm>GgHRBL3 zijW$<nf;O@31)<pszJuS)M1bS(ULeEDf|t~2KK+c84rKokbHA-h^|hMn19lXR>vj< zVHIgQfug7n@%Vd8EE4KMUv@~v(awa2x2|KpDx(pW&t=-_b2W4H5vvP0l4=dh<`1~P z4~x7_%^}=!BCezgQlMyCL8RUUnog`JietjEbjOkQS-S1k6H@nWXIolJg6lz5!+fUQ zBCciwH=n<sKznqjJeA^M`3NGV#33sZoQQqP1u2WWt=@KU@VPKmJHi@3B@%deBSZ2Y zRUtKKBim9c2@bDmSK==bP*o{m@8a;97FY!9N<Yl>uq50P^96B81v<W5K;1+M15k+& z3eSN2??RB@#kS0s1dp7`=R?L463bJ;skw8RL<O#<hvn5UG2bK(NszQFr7jgvwJ4ch zAiaWzzp<LUCd7&O6;zsamuLa}j}&QFqszBNI}ssEsjl56BjCquEb`TZzWrLM+A{D{ ziQzCH;YX?S@YnRo<&`wTx4FzH6|PJ8!ffhREVAc*jw`G^WT{P*NdJ<DrA>mE&xl7h zc60C*^O;cw5o)la3|OXD@klAm>?Sg!%(yNDz<xK5#0W0W2y-G*Y}l5U<B-fFq#%!Z z%vxG^$;!1XOE*QD=aLBfH6VXi%)eqlj(JTZ5NMusuFF3QvjexY6)2fH9+oLP*b3bx z*f=~7GVF~3xBWZ}LbCpeDW}9Z5qoXf3T-ZQNezua+rn1(ED63~!cv$)prx2c<N}z2 zB|JQ>lJx;X>(mqkT1tWwLH;g|Bru3@C*iIv5d^aCY#`sk^IQ~Z)TbWR!77eo0*<7A zx0?a#)#Jt3D~&;-S{fmIBU|CV%|-g@d~EqLLEyUwO|X_JL@c6iyN!F#iKr7Q#+FBy zvwAsrRU5WK+0{Hq3yXFf$u3U4$4W@Go5{nguE-YEwkzpe6JtZk{%-Mj-X29-imGN! 
zFsM3)MRaWAxc5rc>=bzT+q!ql4G?VfT&Bf0t2m^JZxIcuf_;0eLUdDHvB;yHqi6u$ z5Y#sNwP*kZ5VUuL1Ol*#fT@wcC=hi0PPRhs3O70f!D<qj7Kx#W$fE2(3z+KPVGIEh zz-zvKHOrHPvsx+$v<qKDUdK{U*uQrS30!A7IUv;V|I_}Z&2X*!F*M|s&Ne>w>Dlr3 z={q+1<-J~faUdfzo6=*QWODLr{E?(@leWdu?><dWoz0u`qyxelk0a<){O4;}u>5)O z;6X)2#kX(Y`kxld(i2WJKRf=(sEPG{>}l0-@`0xsQ(tD8OTX=MoHC0v{r%y_(k=he z)}Q~$_AxXadNn^REG*Gk)R&R@qo6-qt>4?lrM<J_mQW}>Qhhf*IoX-X%)d1Lt6u)( zHXVBI1zsks`Fv@(D%0V~{vn^VsDRo^=@FCF=#%3++V}YJWANta)55c#3(gp~p6`J9 z*+mn5CyR#JYp1`R-Y@j1x?^-S{ozC7V;?vj5ymGV^wKM{OJ$;{l{~yIlZ8GnPJgR= zy!Ro9N6cQZW}vM&pfYAvbYbBAEs^vw+rgsAAAVz>jqA@%b|?6>224wh+b#`HjSm+V zuQNuUb3a-{Fjdy?Jl*`_!f=x5MZjduNZ8_<!m-v*1-7}eo-gO`Bqc4LT0lSa;twb( z8m?XsI(9Va<;wi^SouHUd-I6XGk@uX>BZjz&ACTiQ=j$AqIDg!S)x<pzq$Z<TBR8* zW@D4%ll{2>%X9Fm@qo{h{@gPnfUK)Bwx_43CfPh-;6sk1v)?F;6;8`0k9c_W`Fq)! z&(~D$DG5dbCO@^QELyj(N+VAo(Ek$pv-RTOs#UAHrcZ$E^!sGl$k#Bv-%m0RJ=~S- z-yMGh+>M)#)Y-N>nSN>P^SS9iugw>)J(ZT1?dvBS@6jt9sY{-o>d`Cu<JxJSv&`1k z7L}X$_HbG@Rva+i6?>-muFPzr_x{5q-@X<R_e?SIQL>o7`^E3(qdvvbv-p78$-&}) zuD8)${-c$8#T(A_?UVV8cHY``D4%mL&e7poYx&#vTaTmaoG%TP-ILv<RaPgNPQr}p z>S~oyNBeHbpIv6-hbRA4yE8sMPCj(r<<XCjWr{TQcbQL%)|s0-|4#n(<N4tA@df*L zCT$omLG+XpD4Ocy?}H(KeftYpOCbj?{r-65=-SU+tBsAvM&9poE;#-82f428qwvV( zs=bfG$Jmy6)#r3;s;Uyr17xF3&gK_>9d7bDz+z2(xjYBW<zI&?f)TRAhJBa*w1+^J z-@0{chJQ_27&-ak4^dI9ATzt5uX_DpOTn3bi9}Lgzpwv^c~5oXBp8d-Fb6D?djfJh ziW39YJ}IYUWW%PLv(Th=a`f7@YoDK=8~l*d0Y(l$GEYrSeS80~&A0!F;gRao(rU<z zo}`OEZfsdxG5l;S^*@;*YybZJEY{uRWOHt}<l21g>l)^*lQkz!ocICT`vSP<`1DZX z^m_8Bl92ZNRM*=Fwy|bYKTBuKQW?223A<p8PwNJm#w@tn>%SS1KQXHgBn8OE$Jzpl z`yI_?e>(Qe$Q)aj>s}dS8%sY^w6Pb`Pvg?hmy6D|to|~IF0ONc^vqz>x$n)n0n;+0 zqear`8BZd&XCKHrG5PyDBvRYMWPjg3KigL#_n*#fy?o}2nAaNZ?d{#yR4f~vZQN2T zdkZy1$OVYjpIdCg`SP>9^kDJyz(*l!=lY`cR{|>BoN~SG`#U1~o1V6bxIHM=;N&iI z>3{usAob&WUYiWfo1x01@o{l+@$s;lHFb4$kQU9OnXpCdS3OlKaaj_?`aSUR{bbo5 z<2z8>##%2v?GFR-0qK}DF!CP{r?c%P!L1C*zXR9T|M)Xr6VU49Sa_n~TYAW1-`?h< zPpigm+O96yxamM-a<6pa@*J}dPL7GczkkXD=UbmT{`R_}(D>xQSVG%W`k-Ym#1X6K 
zoGn{^rl2e4fNycHDHBfi*rHuQ-hNR<%QOO}PB#6+9_?$$`+W9*&#X(oDzs-jd*Sx_ z`l#&F2@emCX_?fzu>RMG*(OHKlEyJ^EX@1#<z1h)`S{ks_y5pY`NR@!C2cJ&|M5!a z-;F<8T3R4oj$YV$-0y(5Nc)i`7jAS924{6VOizD1Isf9|o=<r`&d<+&pBU}GSffAV zI#a`MWcMI>Y~bVGj{8Z!)O(H~zl&VxmH!@mes;Dj6AJ01*^usZ55v~T)b#T?!U@xt znVbEbE!u=6MkwqPx=+!##~``#Q#FZ9294iOa$pe;`uLmsj`X$`UYsm)${76AS$Y0! zuZHYrh(fV%l~L2wk8{NfzE3Lq&uMIFVf7fc#m2-;n1y(Wm;PBFP&3r=`gxIh^^CbE z3P&5!QgG0Dz|Y6Q*;@ME>u6zx^v_U7;q+;ANj6M-ZG;=Ka-FYa434MPvEp=}&jq<@ z=3~;aw!w+Rjj-`1=eDjtJ8-@$+Hu{(_{JGIzBMz4hldL_EE1BEcKW>ZGInS?a)iaL zGCJCneQ+pYM7mW^ZA19dhoG-$ER4(L+814Uc`wHOV}ql!G+=rxpy7o&AzsWsp)1o( zb3b+J-xoEpqyGLse}D4viS&K{{(WA{DO<<kMW(+0`LIX*hxf!*_3hZTD^C{d;5O># z*7D^wQ(1Pr9p}&%z-fQ+f>!y=r%fhImeP7^wg>%(*0IQdsg9f5x}ba)M!NTfiXgeq z;u`^#V%bFhhI4nCHJ;n$S0^9*U~E!1c|fJAv$Ipre<%!6ut{n~RkbXxRO9EK8*F_U znMBz#xW2LJ$$_TP^cWaF=a-%}YLS&)EEp6nzg%!Z^?{TaQlw!h8NX0dR#w)k7c(+a zKX*j>+*9LcUxV*&sfoGKym=jbeCbDlODSOL+!~TLVJ6Es7si4df~<eOm~&>N_Z`%% zbN=qIp&K&n4{D?(CnQY%s?<XQ7C5xmC=K>s(@yG5uv`UuZ4i#`#>vsAMH4G)E<7Ea z<E^YMm?-*e)>ElFxOOs8W$}jda60%+{*IQ;viyT1WwD{AKyfuRG(=B(IA`C=&evDm z(!QvncoGUqYft#toy0^b9AmIsnw_CWG(;G;ewefrMw(4LdV#R~-KRRg6<!=GlK%d5 z&_{Y`|NaR$O&q4|O0<(N)lHTa`K)?c24&N?`271F#`fcs{r8>;1RUAaI2gP?wtSJ< zrStubPt2?dSfs#yxNS7n@K4Ub1=%mqdDh<@h6;J3<qLe^6whqk;u3AF{ryAE{jrlX z2VV?Ve-YnFP9B^Ls<T-9jbUuuG&bvE?AY!`h4l3FHOHI(gL9qid?{zlUfN?mePQI? 
zmdpTqqUzvFejtrb{|y^Lr_k^0r{j5}vS)DSe(j2NUfeT>vZZI^rpnr}CylSKFY2Eh z<~ASlAMa}2vj5lz!M))n%viWZOm%2X_evj|Kst`fws%P<$HvTN|A2A|BAH#I7A80n zt`llkukTcYfPS!`epI+O`}vQ8qR3k&r#j!5PmkBUG_UyaewVo)+zLXaH^LHYjtow= zJRUmU`eGa|G(V&h+#1uomc1M2EZ*>8vN4ZV`9su`<m&v1fL?iGi>Dq8M!teK7iUg0 zD*k=qBupRV{vM_^N3EH!iAx)t$tW@YR^UDGDR1!AwfX4r(KL9eC-L0QyCx^$PwT15 zRZ!=X@7|pPW8HAmflbgYiHE|N>;I}9GSOU?>HW|6!#3rkZAE?o(thXXMJDFv)Af6I zeLnem5$yO6kg;&F>gn!=!xcS0i)uEu<)3uB5V>vv+sg~R&L$-!tpEQr^Wy%ag)fM$ zHfSN=7@~6U#rd4!up>1|(x0VsOmlf!@a34Trg5MEE|_glm7HSGD^cO}oi;eHBmGD2 zD+G+&!VL-Pz&Zb}4>@j=V}pZJuh1L7VmQO#04x}5d;cWaaPJ#--q*HouU>8Hr|vs@ z(B0|Th2d_<_(dk1l>UCW0j`A=fP1vVl*-<{d*_(_VasNDc1_PRtv2?8n&sbnFXY!$ zZ~xu4wKbLfM_paNhL}&agXQ|sQE*$4c1N;_aIbgrH#k!4_tUvyl9X8)t2b6OqdVrn ziSaT${~Ob7ua|q7Kgqz3wQy_R%u32V+GO_U%Vo_MlNnFpqVFv2bL^7pNL*m+WsW`8 zNcQ{f^xga_#e126N`O0GAuso2NBL-f(;OLM?9<vZX)#{(<QXhYh=zZDTApdwZfOWf z<NO!iti|WqAF^8VPj)Pg+^}ab;q|N!fCl`33cg9|rB+1^>s&|urM34qHWaH9g02zy zoWA<Ut*@O&r5JWBT!jYxpbPV2cxMlJSF)Mk(ATmv{f%d&-4P3n(M#qa+_vFr9pCfM zz69_?cx2b$*tIusDanDG3R*3C-`&^s4`f4Os7W$~dX*uqZ#oJU3TDF<|M}4i^wQ9? 
zgROqOwfC}LNBY6p`*3Gs^S|!42cIg*gT4R#^7pnzHRcna59h)U6#9<-Krd&levpYD zKYnZ!ZY7h@CnhFFOIsUiS|51U3Lh`<&7nKxHJA&RkP83<)X8(dIx9e=#%%JKzrPHw zBn5C2>ubnt%5vgLN1J?_|9d?8v9vkByZx2?fgx>BS<sj9f8RPSoYL;9gc;+HP5ho5 zDx82Tu0Nb<1x5ZpzgY6D4WIZ<4ZRj_Yvn32wzwyg7Vf1zoqyG)e>&&<9FkpHb<MIX z&a1vV_C8ddQ8tfh;j-@i%BceHZ^vlyc%T;H+qaj~@W61E5y=i{k#OB?C(c?5q7wxl z$n!dA`M}ZHslt0Y@V;dDADy!#LOw_R7JP5V)XC@QtMKs3+U@c=`UX6FXY_A)AAt|I zU@M3iE+iJvUFtDntV5&t?;OynTqlw1d9#(RQ0e_kt|ux)jGf@-?}zEYS78g{@x!%D zF$O3EAc6h6t=MZ(HlegZ0EDJ9$}tGPoM(_~Ar6n1SRgThcen#xA06Necd#wF@%YM= z9H1vG;q$!`ngc*{+$gpGC<(6Rz{D~db9n?l0w^L&3BLgdH$$K~ltMBK;*fhh94W1a zrjCVi;Dt_lyyU>kweV8XzLL>x1!N!I3n=&PZkE4rq-Aet>SkQcG%)GCFxwTVH<X15 zanj#RL2km<nd|ZLd_1CQ3M9=Jj@o*z=CaLf%N7ZL#XDw{G7mo&_-~-|yogMqZb;@n zn5h87j~nS2SO+4L_z&>kafs$2EaLi=(*V}5-bQ&Pd;(ZMjU#Om)2@n$U%}1ODvtlb z#3fjy1SsOzaych;+bOCGUS7f?R-K$OcDEG)C>%i?zG9b~C2-TYS3prqf{9u@{5?*- zD`+P~iTM-p_<xei`6<Au#XCi%g8V`((gzH84orpENA;0CizDQm+6*Dp9k43|dakNH z)Ei*hj72isNNK(l1Q?H$B_5W2z|Zz6(jM_k)kY1-R%JNS?Mj-B34^o`aJVMn2bHr3 z4Me6LDMYsf$WSF9{gRxbetOY#qQ!hZN&$<AZ<h*EY*ZXyL5Bn750!<~As|bEiNosy z#|spxE&FBu!fcz%A|lY_?9(2Wb@y{n%4hRo(lYyQD|Pg1)P!|o4OcUKE6Qr}86oBT z8*uC5J4L0hCYN~PNS#1o8^$9pIN(P;NVo_*(D(vilTlF3dHBn~LTYn0JC_Mk%!O3i z7Ppt7DA7s+qBR<2H7rMumuoGl|TftAtc#TQ^HcQ5i60_kl|WcGk&*6f^|*go#tX zN%(fE5%}ZSu+4@D5WT@D(Mtyw72G#~Jq^_9F-q8#`M?u90Z&XvdE%<C9K5{&xsHia z%81oL1_>G<o;V@3zLI70o<OVX;ovp($#pu|u-ox?O$NvlnahAzaseS|JuraCdm{;6 zwhI_-A@!A&+YNo5^&TK3VP8L<$4o12S8_c}U$zw+rV8BSSxVR|0xcDIKt>$0^_qAI z8r?juW(AS?>Mf1%xQ@BZf<1S+keXJ-vKhdUhEC@5we-pJjCj`Dfwu<hHj1=V0LR0C zJZ~w_dLht^6_Q|-xL+rv)~4oMy((Vf2~^rGQ52IvE9v6kM_?wNXMI(|7qcm0VnrI? 
zjfDFJIsp+TFo9E&V6<(ZB&6z7BCG>(Bujk++X3Sgp0yOlhvE^%P;u%`P|Bo)0dM9F z@^9JnWe~hCFpXWS$$Ow1&;V4Q5rbqj4>}w)=y0&m4oAbud~C)MdKw9BhTwU=UbK~3 z0-Z!4-FQGBDbn78e?g@z8(^W`S0h-LI_5qL_HJ;62+eKx`Ak({hzZG}DBuV~50h|T zQ*(egOx5?Wq;3ZKnF@WIQZ=F;S2K7%GaQyA4VY{y+T!6uizgUu@vIBcrJ8nIDWTmU zC+G&1yKcvZ-2>JUKs8X|m2n`zZul3lfSyxIJcG292t6he^q5#^k4cxCkA-1XUK!QH zGH@p)40zK@ASD6Rn<3&<^AKHVTOrW4N(EY+IwztGgAE%5{5p@&53j|B`K)CrC_^4O zW5Z~M2sV6++dFHv3kio3`8%<h-xg+LLS5-l;f!LPh$+gHu<dLkpd?c^bX!5c4TNu3 zu|9CHnNAWu#=edTMV0c32CX@+3$*6aIr-Q}VIl!ETPI*#vz7@(b>0yYju0Xeti?IJ z1?^UD^kW2Gw3d)+@rL2>3+)W@p)*KEJA>uzN>oyJNsuD+5b+AGX*p0-=YexqV@3L= zSY5n|BPPd?thJe4Rz)L3&5MXu=Bcd)MZS_?D5^Q9^07A2B7Fs{t{Fh1Rm+5;dIJ`b z5hBt*gw@po@f~h-wAXnVBC23GoW?oez<M0gu|!}6yUHR=q`w5K%kr?q_<CjO5V9=d z@e0iAz_Md?H-WUW2OaIR0(~Ah#c&Yd94tT@&{WsrsVRl%Vm7<cm7~NN$uK_Tm6bxs zx&!h$p(6c-*vy?EuL@HMJo`C9DyHil6B-ppurPLOxGsgzCkAXDT@eX3V>8!*{fvh> zP*hjKx~I^)OQ5B^5)#`^kLv<I-w=V-pp9<!o7vHCXaoySNiY;ucSvmN_U;lTLe^KX zN=PnW87*EJ2d*&VkjkZk&`yYtB?*S2S_W2$dJ*;}JbP$iqL7ko*%6iPN`X)UkY$3+ zuwqnGN+uC{>fp*AcRJcr4=z>1co~wfZ)MMhQfI;S%(|hNr3vfgN#VA8LZ==AWZV*Q zaM+{nAXUqRqN)p)P*BZzMpr@nv;nz$nP39)PJD{d7>aYyfQjl7mPwqr54sna3vtLS zBC`#$Su9{UIAi+*AX0q_*F}#Pjp|g76bWqK&wzb}v3|!q9-%@5_W{%obuD68{6K>^ zMCdUS(iQWz@n^X=iw!`eP<A9C32IF6&h9~Zb?^>FRH*&|FQVb97%W*7-dRIV*FeXK z{LNLtD`uhcR_~+Ax2=KxFou|}1=%$-2HX##jf?bi<wCT1C$iG8s3Rx0o}q8ohQ1tz zx_N7L@CqC%?i$N<v(|uK1jc6LHm*wx52^}g#a0h`xDl!|Dg#H+eJfYg?3#>pAT&rG zPyVBxk~q1}9rL~1tmmWRp6Tj6B_>!{8~B+}fk=M@6~e5VYvfSNgH8ixU6}#-$^z7R zi>Er>B}6$k%B1sdcrq$Zjg0Ck5y-{g=zS3B-<FHtR}|#%2`CKu*K4~=CT5`O)t&9Q zE{Srbp=vg64J1@PMOjl4Jc>huw@Yg7DanxQ-*>;fJcEG-ViDbU2g#JXpW>S<3AUF< z|H!c{!rm7JKajX@Eji||e<mvvkV+-gDluJ&lKIyaP2G!hni+~-;LqrFWYRx!uj$K; z99-nCxb7=wF}Jg!?qG&iUZO8FmRqGL9m~p*BbAo+NW>XI7avo%_YKQro80Pu5g(Zw z<?HClKiOT<E(gH#`i-8F7jmt4yn@Q#s>uVaxRQuCs-XA>-s>*0ln?jb7UsmvMm@E7 zx`FF5EmzO14&}O>L)Bw`sn;^w<Y{E#Us)C0CQlBrV4^x0{ZZMm@Z_TejK!MfITvSQ d*n!itcI)4n(pa|)e*cESY_;CGDcg$8{~u{f8sh)} literal 0 HcmV?d00001 diff --git a/.claude/skills/prepare-presentation/prepare.py 
b/.claude/skills/prepare-presentation/prepare.py new file mode 100644 index 0000000..ffd1f36 --- /dev/null +++ b/.claude/skills/prepare-presentation/prepare.py @@ -0,0 +1,273 @@ +"""Prepare-presentation skill: copy, inspect, name, and templatize a presentation. + +Usage: + poetry run python .claude/skills/prepare-presentation/prepare.py copy --source <url_or_path> [--title <title>] + poetry run python .claude/skills/prepare-presentation/prepare.py inspect --source <url_or_path> [--slide <index>] + poetry run python .claude/skills/prepare-presentation/prepare.py name --source <url_or_path> --mapping <json> + poetry run python .claude/skills/prepare-presentation/prepare.py templatize --source <url_or_path> +""" + +import argparse +import json +import logging +import os +import tempfile + +from gslides_api.adapters.abstract_slides import ( + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractTableElement, + AbstractThumbnailSize, +) +from gslides_api.agnostic.element import MarkdownTableElement, TableData +from gslides_api.agnostic.units import OutputUnit +from gslides_api.common.presentation_id import normalize_presentation_id + +logger = logging.getLogger(__name__) + +# Minimum image dimension (in cm) to qualify for replacement +MIN_IMAGE_SIZE_CM = 4.0 + +# Path to placeholder image (relative to this script) +PLACEHOLDER_PATH = os.path.join(os.path.dirname(__file__), "placeholder.png") + + +def _resolve_source(source: str): + """Determine adapter type from source string and return (api_client, presentation_id). 
+ + - If source looks like a Google Slides URL or ID -> GSlidesAPIClient + - If source is a file path ending in .pptx -> PowerPointAPIClient + """ + source = source.strip() + + # PPTX file path + if source.lower().endswith(".pptx") or os.path.isfile(source): + from gslides_api.adapters.pptx_adapter import PowerPointAPIClient + + api_client = PowerPointAPIClient() + return api_client, source + + # Google Slides URL or ID + from gslides_api.adapters.gslides_adapter import GSlidesAPIClient + + presentation_id = normalize_presentation_id(source) + if presentation_id: + api_client = GSlidesAPIClient.get_default_api_client() + credential_location = os.getenv("GSLIDES_CREDENTIALS_PATH") + if credential_location: + api_client.initialize_credentials(credential_location) + return api_client, presentation_id + + # Fallback: try as GSlides ID anyway + api_client = GSlidesAPIClient.get_default_api_client() + credential_location = os.getenv("GSLIDES_CREDENTIALS_PATH") + if credential_location: + api_client.initialize_credentials(credential_location) + return api_client, source + + +def _load_presentation(source: str): + """Load a presentation from source. 
Returns (api_client, presentation).""" + api_client, presentation_id = _resolve_source(source) + presentation = AbstractPresentation.from_id( + api_client=api_client, presentation_id=presentation_id + ) + return api_client, presentation + + +def cmd_copy(args): + """Copy a presentation and print the new ID/path.""" + api_client, presentation = _load_presentation(args.source) + title = args.title or f"Template - {presentation.title or 'Untitled'}" + + copied = presentation.copy_via_drive(api_client=api_client, copy_title=title) + presentation_id = copied.presentationId or "" + + # For PPTX, the presentationId is the file path + print(f"Copied presentation: {presentation_id}") + if hasattr(copied, "url"): + try: + print(f"URL: {copied.url}") + except Exception: + pass + + +def cmd_inspect(args): + """Inspect slides: print thumbnails and markdown for each slide.""" + api_client, presentation = _load_presentation(args.source) + + if args.slide is not None: + slides_to_inspect = [(args.slide, presentation.slides[args.slide])] + else: + slides_to_inspect = list(enumerate(presentation.slides)) + + for i, slide in slides_to_inspect: + print(f"\n{'='*60}") + print(f"SLIDE {i} (objectId: {slide.objectId})") + print(f"{'='*60}") + + # Get and save thumbnail + try: + thumb = slide.thumbnail( + api_client=api_client, + size=AbstractThumbnailSize.MEDIUM, + include_data=True, + ) + if thumb.content: + ext = ".png" if "png" in thumb.mime_type else ".jpg" + tmp = tempfile.NamedTemporaryFile( + delete=False, suffix=ext, prefix=f"slide_{i}_" + ) + tmp.write(thumb.content) + tmp.close() + print(f"Thumbnail: {tmp.name}") + elif thumb.contentUrl and thumb.contentUrl.startswith("file://"): + print(f"Thumbnail: {thumb.contentUrl.replace('file://', '')}") + else: + print(f"Thumbnail URL: {thumb.contentUrl}") + except Exception as e: + print(f"Thumbnail error: {e}") + + # Print markdown representation + print(f"\nMarkdown:\n") + print(slide.markdown()) + print() + + +def cmd_name(args): + 
"""Apply naming to slides and elements from a JSON mapping.""" + api_client, presentation = _load_presentation(args.source) + mapping = json.loads(args.mapping) + + for slide_mapping in mapping["slides"]: + idx = slide_mapping["index"] + slide = presentation.slides[idx] + + # Name the slide via speaker notes + slide_name = slide_mapping.get("slide_name") + if slide_name and slide.speaker_notes: + slide.speaker_notes.write_text(api_client=api_client, content=slide_name) + print(f"Slide {idx}: named '{slide_name}'") + + # Name elements via alt text (keys are display names from inspect output) + elements_mapping = slide_mapping.get("elements", {}) + for old_name, new_name in elements_mapping.items(): + # Find element by display name (alt_text.title or objectId) + found = False + for element in slide.page_elements_flat: + display_name = element.alt_text.title or element.objectId + if display_name == old_name: + element.set_alt_text(api_client=api_client, title=new_name) + print(f" Element '{old_name}' -> '{new_name}'") + found = True + break + + if not found: + print(f" WARNING: Element '{old_name}' not found on slide {idx}") + + presentation.save(api_client=api_client) + print("\nNames applied and saved.") + + +def cmd_templatize(args): + """Replace all named content with placeholders.""" + api_client, presentation = _load_presentation(args.source) + + for i, slide in enumerate(presentation.slides): + print(f"Processing slide {i}...") + + for element in slide.page_elements_flat: + name = element.alt_text.title + if not name or not name.strip(): + continue # Skip unnamed elements + + # Images: replace with placeholder if large enough + if isinstance(element, AbstractImageElement): + min_dim = min(element.absolute_size(units=OutputUnit.CM)) + if min_dim >= MIN_IMAGE_SIZE_CM: + element.replace_image(api_client=api_client, file=PLACEHOLDER_PATH) + print(f" Replaced image '{name}' with placeholder") + else: + print(f" Skipped small image '{name}' ({min_dim:.1f} cm)") + + # 
Text shapes: replace with example text + elif isinstance(element, AbstractShapeElement) and element.has_text: + try: + styles = element.styles(skip_whitespace=True) + except Exception: + styles = None + + if styles and len(styles) >= 2: + # Multi-style: provide header + body example + element.write_text( + api_client=api_client, + content="# Example Header Style\n\nExample body style", + ) + else: + element.write_text(api_client=api_client, content="Example Text") + print(f" Replaced text '{name}' with placeholder") + + # Tables: replace all cells with "Text" + elif isinstance(element, AbstractTableElement): + md_elem = element.to_markdown_element(name=name) + if md_elem and md_elem.content: + rows, cols = md_elem.shape + # Build replacement table: all cells = "Text" + if rows > 0 and cols > 0: + new_headers = ["Text"] * cols + new_rows = [["Text"] * cols for _ in range(max(0, rows - 1))] + new_table_data = TableData(headers=new_headers, rows=new_rows) + new_md_elem = MarkdownTableElement( + name=name, content=new_table_data + ) + element.update_content( + api_client=api_client, + markdown_content=new_md_elem, + check_shape=False, + ) + print(f" Replaced table '{name}' ({rows}x{cols}) with 'Text' placeholders") + + presentation.save(api_client=api_client) + print("\nTemplatization complete.") + + +def main(): + parser = argparse.ArgumentParser( + description="Prepare a presentation as a template" + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + # copy + copy_parser = subparsers.add_parser("copy", help="Copy a presentation") + copy_parser.add_argument("--source", required=True, help="Source presentation URL, ID, or file path") + copy_parser.add_argument("--title", help="Title for the copy") + + # inspect + inspect_parser = subparsers.add_parser("inspect", help="Inspect slides") + inspect_parser.add_argument("--source", required=True, help="Presentation URL, ID, or file path") + inspect_parser.add_argument("--slide", type=int, 
help="Specific slide index to inspect") + + # name + name_parser = subparsers.add_parser("name", help="Apply names to slides and elements") + name_parser.add_argument("--source", required=True, help="Presentation URL, ID, or file path") + name_parser.add_argument("--mapping", required=True, help="JSON mapping of names") + + # templatize + templatize_parser = subparsers.add_parser("templatize", help="Replace content with placeholders") + templatize_parser.add_argument("--source", required=True, help="Presentation URL, ID, or file path") + + args = parser.parse_args() + + commands = { + "copy": cmd_copy, + "inspect": cmd_inspect, + "name": cmd_name, + "templatize": cmd_templatize, + } + commands[args.command](args) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main() diff --git a/gslides_api/adapters/abstract_slides.py b/gslides_api/adapters/abstract_slides.py index 9ef5831..15a14c7 100644 --- a/gslides_api/adapters/abstract_slides.py +++ b/gslides_api/adapters/abstract_slides.py @@ -446,10 +446,17 @@ def markdown(self) -> str: """ parts = [] for element in self.page_elements_flat: - name = element.alt_text.title or element.objectId + name = element.alt_text.title + if not name: + continue + x, y = element.absolute_position() w, h = element.absolute_size() + desc_str = "" + if element.alt_text.description: + desc_str = f' | desc="{element.alt_text.description}"' + if isinstance(element, AbstractShapeElement) and element.has_text: text = element.read_text(as_markdown=True) try: @@ -460,8 +467,8 @@ def markdown(self) -> str: box_width_inches=w, box_height_inches=h, font_size_pt=font_pt, ) parts.append( - f"<!-- text: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f}) " - f"| ~{meta.approx_char_capacity} chars -->\n{text}" + f"<!-- text: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f})" + f"{desc_str} | ~{meta.approx_char_capacity} chars -->\n{text}" ) elif isinstance(element, AbstractTableElement): md_elem = 
element.to_markdown_element(name=name) @@ -476,15 +483,17 @@ def markdown(self) -> str: col_chars_str = f" | ~{chars_per_col} chars/col" parts.append( f"<!-- table: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f})" - f"{col_chars_str} -->\n{table_md}" + f"{desc_str}{col_chars_str} -->\n{table_md}" ) elif isinstance(element, AbstractImageElement): parts.append( - f"<!-- image: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f}) -->" + f"<!-- image: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f})" + f"{desc_str} -->" ) else: parts.append( - f"<!-- {element.type}: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f}) -->" + f"<!-- {element.type}: {name} | pos=({x:.1f},{y:.1f}) size=({w:.1f},{h:.1f})" + f"{desc_str} -->" ) return "\n\n".join(parts) diff --git a/gslides_api/adapters/add_names.py b/gslides_api/adapters/add_names.py new file mode 100644 index 0000000..77f0ce2 --- /dev/null +++ b/gslides_api/adapters/add_names.py @@ -0,0 +1,368 @@ +from dataclasses import dataclass +from typing import Optional +import logging + +from langchain_core.language_models import BaseLanguageModel + +from gslides_api.adapters.abstract_slides import ( + AbstractElement, + AbstractImageElement, + AbstractPresentation, + AbstractShapeElement, + AbstractSlide, + AbstractSlidesAPIClient, + AbstractTableElement, + _extract_font_size_from_table, + _extract_font_size_pt, +) +from gslides_api.agnostic.element_size import ElementSizeMeta +from gslides_api.agnostic.units import OutputUnit +from motleycrew.utils.image_utils import is_this_a_chart + +logger = logging.getLogger(__name__) + + +@dataclass +class SlideElementNames: + """Names of different types of elements in a slide.""" + + image_names: list[str] + text_names: list[str] + chart_names: list[str] + table_names: list[str] + + @classmethod + def empty(cls) -> "SlideElementNames": + """Create an empty SlideElementNames instance.""" + return cls(image_names=[], text_names=[], chart_names=[], table_names=[]) + + +def 
name_slides( + presentation_id: str, + name_elements: bool = True, + api_client: AbstractSlidesAPIClient | None = None, + skip_empty_text_boxes: bool = False, + llm: Optional[BaseLanguageModel] = None, + check_success: bool = False, + min_image_size_cm: float = 4.0, +) -> dict[str, SlideElementNames]: + """ + Name slides in a presentation based on their speaker notes. + If name_elements is True, also name the elements in the slides.""" + + if api_client is None: + raise ValueError("API client is required") + """Name slides in a presentation based on their speaker notes., enforcing unique names""" + # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client() + if llm is None: + logger.warning("No LLM provided, will not attempt to distinguish charts from images") + + presentation = AbstractPresentation.from_id( + api_client=api_client, presentation_id=presentation_id + ) + slide_names = {} + for i, slide in enumerate(presentation.slides): + if slide.slideProperties.isSkipped: + logger.info( + f"Skipping slide {i+1}: {slide.objectId} as it is marked as skipped in Google Slides" + ) + continue + + speaker_notes = slide.speaker_notes.read_text().strip() + if speaker_notes: + slide_name = speaker_notes.split("\n")[0] + if slide_name in slide_names: + slide_name = f"Slide_{i+1}" + else: + slide_name = f"Slide_{i+1}" + + logger.info(f"Naming slide {i+1}: {slide.objectId} as {slide_name}") + + slide.speaker_notes.write_text(api_client=api_client, content=slide_name) + + if name_elements: + slide_names[slide_name] = name_slide_elements( + slide, + skip_empty_text_boxes=skip_empty_text_boxes, + slide_name=slide_name, + llm=llm, + api_client=api_client, + min_image_size_cm=min_image_size_cm, + ) + else: + # Just get the existing names + text_names = [ + e.alt_text.title + for e in slide.page_elements_flat + if isinstance(e, AbstractShapeElement) + ] + image_names = [ + e.alt_text.title + for e in slide.page_elements_flat + if isinstance(e, 
AbstractImageElement) + ] + table_names = [ + e.alt_text.title + for e in slide.page_elements_flat + if isinstance(e, AbstractTableElement) + ] + + slide_names[slide_name] = SlideElementNames( + image_names=image_names, + text_names=text_names, + chart_names=[], + table_names=table_names, + ) + + presentation.save(api_client=api_client) + if check_success: + presentation.sync_from_cloud(api_client=api_client) + for name, slide in zip(slide_names, presentation.slides): + assert name + assert slide.speaker_notes.read_text() == name + # TODO: check element names + + return slide_names + + +def name_if_empty( + element: AbstractElement, + value: str, + api_client: AbstractSlidesAPIClient, + names_so_far: list[str] | None = None, +): + if names_so_far is None: + names_so_far = [] + + if element.alt_text.title is not None: + # Google API doesn't support setting alt text to empty string, + # so we use space instead to indicate "empty" + # And whitespaces aren't valid variable names anyway + # Also names in a single slide must be unique + current_name = element.alt_text.title.strip() + if current_name and current_name not in names_so_far: + return current_name + + element.set_alt_text(api_client=api_client, title=value) + return value + + +def delete_slide_names( + presentation_id: str, api_client: AbstractSlidesAPIClient +): # | None = None): + # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client() + presentation = AbstractPresentation.from_id( + api_client=api_client, presentation_id=presentation_id + ) + for slide in presentation.slides: + slide.speaker_notes.write_text(" ", api_client=api_client) + + +def delete_alt_titles(presentation_id: str, api_client: AbstractSlidesAPIClient): # | None = None): + # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client() + presentation = AbstractPresentation.from_id( + api_client=api_client, presentation_id=presentation_id + ) + for slide in presentation.slides: + for element in 
slide.page_elements_flat: + if ( + isinstance(element, (AbstractShapeElement, AbstractImageElement)) + and element.alt_text.title + ): + logger.info(f"Deleting alt title {element.alt_text.title}") + # Unfortunately, Google API doesn't support setting alt text to empty string, so use space instead + element.set_alt_text(api_client=api_client, title=" ") + + presentation.save(api_client=api_client) + + +def name_elements( + elements: list[AbstractElement], + root_name: str, + api_client: AbstractSlidesAPIClient, + names_so_far: list[str] | None = None, +) -> list[str]: + if names_so_far is None: + names_so_far = [] + names = [] + if len(elements) == 1: + names.append( + name_if_empty( + element=elements[0], + value=root_name, + api_client=api_client, + names_so_far=names_so_far + names, + ) + ) + + elif len(elements) > 1: + for i, e in enumerate(elements): + names.append( + name_if_empty( + element=e, + value=f"{root_name}_{i+1}", + api_client=api_client, + names_so_far=names_so_far + names, + ) + ) + + logger.info(f"Named {len(names)} {root_name.lower()}s: {names}") + return names + + +def _is_pptx_chart_element(element: AbstractElement) -> bool: + """Check if an element is a PowerPoint chart (GraphicFrame with embedded chart data).""" + if not hasattr(element, "pptx_element") or element.pptx_element is None: + return False + return getattr(element.pptx_element, "has_chart", False) + + +def name_slide_elements( + slide: AbstractSlide, + slide_name: str = "", + skip_empty_text_boxes: bool = False, + min_image_size_cm: float = 4.0, + api_client: AbstractSlidesAPIClient | None = None, + llm: Optional[BaseLanguageModel] = None, +) -> SlideElementNames: + """ + Name the elements in a slide. 
+ :param slide: + :param slide_name: Only used for clearer logging, for the case when the caller has changed the name + :param skip_empty_text_boxes: + :param api_client: + :param llm: + :return: + """ + if api_client is None: + raise ValueError("API client is required") + # api_client = api_client or AbstractSlidesAPIClient.get_default_api_client() + all_images = [e for e in slide.page_elements_flat if isinstance(e, AbstractImageElement)] + + # Also find PowerPoint chart shapes (GraphicFrames with embedded charts) + # These are separate from images in PowerPoint (unlike Google Slides where charts are images) + pptx_chart_shapes = [ + e + for e in slide.page_elements_flat + if _is_pptx_chart_element(e) and not isinstance(e, AbstractImageElement) + ] + if pptx_chart_shapes: + logger.info( + f"Found {len(pptx_chart_shapes)} PowerPoint chart shape(s) in slide {slide_name}" + ) + + # Sort first by y, then by x + all_images.sort(key=lambda x: x.absolute_position()[::-1]) + pptx_chart_shapes.sort(key=lambda x: x.absolute_position()[::-1]) + + images = [] + charts = [] + for i, image in enumerate(all_images): + if image.alt_text.title and image.alt_text.title.strip(): # already named + continue + + if min(image.absolute_size(units=OutputUnit.CM)) < min_image_size_cm: + logger.info(f"Skipping image number {i+1} in slide {slide_name} as it is too small") + continue + + if llm is not None: + image_data = image.get_image_data() + if is_this_a_chart( + image_bytes=image_data.content, mime_type=image_data.mime_type, llm=llm + ): + logger.info(f"Identified image number {i+1} in slide {slide_name} as a chart") + charts.append(image) + else: + logger.info(f"Identified image number {i+1} in slide {slide_name} as an image") + images.append(image) + else: + logger.info( + f"Assuming image number {i+1} in slide {slide_name} is a chart as no LLM provided" + ) + charts.append(image) + + # Add PowerPoint chart shapes to the charts list + # Always add them - name_if_empty will handle 
deduplication if they already have names + for chart_shape in pptx_chart_shapes: + charts.append(chart_shape) + logger.info(f"Adding PowerPoint chart shape to charts list in slide {slide_name}") + + image_names = name_elements(images, "Image", api_client) + chart_names = name_elements(charts, "Chart", api_client) + + table_names = name_elements( + [e for e in slide.page_elements_flat if isinstance(e, AbstractTableElement)], + "Table", + api_client, + ) + + text_names = [] + + text_boxes = [e for e in slide.page_elements_flat if isinstance(e, AbstractShapeElement)] + + if skip_empty_text_boxes: + text_boxes = [e for e in text_boxes if e.read_text().strip()] + + if not text_boxes: + return SlideElementNames( + image_names=image_names, + text_names=text_names, + chart_names=chart_names, + table_names=table_names, + ) + + # Sort first by y, then by x + text_boxes.sort(key=lambda x: x.absolute_position()[::-1]) + top_box = text_boxes[0] + text_names.append(name_if_empty(top_box, "Title", api_client, names_so_far=text_names)) + + other_boxes = text_boxes[1:] + text_names.extend(name_elements(other_boxes, "Text", api_client, names_so_far=text_names)) + + return SlideElementNames( + image_names=image_names, + text_names=text_names, + chart_names=chart_names, + table_names=table_names, + ) + + +def _extract_font_size_from_element(element: AbstractShapeElement) -> float: + """Extract the dominant font size (in points) from a shape element's text styles. + + Thin wrapper around _extract_font_size_pt from abstract_slides. + """ + try: + return _extract_font_size_pt(element.styles(skip_whitespace=True)) + except Exception: + return 12.0 + + +def _extract_font_size_from_table_element(element: AbstractTableElement) -> float: + """Extract the dominant font size (in points) from a table element's first cell. + + Delegates to _extract_font_size_from_table from abstract_slides. 
+ """ + return _extract_font_size_from_table(element) + + +def _build_element_size_meta( + element: AbstractElement, + font_size_pt: float, +) -> ElementSizeMeta | None: + """Build ElementSizeMeta from an element's absolute size and a font size. + + Returns None if the element has zero or negative dimensions. + """ + try: + element_size = element.absolute_size(units=OutputUnit.IN) + width, height = element_size[0], element_size[1] + if width > 0 and height > 0: + return ElementSizeMeta( + box_width_inches=width, + box_height_inches=height, + font_size_pt=font_size_pt, + ) + except Exception: + pass + return None diff --git a/gslides_api/common/presentation_id.py b/gslides_api/common/presentation_id.py new file mode 100644 index 0000000..75a1098 --- /dev/null +++ b/gslides_api/common/presentation_id.py @@ -0,0 +1,30 @@ +"""Utility for normalizing Google Slides presentation IDs from URLs.""" + +import logging +import re +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + + +def normalize_presentation_id(presentation_id_or_url: str) -> str: + """ + Extract presentation ID from a presentation ID or URL (e.g. "https://docs.google.com/presentation/d/1234567890/edit?slide=id.p1"). 
+ """ + presentation_id_or_url = presentation_id_or_url.strip() + + if presentation_id_or_url.startswith("https://docs.google.com/presentation/d/"): + try: + parsed_url = urlparse(presentation_id_or_url) + parts = parsed_url.path.split("/") + idx = parts.index("presentation") + assert parts[idx + 1] == "d" + return parts[idx + 2] + except (TypeError, AssertionError, ValueError, IndexError) as e: + logger.warning(f"Error extracting presentation ID from {presentation_id_or_url}: {e}") + + # check if a valid presentation ID is provided + if re.match(r"^[a-zA-Z0-9_-]{25,}$", presentation_id_or_url): + return presentation_id_or_url + + raise ValueError(f"Invalid presentation ID or URL: {presentation_id_or_url}") diff --git a/gslides_api/mcp/__init__.py b/gslides_api/mcp/__init__.py index 1d69816..1be4c22 100644 --- a/gslides_api/mcp/__init__.py +++ b/gslides_api/mcp/__init__.py @@ -10,11 +10,8 @@ """ from .models import ( - ElementOutline, ErrorResponse, OutputFormat, - PresentationOutline, - SlideOutline, SuccessResponse, ThumbnailSizeOption, ) @@ -37,9 +34,6 @@ "ThumbnailSizeOption", "ErrorResponse", "SuccessResponse", - "ElementOutline", - "SlideOutline", - "PresentationOutline", # Utils "parse_presentation_id", "get_slide_name", diff --git a/gslides_api/mcp/models.py b/gslides_api/mcp/models.py index 1bb91dd..144e408 100644 --- a/gslides_api/mcp/models.py +++ b/gslides_api/mcp/models.py @@ -1,7 +1,7 @@ """Models for the gslides-api MCP server.""" from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any, Dict from pydantic import BaseModel, Field @@ -11,7 +11,7 @@ class OutputFormat(str, Enum): RAW = "raw" # Raw Google Slides API JSON response DOMAIN = "domain" # gslides-api domain object model_dump() - OUTLINE = "outline" # Bare-bones structure with names and markdown content + MARKDOWN = "markdown" # Slide markdown layout representation class ThumbnailSizeOption(str, Enum): @@ -33,34 +33,6 @@ class ErrorResponse(BaseModel): ) 
-class ElementOutline(BaseModel): - """Outline representation of a page element.""" - - element_name: Optional[str] = Field(None, description="Element name from alt-text title") - element_id: str = Field(description="Element object ID") - type: str = Field(description="Element type (shape, image, table, etc.)") - alt_description: Optional[str] = Field(None, description="Alt-text description if present") - content_markdown: Optional[str] = Field( - None, description="Markdown content for text elements" - ) - - -class SlideOutline(BaseModel): - """Outline representation of a slide.""" - - slide_name: Optional[str] = Field(None, description="Slide name from speaker notes") - slide_id: str = Field(description="Slide object ID") - elements: List[ElementOutline] = Field(default_factory=list) - - -class PresentationOutline(BaseModel): - """Outline representation of a presentation.""" - - presentation_id: str = Field(description="Presentation ID") - title: str = Field(description="Presentation title") - slides: List[SlideOutline] = Field(default_factory=list) - - class SuccessResponse(BaseModel): """Success response for modification operations.""" diff --git a/gslides_api/mcp/server.py b/gslides_api/mcp/server.py index 4d048a0..1c40608 100644 --- a/gslides_api/mcp/server.py +++ b/gslides_api/mcp/server.py @@ -4,7 +4,6 @@ """ import argparse -import base64 import json import logging import os @@ -13,10 +12,18 @@ import tempfile import traceback import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Union from mcp.server import FastMCP +from mcp.server.fastmcp.utilities.types import Image +from gslides_api.adapters.abstract_slides import ( + AbstractPresentation, + AbstractThumbnailSize, +) +from gslides_api.adapters.add_names import name_slides +from gslides_api.adapters.gslides_adapter import GSlidesAPIClient +from gslides_api.agnostic.element import MarkdownTableElement from gslides_api.client import GoogleAPIClient from 
gslides_api.domain.domain import ( Color, @@ -28,7 +35,6 @@ ThumbnailSize, Weight, ) -from gslides_api.agnostic.element import MarkdownTableElement from gslides_api.element.base import ElementKind from gslides_api.element.element import ImageElement from gslides_api.element.shape import ShapeElement @@ -39,18 +45,15 @@ from .models import ( ErrorResponse, OutputFormat, - PresentationOutline, - SlideOutline, SuccessResponse, ThumbnailSizeOption, ) from .utils import ( - build_element_outline, - build_presentation_outline, - build_slide_outline, element_not_found_error, + find_abstract_slide_by_name, find_element_by_name, find_slide_by_name, + get_abstract_slide_names, get_available_element_names, get_available_slide_names, get_slide_name, @@ -146,7 +149,7 @@ def get_presentation( Args: presentation_id_or_url: Google Slides URL or presentation ID - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'markdown' (slide layout markdown) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -163,17 +166,21 @@ def get_presentation( client.flush_batch_update() return _format_response(result) - elif format_type == OutputFormat.DOMAIN: - # Get domain object and dump - presentation = Presentation.from_id(pres_id, api_client=client) + elif format_type == OutputFormat.MARKDOWN: + gslides_client = GSlidesAPIClient(gslides_client=client) + abs_pres = AbstractPresentation.from_id( + api_client=gslides_client, presentation_id=pres_id + ) + parts = [] + for i, slide in enumerate(abs_pres.slides): + parts.append(f"## Slide {i}\n\n{slide.markdown()}") client.flush_batch_update() - return _format_response(presentation.model_dump()) + return "\n\n---\n\n".join(parts) - else: # OUTLINE + else: # DOMAIN presentation = Presentation.from_id(pres_id, api_client=client) client.flush_batch_update() - outline = build_presentation_outline(presentation) - return 
_format_response(outline) + return _format_response(presentation.model_dump()) except Exception as e: logger.error(f"Error getting presentation: {e}\n{traceback.format_exc()}") @@ -183,16 +190,40 @@ def get_presentation( @mcp.tool() def get_slide( presentation_id_or_url: str, - slide_name: str, + slide_name: str = None, + slide_index: int = None, how: str = None, -) -> str: - """Get a single slide by name (first line of speaker notes). + include_thumbnail: bool = True, +) -> Union[str, List]: + """Get a single slide by name or index. Args: presentation_id_or_url: Google Slides URL or presentation ID - slide_name: Slide name (first line of speaker notes, stripped) - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + slide_name: Slide name (first line of speaker notes). Mutually exclusive with slide_index. + slide_index: Zero-based slide index. Mutually exclusive with slide_name. + how: Output format - 'markdown' (default), 'raw' (Google API JSON), or 'domain' (model_dump). + include_thumbnail: Include slide thumbnail as image payload. Default True. 
""" + # Validate slide_name/slide_index + if slide_name is not None and slide_index is not None: + return _format_response( + None, + validation_error( + "slide_name/slide_index", + "slide_name and slide_index are mutually exclusive", + f"slide_name={slide_name}, slide_index={slide_index}", + ), + ) + if slide_name is None and slide_index is None: + return _format_response( + None, + validation_error( + "slide_name/slide_index", + "Either slide_name or slide_index must be provided", + "both are None", + ), + ) + try: pres_id = parse_presentation_id(presentation_id_or_url) except ValueError as e: @@ -202,27 +233,54 @@ def get_slide( client = get_api_client() try: - presentation = Presentation.from_id(pres_id, api_client=client) - slide = find_slide_by_name(presentation, slide_name) - - if slide is None: - available = get_available_slide_names(presentation) - client.flush_batch_update() - return _format_response(None, slide_not_found_error(pres_id, slide_name, available)) + # Always load via AbstractPresentation for unified slide lookup + gslides_client = GSlidesAPIClient(gslides_client=client) + abs_pres = AbstractPresentation.from_id( + api_client=gslides_client, presentation_id=pres_id + ) - if format_type == OutputFormat.RAW: - result = client.get_slide_json(pres_id, slide.objectId) - client.flush_batch_update() - return _format_response(result) + # Find slide + if slide_name is not None: + abs_slide = find_abstract_slide_by_name(abs_pres, slide_name) + if abs_slide is None: + names = get_abstract_slide_names(abs_pres) + client.flush_batch_update() + return _format_response(None, slide_not_found_error(pres_id, slide_name, names)) + else: + if slide_index < 0 or slide_index >= len(abs_pres.slides): + client.flush_batch_update() + return _format_response( + None, + validation_error( + "slide_index", + f"Slide index {slide_index} out of range (0-{len(abs_pres.slides) - 1})", + str(slide_index), + ), + ) + abs_slide = abs_pres.slides[slide_index] + + # Format output 
based on `how` + if format_type == OutputFormat.MARKDOWN: + result = abs_slide.markdown() + elif format_type == OutputFormat.RAW: + result = _format_response( + client.get_slide_json(pres_id, abs_slide.objectId) + ) + else: # DOMAIN + result = _format_response(abs_slide._gslides_slide.model_dump()) - elif format_type == OutputFormat.DOMAIN: - client.flush_batch_update() - return _format_response(slide.model_dump()) + client.flush_batch_update() - else: # OUTLINE - client.flush_batch_update() - outline = build_slide_outline(slide) - return _format_response(outline) + # Optionally attach thumbnail + if include_thumbnail: + thumb = abs_slide.thumbnail( + api_client=gslides_client, + size=AbstractThumbnailSize.MEDIUM, + include_data=True, + ) + return [result, Image(data=thumb.content, format="png")] + else: + return result except Exception as e: logger.error(f"Error getting slide: {e}\n{traceback.format_exc()}") @@ -242,7 +300,7 @@ def get_element( presentation_id_or_url: Google Slides URL or presentation ID slide_name: Slide name (first line of speaker notes) element_name: Element name (from alt-text title, stripped) - how: Output format - 'raw' (Google API JSON), 'domain' (model_dump), or 'outline' (condensed) + how: Output format - 'raw' (Google API JSON) or 'domain' (model_dump) """ try: pres_id = parse_presentation_id(presentation_id_or_url) @@ -274,13 +332,9 @@ def get_element( # For raw, we return the element's API format return _format_response(element.to_api_format() if hasattr(element, "to_api_format") else element.model_dump()) - elif format_type == OutputFormat.DOMAIN: + else: # DOMAIN (also handles MARKDOWN since element-level markdown is not distinct) return _format_response(element.model_dump()) - else: # OUTLINE - outline = build_element_outline(element) - return _format_response(outline) - except Exception as e: logger.error(f"Error getting element: {e}\n{traceback.format_exc()}") return _format_response(None, presentation_error(pres_id, e)) @@ 
-1083,6 +1137,63 @@ def copy_presentation( return _format_response(None, presentation_error(pres_id, e)) +@mcp.tool() +def add_element_names( + presentation_id_or_url: str, + skip_empty_text_boxes: bool = False, + min_image_size_cm: float = 4.0, +) -> str: + """Name all slides and elements in a presentation. + + Names slides based on their speaker notes (first line). + Names elements (text boxes, images, charts, tables) with descriptive alt-text titles. + The topmost text box becomes "Title", others become "Text_1", "Text_2", etc. + Images and charts are named "Image_1", "Chart_1", etc. + + Args: + presentation_id_or_url: Google Slides URL or presentation ID + skip_empty_text_boxes: Skip text boxes that contain only whitespace + min_image_size_cm: Minimum image dimension (cm) to include (smaller images are skipped) + """ + try: + pres_id = parse_presentation_id(presentation_id_or_url) + except ValueError as e: + return _format_response(None, validation_error("presentation_id_or_url", str(e), presentation_id_or_url)) + + client = get_api_client() + + try: + gslides_client = GSlidesAPIClient(gslides_client=client) + slide_names = name_slides( + pres_id, + name_elements=True, + api_client=gslides_client, + skip_empty_text_boxes=skip_empty_text_boxes, + min_image_size_cm=min_image_size_cm, + ) + client.flush_batch_update() + + # Convert SlideElementNames dataclass to serializable dict + names_dict = {} + for slide_name, element_names in slide_names.items(): + names_dict[slide_name] = { + "text_names": element_names.text_names, + "image_names": element_names.image_names, + "chart_names": element_names.chart_names, + "table_names": element_names.table_names, + } + + result = SuccessResponse( + message=f"Successfully named {len(slide_names)} slides and their elements", + details={"slide_element_names": names_dict}, + ) + return _format_response(result) + + except Exception as e: + logger.error(f"Error naming elements: {e}\n{traceback.format_exc()}") + return 
_format_response(None, presentation_error(pres_id, e)) + + # ============================================================================= # MAIN ENTRY POINT # ============================================================================= @@ -1100,9 +1211,9 @@ def main(): parser.add_argument( "--default-format", type=str, - choices=["raw", "domain", "outline"], - default="raw", - help="Default output format for tools (default: raw)", + choices=["raw", "domain", "markdown"], + default="markdown", + help="Default output format for tools (default: markdown)", ) args = parser.parse_args() diff --git a/gslides_api/mcp/utils.py b/gslides_api/mcp/utils.py index e8ba946..731afa7 100644 --- a/gslides_api/mcp/utils.py +++ b/gslides_api/mcp/utils.py @@ -1,14 +1,15 @@ """Utility functions for the gslides-api MCP server.""" import re -from typing import List, Optional, Tuple +from typing import List, Optional +from gslides_api.adapters.abstract_slides import AbstractPresentation, AbstractSlide from gslides_api.element.base import ElementKind, PageElementBase from gslides_api.element.element import PageElement from gslides_api.page.slide import Slide from gslides_api.presentation import Presentation -from .models import ElementOutline, ErrorResponse, PresentationOutline, SlideOutline +from .models import ErrorResponse # Pattern to match Google Slides URLs and extract the presentation ID # Matches: https://docs.google.com/presentation/d/{ID}/edit @@ -90,20 +91,6 @@ def get_element_name(element: PageElementBase) -> Optional[str]: return None -def get_element_alt_description(element: PageElementBase) -> Optional[str]: - """Get the alt-text description of an element. 
- - Args: - element: The element to get the description from - - Returns: - The alt-text description, or None if not present - """ - if hasattr(element, "description") and element.description: - return element.description.strip() or None - return None - - def find_slide_by_name(presentation: Presentation, slide_name: str) -> Optional[Slide]: """Find a slide by its name (first line of speaker notes). @@ -193,77 +180,6 @@ def get_element_type_string(element: PageElement) -> str: return "unknown" -def get_element_markdown_content(element: PageElement) -> Optional[str]: - """Get the markdown content of a shape element. - - Args: - element: The element to get content from - - Returns: - Markdown content if it's a text element, None otherwise - """ - if hasattr(element, "type") and element.type == ElementKind.SHAPE: - try: - # Try to read text as markdown - if hasattr(element, "read_text"): - return element.read_text(as_markdown=True) - except Exception: - pass - return None - - -def build_element_outline(element: PageElement) -> ElementOutline: - """Build an outline representation of an element. - - Args: - element: The element to build outline from - - Returns: - ElementOutline representation - """ - return ElementOutline( - element_name=get_element_name(element), - element_id=element.objectId, - type=get_element_type_string(element), - alt_description=get_element_alt_description(element), - content_markdown=get_element_markdown_content(element), - ) - - -def build_slide_outline(slide: Slide) -> SlideOutline: - """Build an outline representation of a slide. 
- - Args: - slide: The slide to build outline from - - Returns: - SlideOutline representation - """ - elements = [build_element_outline(e) for e in slide.page_elements_flat] - return SlideOutline( - slide_name=get_slide_name(slide), - slide_id=slide.objectId, - elements=elements, - ) - - -def build_presentation_outline(presentation: Presentation) -> PresentationOutline: - """Build an outline representation of a presentation. - - Args: - presentation: The presentation to build outline from - - Returns: - PresentationOutline representation - """ - slides = [build_slide_outline(s) for s in presentation.slides] - return PresentationOutline( - presentation_id=presentation.presentationId, - title=presentation.title or "Untitled", - slides=slides, - ) - - def create_error_response( error_type: str, message: str, @@ -374,3 +290,45 @@ def validation_error(field: str, message: str, value: str = None) -> ErrorRespon message=message, **details, ) + + +def find_abstract_slide_by_name( + presentation: AbstractPresentation, slide_name: str +) -> Optional[AbstractSlide]: + """Find an AbstractSlide by speaker notes name. + + Args: + presentation: The abstract presentation to search in + slide_name: The slide name to find (first line of speaker notes) + + Returns: + The abstract slide if found, None otherwise + """ + for slide in presentation.slides: + if slide.speaker_notes: + text = slide.speaker_notes.read_text() + if text: + first_line = text.strip().split("\n")[0].strip() + if first_line == slide_name: + return slide + return None + + +def get_abstract_slide_names(presentation: AbstractPresentation) -> List[str]: + """Get slide names from an AbstractPresentation for error messages. 
+ + Args: + presentation: The abstract presentation to get slide names from + + Returns: + List of slide names (or placeholder for unnamed slides) + """ + names = [] + for i, slide in enumerate(presentation.slides): + name = None + if slide.speaker_notes: + text = slide.speaker_notes.read_text() + if text: + name = text.strip().split("\n")[0].strip() or None + names.append(name or f"(unnamed slide at index {i})") + return names diff --git a/playground/table_games.py b/playground/table_games.py index d9acba1..87b8434 100644 --- a/playground/table_games.py +++ b/playground/table_games.py @@ -14,7 +14,8 @@ from storyline.domain.chart_image_to_config import image_to_config from storyline.domain.content.chart_block import ChartBlock -from storyline.slides.ingest_presentation import name_slides, delete_alt_titles, ingest_presentation +from storyline.slides.ingest_presentation import ingest_presentation +from gslides_api.adapters.add_names import delete_alt_titles, name_slides from storyline.slides.slide_deck import SlideDeck logger = logging.getLogger(__name__) diff --git a/tests/mcp_tests/test_add_element_names.py b/tests/mcp_tests/test_add_element_names.py new file mode 100644 index 0000000..c350434 --- /dev/null +++ b/tests/mcp_tests/test_add_element_names.py @@ -0,0 +1,132 @@ +"""Tests for the add_element_names MCP tool.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from gslides_api.adapters.add_names import SlideElementNames +from gslides_api.mcp.server import add_element_names + + +class TestAddElementNames: + """Tests for the add_element_names tool.""" + + @patch("gslides_api.mcp.server.name_slides") + @patch("gslides_api.mcp.server.GSlidesAPIClient") + @patch("gslides_api.mcp.server.get_api_client") + def test_successful_call( + self, mock_get_client, mock_gslides_class, mock_name_slides, + ): + """Test successful call returns SuccessResponse with slide/element names.""" + mock_client = Mock() + mock_get_client.return_value = 
mock_client + mock_gslides_client = Mock() + mock_gslides_class.return_value = mock_gslides_client + + mock_name_slides.return_value = { + "Intro": SlideElementNames( + image_names=["Image_1"], + text_names=["Title", "Text_1"], + chart_names=["Chart_1"], + table_names=[], + ), + "Summary": SlideElementNames( + image_names=[], + text_names=["Title"], + chart_names=[], + table_names=["Table_1"], + ), + } + + result = json.loads(add_element_names("pres_123")) + + assert result["success"] is True + assert "Successfully named 2 slides" in result["message"] + details = result["details"]["slide_element_names"] + assert details["Intro"]["text_names"] == ["Title", "Text_1"] + assert details["Intro"]["image_names"] == ["Image_1"] + assert details["Intro"]["chart_names"] == ["Chart_1"] + assert details["Intro"]["table_names"] == [] + assert details["Summary"]["table_names"] == ["Table_1"] + + mock_name_slides.assert_called_once_with( + "pres_123", + name_elements=True, + api_client=mock_gslides_client, + skip_empty_text_boxes=False, + min_image_size_cm=4.0, + ) + mock_client.flush_batch_update.assert_called_once() + + @patch("gslides_api.mcp.server.name_slides") + @patch("gslides_api.mcp.server.GSlidesAPIClient") + @patch("gslides_api.mcp.server.get_api_client") + def test_with_custom_parameters( + self, mock_get_client, mock_gslides_class, mock_name_slides, + ): + """Test that custom parameters are passed through.""" + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_gslides_client = Mock() + mock_gslides_class.return_value = mock_gslides_client + mock_name_slides.return_value = {} + + result = json.loads(add_element_names( + "pres_123", + skip_empty_text_boxes=True, + min_image_size_cm=2.0, + )) + + assert result["success"] is True + mock_name_slides.assert_called_once_with( + "pres_123", + name_elements=True, + api_client=mock_gslides_client, + skip_empty_text_boxes=True, + min_image_size_cm=2.0, + ) + + def test_invalid_presentation_url(self): + 
"""Test that invalid presentation URL returns validation error.""" + result = json.loads(add_element_names("https://example.com/bad-url")) + + assert result["error"] is True + assert result["error_type"] == "ValidationError" + + @patch("gslides_api.mcp.server.name_slides") + @patch("gslides_api.mcp.server.GSlidesAPIClient") + @patch("gslides_api.mcp.server.get_api_client") + def test_exception_handling( + self, mock_get_client, mock_gslides_class, mock_name_slides, + ): + """Test that exceptions are caught and returned as error responses.""" + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_gslides_class.return_value = Mock() + mock_name_slides.side_effect = RuntimeError("API connection failed") + + result = json.loads(add_element_names("pres_123")) + + assert result["error"] is True + assert "API connection failed" in result["message"] + + @patch("gslides_api.mcp.server.name_slides") + @patch("gslides_api.mcp.server.GSlidesAPIClient") + @patch("gslides_api.mcp.server.get_api_client") + def test_google_slides_url_parsed( + self, mock_get_client, mock_gslides_class, mock_name_slides, + ): + """Test that a full Google Slides URL is parsed to extract the presentation ID.""" + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_gslides_class.return_value = Mock() + mock_name_slides.return_value = {} + + url = "https://docs.google.com/presentation/d/abc123xyz/edit" + result = json.loads(add_element_names(url)) + + assert result["success"] is True + mock_name_slides.assert_called_once() + call_args = mock_name_slides.call_args + assert call_args[0][0] == "abc123xyz" diff --git a/tests/mcp_tests/test_models.py b/tests/mcp_tests/test_models.py index 47794c1..de4ad8c 100644 --- a/tests/mcp_tests/test_models.py +++ b/tests/mcp_tests/test_models.py @@ -3,11 +3,8 @@ import pytest from gslides_api.mcp.models import ( - ElementOutline, ErrorResponse, OutputFormat, - PresentationOutline, - SlideOutline, SuccessResponse, 
ThumbnailSizeOption, ) @@ -26,16 +23,21 @@ def test_domain_format(self): assert OutputFormat.DOMAIN.value == "domain" assert OutputFormat("domain") == OutputFormat.DOMAIN - def test_outline_format(self): - """Test OUTLINE format value.""" - assert OutputFormat.OUTLINE.value == "outline" - assert OutputFormat("outline") == OutputFormat.OUTLINE + def test_markdown_format(self): + """Test MARKDOWN format value.""" + assert OutputFormat.MARKDOWN.value == "markdown" + assert OutputFormat("markdown") == OutputFormat.MARKDOWN def test_invalid_format(self): """Test that invalid format raises ValueError.""" with pytest.raises(ValueError): OutputFormat("invalid") + def test_outline_format_removed(self): + """Test that OUTLINE format no longer exists.""" + with pytest.raises(ValueError): + OutputFormat("outline") + class TestThumbnailSizeOption: """Tests for ThumbnailSizeOption enum.""" @@ -109,118 +111,3 @@ def test_success_response_with_details(self): details={"new_slide_id": "abc123", "position": 5}, ) assert response.details == {"new_slide_id": "abc123", "position": 5} - - -class TestElementOutline: - """Tests for ElementOutline model.""" - - def test_minimal_element_outline(self): - """Test creating a minimal element outline.""" - outline = ElementOutline( - element_id="elem123", - type="shape", - ) - assert outline.element_id == "elem123" - assert outline.type == "shape" - assert outline.element_name is None - assert outline.alt_description is None - assert outline.content_markdown is None - - def test_full_element_outline(self): - """Test creating a full element outline.""" - outline = ElementOutline( - element_name="Title", - element_id="elem123", - type="shape", - alt_description="Main title text box", - content_markdown="# Welcome", - ) - assert outline.element_name == "Title" - assert outline.element_id == "elem123" - assert outline.type == "shape" - assert outline.alt_description == "Main title text box" - assert outline.content_markdown == "# Welcome" - - -class 
TestSlideOutline: - """Tests for SlideOutline model.""" - - def test_minimal_slide_outline(self): - """Test creating a minimal slide outline.""" - outline = SlideOutline( - slide_id="slide123", - ) - assert outline.slide_id == "slide123" - assert outline.slide_name is None - assert outline.elements == [] - - def test_slide_outline_with_elements(self): - """Test creating a slide outline with elements.""" - elements = [ - ElementOutline(element_id="e1", type="shape"), - ElementOutline(element_id="e2", type="image"), - ] - outline = SlideOutline( - slide_name="Introduction", - slide_id="slide123", - elements=elements, - ) - assert outline.slide_name == "Introduction" - assert len(outline.elements) == 2 - assert outline.elements[0].element_id == "e1" - - -class TestPresentationOutline: - """Tests for PresentationOutline model.""" - - def test_minimal_presentation_outline(self): - """Test creating a minimal presentation outline.""" - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - ) - assert outline.presentation_id == "pres123" - assert outline.title == "My Presentation" - assert outline.slides == [] - - def test_presentation_outline_with_slides(self): - """Test creating a presentation outline with slides.""" - slides = [ - SlideOutline(slide_id="s1", slide_name="Cover"), - SlideOutline(slide_id="s2", slide_name="Content"), - ] - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - slides=slides, - ) - assert len(outline.slides) == 2 - assert outline.slides[0].slide_name == "Cover" - assert outline.slides[1].slide_name == "Content" - - def test_presentation_outline_model_dump(self): - """Test that presentation outline can be serialized.""" - outline = PresentationOutline( - presentation_id="pres123", - title="My Presentation", - slides=[ - SlideOutline( - slide_id="s1", - slide_name="Cover", - elements=[ - ElementOutline( - element_name="Title", - element_id="e1", - type="shape", - 
content_markdown="# Welcome", - ) - ], - ) - ], - ) - data = outline.model_dump() - assert data["presentation_id"] == "pres123" - assert data["title"] == "My Presentation" - assert len(data["slides"]) == 1 - assert data["slides"][0]["slide_name"] == "Cover" - assert data["slides"][0]["elements"][0]["element_name"] == "Title" diff --git a/tests/test_adapters/test_abstract_slide_markdown.py b/tests/test_adapters/test_abstract_slide_markdown.py index 754ff0c..34ee88c 100644 --- a/tests/test_adapters/test_abstract_slide_markdown.py +++ b/tests/test_adapters/test_abstract_slide_markdown.py @@ -21,6 +21,7 @@ def _make_shape_element( object_id="shape1", title=None, + description=None, text="Hello World", x=0.5, y=0.3, @@ -31,7 +32,7 @@ def _make_shape_element( """Create a mock AbstractShapeElement.""" elem = MagicMock(spec=AbstractShapeElement) elem.objectId = object_id - elem.alt_text = AbstractAltText(title=title) + elem.alt_text = AbstractAltText(title=title, description=description) elem.type = "SHAPE" type(elem).has_text = PropertyMock(return_value=True) elem.read_text.return_value = text @@ -49,6 +50,7 @@ def _make_shape_element( def _make_image_element( object_id="img1", title="Chart", + description=None, x=1.0, y=3.0, w=8.0, @@ -57,7 +59,7 @@ def _make_image_element( """Create a mock AbstractImageElement.""" elem = MagicMock(spec=AbstractImageElement) elem.objectId = object_id - elem.alt_text = AbstractAltText(title=title) + elem.alt_text = AbstractAltText(title=title, description=description) elem.type = "IMAGE" elem.absolute_position.return_value = (x, y) elem.absolute_size.return_value = (w, h) @@ -67,6 +69,7 @@ def _make_image_element( def _make_table_element( object_id="table1", title="Data", + description=None, x=0.5, y=7.5, w=9.0, @@ -85,7 +88,7 @@ def _make_table_element( elem = MagicMock(spec=AbstractTableElement) elem.objectId = object_id - elem.alt_text = AbstractAltText(title=title) + elem.alt_text = AbstractAltText(title=title, 
description=description) elem.type = "TABLE" elem.absolute_position.return_value = (x, y) elem.absolute_size.return_value = (w, h) @@ -232,12 +235,12 @@ def test_mixed_elements(self): assert "image: Chart" in parts[1] assert "table: Data" in parts[2] - def test_element_uses_object_id_when_no_title(self): + def test_unnamed_element_is_skipped(self): shape = _make_shape_element(object_id="abc123", title=None) slide = _make_slide([shape]) md = slide.markdown() - assert "text: abc123 |" in md + assert md == "" def test_empty_slide(self): slide = _make_slide([]) @@ -315,3 +318,48 @@ def test_shape_without_text(self): # Should fall through to the generic case since has_text is False assert "<!-- SHAPE: EmptyBox |" in md assert "text:" not in md + + def test_unnamed_elements_skipped_named_kept(self): + """Unnamed elements should be skipped, named ones kept.""" + named = _make_shape_element(title="Title", text="Hello") + unnamed = _make_shape_element(object_id="no_name", title=None, text="Hidden") + slide = _make_slide([named, unnamed]) + md = slide.markdown() + + assert "text: Title" in md + assert "no_name" not in md + assert "Hidden" not in md + + def test_alt_description_in_text_comment(self): + """Alt description should appear in text element comment.""" + shape = _make_shape_element( + title="Title", description="Main heading", text="Hello" + ) + slide = _make_slide([shape]) + md = slide.markdown() + + assert 'desc="Main heading"' in md + + def test_alt_description_in_image_comment(self): + """Alt description should appear in image element comment.""" + img = _make_image_element(title="Chart", description="Revenue chart") + slide = _make_slide([img]) + md = slide.markdown() + + assert 'desc="Revenue chart"' in md + + def test_alt_description_in_table_comment(self): + """Alt description should appear in table element comment.""" + table = _make_table_element(title="Data", description="Quarterly data") + slide = _make_slide([table]) + md = slide.markdown() + + assert 
'desc="Quarterly data"' in md + + def test_no_description_no_desc_field(self): + """When no description, desc field should not appear.""" + shape = _make_shape_element(title="Title", description=None, text="Hello") + slide = _make_slide([shape]) + md = slide.markdown() + + assert "desc=" not in md