Accept a sequence of strings in extract_summary() (#14014)

AA-Turner · web-flow · commit 48f67398389b · 2025-11-02T21:11:40.000Z
diff --git a/sphinx/ext/autosummary/__init__.py b/sphinx/ext/autosummary/__init__.py
@@ -301,6 +301,7 @@ def get_items(self, names: list[str]) -> list[tuple[str, str | None, str, str]]:
             )
             raise ValueError(msg)
 
+        document_settings = self.state.document.settings
         env = self.env
         config = env.config
         current_document = env.current_document
@@ -395,7 +396,7 @@ def get_items(self, names: list[str]) -> list[tuple[str, str | None, str, str]]:
                 props=props,
                 options=opts,
             )
-            summary = extract_summary(list(docstring_lines), self.state.document)
+            summary = extract_summary(list(docstring_lines), document_settings)
 
             items.append((display_name, sig, summary, real_name))
 
@@ -537,51 +538,47 @@ def mangle_signature(sig: str, max_chars: int = 30) -> str:
     return '(%s)' % sig
 
 
-def extract_summary(doc: list[str], document: Any) -> str:
+def extract_summary(doc: Sequence[str], settings: Any) -> str:
     """Extract summary from docstring."""
+    # Find the first stanza (heading, sentence, paragraph, etc.).
+    # If there's a blank line, then we can assume that the stanza has ended,
+    # so anything after shouldn't be part of the summary.
+    first_stanza = []
+    content_started = False
+    for line in doc:
+        is_blank_line = not line or line.isspace()
+        if not content_started:
+            # Skip any blank lines at the start
+            if is_blank_line:
+                continue
+            content_started = True
+        if content_started:
+            if is_blank_line:
+                break
+            first_stanza.append(line)
 
-    def parse(doc: list[str], settings: Any) -> nodes.document:
-        state_machine = RSTStateMachine(state_classes, 'Body')
-        node = new_document('', settings)
-        node.reporter = NullReporter()
-        state_machine.run(doc, node)
-
-        return node
-
-    # Skip a blank lines at the top
-    while doc and not doc[0].strip():
-        doc.pop(0)
-
-    # If there's a blank line, then we can assume the first sentence /
-    # paragraph has ended, so anything after shouldn't be part of the
-    # summary
-    for i, piece in enumerate(doc):
-        if not piece.strip():
-            doc = doc[:i]
-            break
-
-    if doc == []:
+    if not first_stanza:
         return ''
 
     # parse the docstring
-    node = parse(doc, document.settings)
+    node = _parse_summary(first_stanza, settings)
     if isinstance(node[0], nodes.section):
         # document starts with a section heading, so use that.
         summary = node[0].astext().strip()
     elif not isinstance(node[0], nodes.paragraph):
         # document starts with non-paragraph: pick up the first line
-        summary = doc[0].strip()
+        summary = first_stanza[0].strip()
     else:
         # Try to find the "first sentence", which may span multiple lines
-        sentences = periods_re.split(' '.join(doc))
+        sentences = periods_re.split(' '.join(first_stanza))
         if len(sentences) == 1:
             summary = sentences[0].strip()
         else:
             summary = ''
             for i in range(len(sentences)):
                 summary = '. '.join(sentences[: i + 1]).rstrip('.') + '.'
                 node[:] = []
-                node = parse(doc, document.settings)
+                node = _parse_summary(first_stanza, settings)
                 if summary.endswith(WELL_KNOWN_ABBREVIATIONS):
                     pass
                 elif not any(node.findall(nodes.system_message)):
@@ -594,6 +591,15 @@ def parse(doc: list[str], settings: Any) -> nodes.document:
     return summary
 
 
+def _parse_summary(doc: Sequence[str], settings: Any) -> nodes.document:
+    state_machine = RSTStateMachine(state_classes, 'Body')
+    node = new_document('', settings)  # fresh document for every call
+    node.reporter = NullReporter()  # presumably mutes parser diagnostics
+    state_machine.run(doc, node)  # parse result lands in ``node``
+
+    return node
+
+
 def limited_join(
     sep: str, items: list[str], max_chars: int = 30, overflow_marker: str = '...'
 ) -> str:
diff --git a/tests/test_ext_autosummary/test_ext_autosummary.py b/tests/test_ext_autosummary/test_ext_autosummary.py
@@ -26,7 +26,6 @@
 )
 from sphinx.ext.autosummary.generate import main as autogen_main
 from sphinx.testing.util import assert_node, etree_parse
-from sphinx.util.docutils import new_document
 
 if TYPE_CHECKING:
     from xml.etree.ElementTree import Element
@@ -86,7 +85,6 @@ def test_extract_summary(capsys):
         pep_reference=False,
         rfc_reference=False,
     )
-    document = new_document('', settings)
 
     # normal case
     doc = [
@@ -95,52 +93,52 @@ def test_extract_summary(capsys):
         '',
         'Second block is here',
     ]
-    assert extract_summary(doc, document) == 'This is a first sentence.'
+    assert extract_summary(doc, settings) == 'This is a first sentence.'
 
     # inliner case
     doc = [
         'This sentence contains *emphasis text having dots.*,',
         'it does not break sentence.',
     ]
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     # abbreviations
     doc = ['Blabla, i.e. bla.']
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     doc = ['Blabla, (i.e. bla).']
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     doc = ['Blabla, e.g. bla.']
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     doc = ['Blabla, (e.g. bla).']
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     doc = ['Blabla, et al. bla.']
-    assert extract_summary(doc, document) == ' '.join(doc)
+    assert extract_summary(doc, settings) == ' '.join(doc)
 
     # literal
     doc = ['blah blah::']
-    assert extract_summary(doc, document) == 'blah blah.'
+    assert extract_summary(doc, settings) == 'blah blah.'
 
     # heading
     doc = [
         'blah blah',
         '=========',
     ]
-    assert extract_summary(doc, document) == 'blah blah'
+    assert extract_summary(doc, settings) == 'blah blah'
 
     doc = [
         '=========',
         'blah blah',
         '=========',
     ]
-    assert extract_summary(doc, document) == 'blah blah'
+    assert extract_summary(doc, settings) == 'blah blah'
 
     # hyperlink target
     doc = ['Do `this <https://www.sphinx-doc.org/>`_ and that. blah blah blah.']
-    extracted = extract_summary(doc, document)
+    extracted = extract_summary(doc, settings)
     assert extracted == 'Do `this <https://www.sphinx-doc.org/>`_ and that.'
 
     _, err = capsys.readouterr()