mudrd8mz · Fragonite · Oct 17, 2023 · Oct 17, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/classes/autotitle.php b/classes/autotitle.php
@@ -29,9 +29,10 @@ class autotitle {
      * Extract suitable title from the HTML section summary text
      *
      * @param string $summary
+     * @param array $errors Receives the libxml errors raised while parsing the summary (passed by reference)
      * @return string
      */
-    public static function extract_title(string $summary): string {
+    public static function extract_title(string $summary, &$errors = []): string {
 
         if ($summary === '') {
             return '';
@@ -40,6 +41,7 @@ public static function extract_title(string $summary): string {
         $dom = new \DOMDocument();
         libxml_use_internal_errors(true);
         $dom->loadHTML('<?xml encoding="utf-8" ?>' . $summary);
+        $errors = libxml_get_errors();
         libxml_clear_errors();
 
         return static::find_first_nonempty_text_node_value($dom);
@@ -57,6 +59,7 @@ public static function extract_title(string $summary): string {
     public static function find_first_nonempty_text_node_value(\DOMNode $node): string {
 
         if ($node->nodeType == XML_TEXT_NODE) {
+            // We use preg_replace() with the /u modifier instead of trim() so that non-breaking spaces are removed too.
             $text = (string) preg_replace('/^\s+|\s+$/u', '', $node->textContent);
 
             if ($text !== '') {

diff --git a/tests/block_course_contents_test.php → tests/autotitle_test.php b/tests/block_course_contents_test.php → tests/autotitle_test.php
@@ -33,7 +33,10 @@ class autotitle_test extends \advanced_testcase {
      * @param string $title
      */
     public function test_extract_title(string $summary, string $title) {
-        $this->assertEquals($title, autotitle::extract_title($summary));
+        $errors = [];
+        $result = autotitle::extract_title($summary, $errors);
+        $this->assertEmpty($errors);
+        $this->assertEquals($title, $result);
     }
 
     /**
@@ -47,10 +50,6 @@ public function extract_title_data(): array {
                 'summary' => 'Welcome to this course!',
                 'title' => 'Welcome to this course!',
             ],
-            'Invalid HTML' => [
-                'summary' => '</span>Hello<<h1>',
-                'title' => 'Hello',
-            ],
             'Heading' => [
                 'summary' => '<h3>Welcome!</h3><p>In this course, you will learn a lot.</p>',
                 'title' => 'Welcome!',
@@ -90,4 +89,37 @@ public function extract_title_data(): array {
             ],
         ];
     }
+
+    /**
+     * Test extracting a title from invalid summary HTML text.
+     * The tested method uses libxml and its output can vary between libxml versions.
+     * This also applies to the reported errors, so we cannot reliably test for them.
+     * Note that despite the invalid HTML, the method will still return a useful title.
+     *
+     * @dataProvider extract_invalid_title_data
+     * @param string $summary
+     * @param array $potentialtitles
+     */
+    public function test_extract_invalid_title(string $summary, array $potentialtitles) {
+        $result = autotitle::extract_title($summary);
+        $this->assertContains($result, $potentialtitles);
+    }
+
+    /**
+     * Provides data for {@see self::test_extract_invalid_title()}.
+     *
+     * @return array
+     */
+    public function extract_invalid_title_data(): array {
+        return [
+            'Invalid HTML4 Test 1' => [
+                'summary' => '</span>Hello<<h1>',
+                'potentialtitles' => ['Hello', 'Hello<'],
+            ],
+            'Invalid HTML4 Test 2' => [
+                'summary' => '<div><<span>Text</div>',
+                'potentialtitles' => ['Text', '<'],
+            ],
+        ];
+    }
 }