diff --git a/classes/autotitle.php b/classes/autotitle.php index 0cb64bd..f4e4ad9 100644 --- a/classes/autotitle.php +++ b/classes/autotitle.php @@ -29,9 +29,10 @@ class autotitle { * Extract suitable title from the HTML section summary text * * @param string $summary + * @param array $errors (passed by reference) * @return string */ - public static function extract_title(string $summary): string { + public static function extract_title(string $summary, &$errors = []): string { if ($summary === '') { return ''; @@ -40,6 +41,7 @@ public static function extract_title(string $summary): string { $dom = new \DOMDocument(); libxml_use_internal_errors(true); $dom->loadHTML('' . $summary); + $errors = libxml_get_errors(); libxml_clear_errors(); return static::find_first_nonempty_text_node_value($dom); @@ -57,6 +59,7 @@ public static function extract_title(string $summary): string { public static function find_first_nonempty_text_node_value(\DOMNode $node): string { if ($node->nodeType == XML_TEXT_NODE) { + // We use preg_replace() instead of trim() to remove non-breaking spaces. $text = (string) preg_replace('/^\s+|\s+$/u', '', $node->textContent); if ($text !== '') { diff --git a/tests/block_course_contents_test.php b/tests/autotitle_test.php similarity index 71% rename from tests/block_course_contents_test.php rename to tests/autotitle_test.php index cf96bbc..5149459 100644 --- a/tests/block_course_contents_test.php +++ b/tests/autotitle_test.php @@ -33,7 +33,10 @@ class autotitle_test extends \advanced_testcase { * @param string $title */ public function test_extract_title(string $summary, string $title) { - $this->assertEquals($title, autotitle::extract_title($summary)); + $errors = []; + $result = autotitle::extract_title($summary, $errors); + $this->assertEmpty($errors); + $this->assertEquals($title, $result); } /** @@ -47,10 +50,6 @@ public function extract_title_data(): array { 'summary' => 'Welcome to this course!', 'title' => 'Welcome to this course!', ], - 'Invalid HTML' => [ - 'summary' => 'Hello<

', - 'title' => 'Hello', - ], 'Heading' => [ 'summary' => '

Welcome!

In this course, you will learn a lot.

', 'title' => 'Welcome!', @@ -90,4 +89,37 @@ public function extract_title_data(): array { ], ]; } + + /** + * Test extracting invalid title from the summary HTML text. + * The tested method uses libxml and the output can vary between versions. + * This also applies to errors, so we cannot reliably test for them. + * Note that despite the invalid HTML, the method will still return a useful title. + * + * @dataProvider extract_invalid_title_data + * @param string $summary + * @param array $potentialtitles + */ + public function test_extract_invalid_title(string $summary, array $potentialtitles) { + $result = autotitle::extract_title($summary); + $this->assertContains($result, $potentialtitles); + } + + /** + * Provides data for {@see self::test_extract_invalid_title()}. + * + * @return array + */ + public function extract_invalid_title_data(): array { + return [ + 'Invalid HTML4 Test 1' => [ + 'summary' => 'Hello<

', + 'potentialtitles' => ['Hello', 'Hello<'], + ], + 'Invalid HTML4 Test 2' => [ + 'summary' => '
<Text
', + 'potentialtitles' => ['Text', '<'], + ], + ]; + } }