From 36ce4af26b10d934e39ff7ca39c5cd14f2e416f8 Mon Sep 17 00:00:00 2001 From: Alexander Van der Bellen Date: Tue, 17 Oct 2023 21:24:07 +0800 Subject: [PATCH 1/4] Handle multiple libxml versions --- classes/autotitle.php | 5 ++- ...e_contents_test.php => autotitle_test.php} | 42 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) rename tests/{block_course_contents_test.php => autotitle_test.php} (73%) diff --git a/classes/autotitle.php b/classes/autotitle.php index 0cb64bd..f4e4ad9 100644 --- a/classes/autotitle.php +++ b/classes/autotitle.php @@ -29,9 +29,10 @@ class autotitle { * Extract suitable title from the HTML section summary text * * @param string $summary + * @param array $errors (passed by reference) * @return string */ - public static function extract_title(string $summary): string { + public static function extract_title(string $summary, &$errors = []): string { if ($summary === '') { return ''; @@ -40,6 +41,7 @@ public static function extract_title(string $summary): string { $dom = new \DOMDocument(); libxml_use_internal_errors(true); $dom->loadHTML('' . $summary); + $errors = libxml_get_errors(); libxml_clear_errors(); return static::find_first_nonempty_text_node_value($dom); @@ -57,6 +59,7 @@ public static function extract_title(string $summary): string { public static function find_first_nonempty_text_node_value(\DOMNode $node): string { if ($node->nodeType == XML_TEXT_NODE) { + // We use preg_replace() instead of trim() to remove non-breaking spaces. $text = (string) preg_replace('/^\s+|\s+$/u', '', $node->textContent); if ($text !== '') { diff --git a/tests/block_course_contents_test.php b/tests/autotitle_test.php similarity index 73% rename from tests/block_course_contents_test.php rename to tests/autotitle_test.php index cf96bbc..6348683 100644 --- a/tests/block_course_contents_test.php +++ b/tests/autotitle_test.php @@ -33,7 +33,10 @@ class autotitle_test extends \advanced_testcase { * @param string $title */ public function test_extract_title(string $summary, string $title) { - $this->assertEquals($title, autotitle::extract_title($summary)); + $errors = []; + $result = autotitle::extract_title($summary, $errors); + $this->assertEmpty($errors); + $this->assertEquals($title, $result); } /** @@ -47,10 +50,6 @@ public function extract_title_data(): array { 'summary' => 'Welcome to this course!', 'title' => 'Welcome to this course!', ], - 'Invalid HTML' => [ - 'summary' => 'Hello<

', - 'title' => 'Hello', - ], 'Heading' => [ 'summary' => '

Welcome!

In this course, you will learn a lot.

', 'title' => 'Welcome!', @@ -90,4 +89,37 @@ public function extract_title_data(): array { ], ]; } + + /** + * Test extracting invalid title from the summary HTML text. + * The tested method uses libxml and the output can vary between versions. + * + * @dataProvider extract_invalid_title_data + * @param string $summary + * @param array $potentialtitles + */ + public function test_extract_invalid_title(string $summary, array $potentialtitles) { + $errors = []; + $result = autotitle::extract_title($summary, $errors); + $this->assertNotEmpty($errors); + $this->assertContains($result, $potentialtitles); + } + + /** + * Provides data for {@see self::test_extract_invalid_title()}. + * + * @return array + */ + public function extract_invalid_title_data(): array { + return [ + 'Invalid HTML4 Test 1' => [ + 'summary' => 'Hello<

', + 'potentialtitles' => ['Hello', 'Hello<'], + ], + 'Invalid HTML4 Test 2' => [ + 'summary' => '
<Text
', + 'potentialtitles' => ['Text', ' Date: Tue, 17 Oct 2023 21:45:17 +0800 Subject: [PATCH 2/4] Update unit test description --- tests/autotitle_test.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/autotitle_test.php b/tests/autotitle_test.php index 6348683..7403244 100644 --- a/tests/autotitle_test.php +++ b/tests/autotitle_test.php @@ -93,6 +93,7 @@ public function extract_title_data(): array { /** * Test extracting invalid title from the summary HTML text. * The tested method uses libxml and the output can vary between versions. + * Note that despite the invalid HTML, the method will still return a useful title. * * @dataProvider extract_invalid_title_data * @param string $summary From 4f2e405d4c338c565d99757a19a57096c37103dc Mon Sep 17 00:00:00 2001 From: Alexander Van der Bellen Date: Wed, 18 Oct 2023 09:58:00 +0800 Subject: [PATCH 3/4] Update invalid HTML unit test --- tests/autotitle_test.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/autotitle_test.php b/tests/autotitle_test.php index 7403244..0552e32 100644 --- a/tests/autotitle_test.php +++ b/tests/autotitle_test.php @@ -93,6 +93,7 @@ public function extract_title_data(): array { /** * Test extracting invalid title from the summary HTML text. * The tested method uses libxml and the output can vary between versions. + * This also applies to errors, so we cannot reliably test for them. * Note that despite the invalid HTML, the method will still return a useful title. * * @dataProvider extract_invalid_title_data @@ -100,9 +101,7 @@ public function extract_title_data(): array { * @param array $potentialtitles */ public function test_extract_invalid_title(string $summary, array $potentialtitles) { - $errors = []; - $result = autotitle::extract_title($summary, $errors); - $this->assertNotEmpty($errors); + $result = autotitle::extract_title($summary); $this->assertContains($result, $potentialtitles); } From 6b6e043bc2cbc8ca02f569de70d722bc3ce85d0b Mon Sep 17 00:00:00 2001 From: Alexander Van der Bellen Date: Wed, 18 Oct 2023 10:09:01 +0800 Subject: [PATCH 4/4] Update invalid HTML unit test (potential title) --- tests/autotitle_test.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/autotitle_test.php b/tests/autotitle_test.php index 0552e32..5149459 100644 --- a/tests/autotitle_test.php +++ b/tests/autotitle_test.php @@ -118,7 +118,7 @@ public function extract_invalid_title_data(): array { ], 'Invalid HTML4 Test 2' => [ 'summary' => '
<Text
', - 'potentialtitles' => ['Text', ' ['Text', '<'], ], ]; }