Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion classes/autotitle.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ class autotitle {
* Extract suitable title from the HTML section summary text
*
* @param string $summary
* @param array $errors (passed by reference)
* @return string
*/
public static function extract_title(string $summary): string {
public static function extract_title(string $summary, &$errors = []): string {

if ($summary === '') {
return '';
Expand All @@ -40,6 +41,7 @@ public static function extract_title(string $summary): string {
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML('<?xml encoding="utf-8" ?>' . $summary);
$errors = libxml_get_errors();
libxml_clear_errors();

return static::find_first_nonempty_text_node_value($dom);
Expand All @@ -57,6 +59,7 @@ public static function extract_title(string $summary): string {
public static function find_first_nonempty_text_node_value(\DOMNode $node): string {

if ($node->nodeType == XML_TEXT_NODE) {
// We use preg_replace() instead of trim() to remove non-breaking spaces.
$text = (string) preg_replace('/^\s+|\s+$/u', '', $node->textContent);

if ($text !== '') {
Expand Down
42 changes: 37 additions & 5 deletions tests/block_course_contents_test.php → tests/autotitle_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ class autotitle_test extends \advanced_testcase {
* @param string $title
*/
public function test_extract_title(string $summary, string $title): void {
    // Parse the summary exactly once; libxml errors are handed back through the by-reference argument.
    $errors = [];
    $result = autotitle::extract_title($summary, $errors);

    // This test requires that parsing the summary reported no libxml errors at all.
    $this->assertEmpty($errors);
    // Both values are strings, so compare them strictly.
    $this->assertSame($title, $result);
}

/**
Expand All @@ -47,10 +50,6 @@ public function extract_title_data(): array {
'summary' => 'Welcome to this course!',
'title' => 'Welcome to this course!',
],
'Invalid HTML' => [
'summary' => '</span>Hello<<h1>',
'title' => 'Hello',
],
'Heading' => [
'summary' => '<h3>Welcome!</h3><p>In this course, you will learn a lot.</p>',
'title' => 'Welcome!',
Expand Down Expand Up @@ -90,4 +89,37 @@ public function extract_title_data(): array {
],
];
}

/**
 * Test extracting a title from summary text that is not valid HTML.
 *
 * The tested method uses libxml and the output can vary between versions.
 * This also applies to errors, so we cannot reliably test for them.
 * Note that despite the invalid HTML, the method will still return a useful title.
 *
 * @dataProvider extract_invalid_title_data
 * @param string $summary Malformed HTML summary to extract the title from.
 * @param array $potentialtitles Titles that are each accepted as a correct result.
 */
public function test_extract_invalid_title(string $summary, array $potentialtitles): void {
    // Parser errors are deliberately not collected here, see the note above.
    $result = autotitle::extract_title($summary);

    // Name the offending summary on failure so that version-specific results are easy to diagnose.
    $this->assertContains(
        $result,
        $potentialtitles,
        "Unexpected title extracted from invalid summary: {$summary}"
    );
}

/**
 * Provides data for {@see self::test_extract_invalid_title()}.
 *
 * Each dataset consists of a malformed 'summary' and the list of 'potentialtitles'
 * that are accepted as the extracted title for it.
 *
 * Declared static because recent PHPUnit versions no longer accept non-static data providers.
 *
 * @return array
 */
public static function extract_invalid_title_data(): array {
    return [
        'Invalid HTML4 Test 1' => [
            'summary' => '</span>Hello<<h1>',
            'potentialtitles' => ['Hello', 'Hello<'],
        ],
        'Invalid HTML4 Test 2' => [
            'summary' => '<div><<span>Text</div>',
            'potentialtitles' => ['Text', '<'],
        ],
    ];
}
}