Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion goodreads/book.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ func (e *Edition) Sanitise() {
// Break tags need to be specially handled to add new lines as html2text does
// not convert them to new lines properly
e.Description = descriptionAlternativeCoverRegex.ReplaceAllString(e.Description, "")
e.Description = breakTagRegex.ReplaceAllString(e.Description, "\n")
// HACK: html2text only handles <br> and <br/>, goodreads uses <br />
// replace unsupported br tag with a supported one
e.Description = breakTagRegex.ReplaceAllString(e.Description, "<br>")
e.Description = html2text.HTML2TextWithOptions(e.Description, html2text.WithUnixLineBreaks())
e.Description = strings.TrimSpace(e.Description)

Expand Down
37 changes: 37 additions & 0 deletions goodreads/book_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,40 @@ func TestUnmarshalGenres(t *testing.T) {
expectedGenres := goodreads.Genres{"Fantasy", "Classic", "Fiction"}
require.Equal(t, expectedGenres, genres)
}

// TestBookUnmarshalBrTagReplacement unmarshals a book whose description mixes
// the `<br />`, `<br/>` and `<br>` spellings and checks that the description
// read back from the best edition has newlines in their place and no br tag
// fragments left over.
func TestBookUnmarshalBrTagReplacement(t *testing.T) {
	// Minimal response: the description carries one of each break tag variant.
	const testXML = `
<GoodreadsResponse>
<book>
<id>123</id>
<title>Test Book</title>
<description><![CDATA[Test description<br />2. line<br/>3. line<br>4. line]]></description>
<work>
<original_title>Test Book</original_title>
<ratings_sum>100</ratings_sum>
<ratings_count>10</ratings_count>
</work>
<popular_shelves>
<shelf name="fiction" count="100"/>
</popular_shelves>
</book>
</GoodreadsResponse>
`

	var parsed struct {
		Book goodreads.Book `xml:"book"`
	}
	require.NoError(t, xml.Unmarshal([]byte(testXML), &parsed))

	got := parsed.Book.BestEdition.Description
	t.Logf("Description after processing: %q", got)

	// Every break tag variant must show up as a single newline between lines.
	const want = "Test description\n2. line\n3. line\n4. line"
	require.Contains(t, got, want)

	// Neither the opening nor the closing part of a br tag may survive.
	for _, fragment := range []string{"<br", "br>"} {
		require.NotContains(t, got, fragment)
	}
}
Loading