diff --git a/goodreads/book.go b/goodreads/book.go index 2e82830..e52a35d 100644 --- a/goodreads/book.go +++ b/goodreads/book.go @@ -141,7 +141,9 @@ func (e *Edition) Sanitise() { // Break tags need to be specially handled to add new lines as html2text does // not convert them to new lines properly e.Description = descriptionAlternativeCoverRegex.ReplaceAllString(e.Description, "") - e.Description = breakTagRegex.ReplaceAllString(e.Description, "\n") + // HACK: html2text only handles <br> and <br/>, goodreads uses <br /> + // replace unsupported br tag with a supported one + e.Description = breakTagRegex.ReplaceAllString(e.Description, "
") e.Description = html2text.HTML2TextWithOptions(e.Description, html2text.WithUnixLineBreaks()) e.Description = strings.TrimSpace(e.Description) diff --git a/goodreads/book_test.go b/goodreads/book_test.go index aed5d4d..a57a466 100644 --- a/goodreads/book_test.go +++ b/goodreads/book_test.go @@ -37,3 +37,40 @@ func TestUnmarshalGenres(t *testing.T) { expectedGenres := goodreads.Genres{"Fantasy", "Classic", "Fiction"} require.Equal(t, expectedGenres, genres) } + +func TestBookUnmarshalBrTagReplacement(t *testing.T) { + testXML := ` + + + 123 + Test Book + 2. line
3. line
4. line]]>
+ + Test Book + 100 + 10 + + + + +
+
+ ` + + var response struct { + Book goodreads.Book `xml:"book"` + } + + err := xml.Unmarshal([]byte(testXML), &response) + require.NoError(t, err) + + description := response.Book.BestEdition.Description + t.Logf("Description after processing: %q", description) + + // Verify that <br /> tags have been correctly converted to newlines + require.Contains(t, description, "Test description\n2. line\n3. line\n4. line") + + // Verify that no HTML br tags remain + require.NotContains(t, description, "") +}