From dd1250446490241f26d2cd79279c0edc2e75504b Mon Sep 17 00:00:00 2001
From: Dmitry Verkhoturov <paskal.07@gmail.com>
Date: Sat, 22 Mar 2025 22:07:03 +0100
Subject: [PATCH] Refactor UReadability methods to use pointer receiver

This change is needed for OpenAI support, which will be added later on.
---
 backend/extractor/pics.go        |  4 ++--
 backend/extractor/readability.go | 10 +++++-----
 backend/extractor/text.go        |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/backend/extractor/pics.go b/backend/extractor/pics.go
index abe583aa..7117dc7f 100644
--- a/backend/extractor/pics.go
+++ b/backend/extractor/pics.go
@@ -11,7 +11,7 @@ import (
 	log "github.com/go-pkgz/lgr"
 )
 
-func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
+func (f *UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
 	images := make(map[int]string)
 
 	type imgInfo struct {
@@ -58,7 +58,7 @@ func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainI
 }
 
 // getImageSize loads image to get size
-func (f UReadability) getImageSize(url string) (size int) {
+func (f *UReadability) getImageSize(url string) (size int) {
 	httpClient := &http.Client{Timeout: time.Second * 30}
 	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
diff --git a/backend/extractor/readability.go b/backend/extractor/readability.go
index 89afb8b0..d837f484 100644
--- a/backend/extractor/readability.go
+++ b/backend/extractor/readability.go
@@ -59,17 +59,17 @@ var (
 const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15"
 
 // Extract fetches page and retrieves article
-func (f UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
+func (f *UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
 	return f.extractWithRules(ctx, reqURL, nil)
 }
 
 // ExtractByRule fetches page and retrieves article using a specific rule
-func (f UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
+func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
 	return f.extractWithRules(ctx, reqURL, rule)
 }
 
 // ExtractWithRules is the core function that handles extraction with or without a specific rule
-func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
+func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
 	log.Printf("[INFO] extract %s", reqURL)
 	rb := &Response{}
 
@@ -140,7 +140,7 @@ func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule
 // getContent retrieves content from raw body string, both content (text only) and rich (with html tags)
 // if rule is provided, it uses custom rule, otherwise tries to retrieve one from the storage,
 // and at last tries to use general readability parser
-func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
+func (f *UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
 	// general parser
 	genParser := func(body, _ string) (content, rich string, err error) {
 		doc, err := readability.NewDocument(body)
@@ -192,7 +192,7 @@ func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule
 }
 
 // makes all links absolute and returns all found links
-func (f UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
+func (f *UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
 	absoluteLink := func(link string) (absLink string, changed bool) {
 		if r, err := reqContext.URL.Parse(link); err == nil {
 			return r.String(), r.String() != link
diff --git a/backend/extractor/text.go b/backend/extractor/text.go
index 0f8c2dfe..ba21cefe 100644
--- a/backend/extractor/text.go
+++ b/backend/extractor/text.go
@@ -12,7 +12,7 @@ import (
 )
 
 // get clean text from html content
-func (f UReadability) getText(content, title string) string {
+func (f *UReadability) getText(content, title string) string {
 	cleanText := sanitize.HTML(content)
 	cleanText = strings.Replace(cleanText, title, "", 1) // get rid of title in snippet
 	cleanText = strings.ReplaceAll(cleanText, "\t", " ")
@@ -32,7 +32,7 @@ func (f UReadability) getText(content, title string) string {
 }
 
 // get snippet from clean text content
-func (f UReadability) getSnippet(cleanText string) string {
+func (f *UReadability) getSnippet(cleanText string) string {
 	cleanText = strings.ReplaceAll(cleanText, "\n", " ")
 	size := len([]rune(cleanText))
 	if size > f.SnippetSize {
@@ -50,7 +50,7 @@ func (f UReadability) getSnippet(cleanText string) string {
 }
 
 // detect encoding, content type and convert content to utf8
-func (f UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
+func (f *UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
 	getContentTypeAndEncoding := func(str string) (contentType, encoding string) { // from "text/html; charset=windows-1251"
 		elems := strings.Split(str, ";")
 		contentType = strings.TrimSpace(elems[0])