Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/extractor/pics.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (f *UReadability) extractPics(iselect *goquery.Selection, url string) (main

// getImageSize loads image to get size
func (f *UReadability) getImageSize(url string) (size int) {
httpClient := &http.Client{Timeout: time.Second * 30}
httpClient := &http.Client{Timeout: 30 * time.Second}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Printf("[WARN] can't create request to get pic from %s", url)
Expand Down
5 changes: 3 additions & 2 deletions backend/extractor/pics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"strings"
"testing"
"time"

"github.com/PuerkitoBio/goquery"
"github.com/stretchr/testify/assert"
Expand All @@ -27,7 +28,7 @@ func TestExtractPics(t *testing.T) {
defer ts.Close()

t.Log("test main pic")
lr := UReadability{TimeOut: 30, SnippetSize: 200}
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
a, err := lr.Extract(context.Background(), ts.URL+"/2015/09/25/poiezdka-s-apple-maps/")
require.NoError(t, err)
allImages := []string{
Expand All @@ -41,7 +42,7 @@ func TestExtractPics(t *testing.T) {

func TestExtractPicsDirectly(t *testing.T) {
t.Log("test pic directly")
lr := UReadability{TimeOut: 30, SnippetSize: 200}
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
t.Run("normal image retrieval", func(t *testing.T) {
data := `<body>
<img class="alignright size-full wp-image-944214 lazyloadableImage lazyLoad-fadeIn" alt="View Page Source" width="308" height="508" data-original="https://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg" src="https://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg"></body>`
Expand Down
2 changes: 1 addition & 1 deletion backend/extractor/readability.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule
log.Printf("[INFO] extract %s", reqURL)
rb := &Response{}

httpClient := &http.Client{Timeout: time.Second * f.TimeOut}
httpClient := &http.Client{Timeout: f.TimeOut}
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
if err != nil {
log.Printf("[WARN] failed to create request for %s, error=%v", reqURL, err)
Expand Down
85 changes: 58 additions & 27 deletions backend/extractor/readability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,29 +40,60 @@ func TestExtractURL(t *testing.T) {
}))
defer ts.Close()

lr := UReadability{TimeOut: 30, SnippetSize: 200}
t.Log("full url")
rb, err := lr.Extract(context.Background(), ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/")
assert.NoError(t, err)
assert.Equal(t, ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/", rb.URL, "not changed")
assert.Equal(t, "Всем миром для общей пользы • Umputun тут был", rb.Title)
assert.Equal(t, 9665, len(rb.Content))
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}

tests := []struct {
name string
url string
wantURL string
wantTitle string
wantContentLen int
wantErr bool
}{
{
name: "full url",
url: ts.URL + "/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/",
wantURL: ts.URL + "/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/",
wantTitle: "Всем миром для общей пользы • Umputun тут был",
wantContentLen: 9665,
wantErr: false,
},
{
name: "short url",
url: ts.URL + "/IAvTHr",
wantURL: ts.URL + "/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/",
wantTitle: "Всем миром для общей пользы • Umputun тут был",
wantContentLen: 9665,
wantErr: false,
},
{
name: "bad body",
url: ts.URL + "/bad_body",
wantErr: true,
},
{
name: "bad url",
url: "http://bad_url",
wantErr: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rb, err := lr.Extract(context.Background(), tt.url)

if tt.wantErr {
assert.Error(t, err)
assert.Nil(t, rb)
return
}

t.Log("short url")
rb, err = lr.Extract(context.Background(), ts.URL+"/IAvTHr")
assert.NoError(t, err)
assert.Equal(t, ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/", rb.URL, "full url")
assert.Equal(t, 9665, len(rb.Content))

t.Log("bad body")
rb, err = lr.Extract(context.Background(), ts.URL+"/bad_body")
//assert.Error(t, err) // TODO: uncomment, wtf?! this should return error!
assert.Nil(t, rb)

t.Log("bad url")
rb, err = lr.Extract(context.Background(), "http://bad_url")
assert.Error(t, err)
assert.Nil(t, rb)
assert.NoError(t, err)
assert.Equal(t, tt.wantURL, rb.URL)
assert.Equal(t, tt.wantTitle, rb.Title)
assert.Equal(t, tt.wantContentLen, len(rb.Content))
})
}
}

func TestExtractGeneral(t *testing.T) {
Expand Down Expand Up @@ -92,7 +123,7 @@ func TestExtractGeneral(t *testing.T) {
}))
defer ts.Close()

lr := UReadability{TimeOut: 30, SnippetSize: 200}
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
a, err := lr.Extract(context.Background(), ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/")
assert.NoError(t, err)
assert.Equal(t, "Всем миром для общей пользы • Umputun тут был", a.Title)
Expand All @@ -114,7 +145,7 @@ func TestExtractGeneral(t *testing.T) {
}

func TestNormalizeLinks(t *testing.T) {
lr := UReadability{TimeOut: 30, SnippetSize: 200}
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
inp := `blah <img src="/aaa.png"/> sdfasd <a href="/blah2/aa.link">something</a> blah33 <img src="//aaa.com/xyz.jpg">xx</img>`
u, _ := url.Parse("http://ukeeper.com/blah")
out, links := lr.normalizeLinks(inp, &http.Request{URL: u})
Expand All @@ -130,7 +161,7 @@ func TestNormalizeLinks(t *testing.T) {
}

// TestNormalizeLinksIssue calls Extract on a fixed git-scm.com URL and asserts
// only that no error is returned; the extracted result itself is discarded.
// NOTE(review): this block comes from a rendered diff hunk, so the two `lr :=`
// lines below appear to be the old and new versions of a single statement
// rather than two statements — confirm against the actual file.
func TestNormalizeLinksIssue(t *testing.T) {
// presumably the pre-change line: TimeOut written as a bare number
lr := UReadability{TimeOut: 30, SnippetSize: 200}
// presumably the post-change line: TimeOut written as a duration expression (30 * time.Second)
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
// NOTE(review): the URL is an external host, not a local test server, so this
// test presumably needs network access — confirm.
_, err := lr.Extract(context.Background(), "https://git-scm.com/book/en/v2/Git-Tools-Submodules")
assert.NoError(t, err)
}
Expand All @@ -150,8 +181,8 @@ func (m RulesMock) Disable(_ context.Context, _ primitive.ObjectID) error { retu
// All ignores its context argument and always returns an empty (length 0),
// non-nil slice of datastore.Rule.
func (m RulesMock) All(_ context.Context) []datastore.Rule { return make([]datastore.Rule, 0) }

func TestGetContentCustom(t *testing.T) {
lr := UReadability{TimeOut: 30, SnippetSize: 200, Rules: RulesMock{}}
httpClient := &http.Client{Timeout: time.Second * 30}
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200, Rules: RulesMock{}}
httpClient := &http.Client{Timeout: 30 * time.Second}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.String() == "/2015/09/25/poiezdka-s-apple-maps/" {
fh, err := os.Open("testdata/poiezdka-s-apple-maps.html")
Expand Down
2 changes: 1 addition & 1 deletion backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func main() {
log.Fatalf("[ERROR] can't connect to mongo %v", err)
}
srv := rest.Server{
Readability: extractor.UReadability{TimeOut: 30, SnippetSize: 300, Rules: db.GetStores()},
Readability: extractor.UReadability{TimeOut: 30 * time.Second, SnippetSize: 300, Rules: db.GetStores()},
Token: opts.Token,
Credentials: opts.Credentials,
Version: revision,
Expand Down
4 changes: 2 additions & 2 deletions backend/rest/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func TestServer_FileServer(t *testing.T) {
require.NoError(t, err)

srv := Server{
Readability: extractor.UReadability{TimeOut: 30, SnippetSize: 300},
Readability: extractor.UReadability{TimeOut: 30 * time.Second, SnippetSize: 300},
Credentials: map[string]string{"admin": "password"},
}
ts := httptest.NewServer(srv.routes(dir))
Expand Down Expand Up @@ -613,7 +613,7 @@ func startupT(t *testing.T) (*httptest.Server, *Server) {
db, err := datastore.New("mongodb://localhost:27017/", "test_ureadability", 0)
assert.NoError(t, err)
srv := Server{
Readability: extractor.UReadability{TimeOut: 30, SnippetSize: 300, Rules: db.GetStores()},
Readability: extractor.UReadability{TimeOut: 30 * time.Second, SnippetSize: 300, Rules: db.GetStores()},
Credentials: map[string]string{"admin": "password"},
Version: "dev-test",
}
Expand Down