Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
ENABLE_MONGO_TESTS: "true"

- name: golangci-lint
uses: golangci/golangci-lint-action@v6
uses: golangci/golangci-lint-action@v7
with:
version: latest
working-directory: backend
Expand Down
68 changes: 57 additions & 11 deletions backend/.golangci.yml
Original file line number Diff line number Diff line change
@@ -1,37 +1,83 @@
version: "2"
run:
tests: false
timeout: 5m
linters:
default: none
enable:
- bodyclose
- copyloopvar
- dogsled
- dupl
- errcheck
- gochecknoinits
- gocognit
- goconst
- gocritic
- gocyclo
- gofmt
- goimports
- goprintffuncname
- gosec
- gosimple
- govet
- ineffassign
- misspell
- nakedret
- nolintlint
- prealloc
- revive
- rowserrcheck
- staticcheck
- stylecheck
- typecheck
- testifylint
- unconvert
- unparam
- unused
- whitespace
disable-all: true

issues:
exclude-dirs:
- vendor
settings:
goconst:
min-len: 2
min-occurrences: 2
revive:
enable-all-rules: true
rules:
- name: unused-receiver
disabled: true
- name: line-length-limit
disabled: true
- name: add-constant
disabled: true
- name: cognitive-complexity
disabled: true
- name: function-length
disabled: true
- name: cyclomatic
disabled: true
- name: nested-structs
disabled: true
gocritic:
disabled-checks:
- hugeParam
enabled-tags:
- performance
- style
- experimental
govet:
enable:
- shadow
lll:
line-length: 140
misspell:
locale: US
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$
formatters:
enable:
- gofmt
- goimports
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$
4 changes: 2 additions & 2 deletions backend/datastore/mongo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ func TestMongoCreation(t *testing.T) {

func TestWrongConnectionString(t *testing.T) {
server, err := New("wrong", "test_ureadability", time.Millisecond*100)
assert.Error(t, err)
require.Error(t, err)
assert.Nil(t, server)
server, err = New("", "", time.Millisecond*100)
assert.Error(t, err)
require.Error(t, err)
assert.Nil(t, server)
}
12 changes: 7 additions & 5 deletions backend/datastore/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func (r RulesDAO) Get(ctx context.Context, rURL string) (Rule, bool) {
var rules []Rule
q := bson.M{"domain": u.Host, "enabled": true}
log.Printf("[DEBUG] query %v", q)
cursor, err := r.Collection.Find(ctx, q)
cursor, err := r.Find(ctx, q)
if err != nil {
log.Printf("[DEBUG] error looking for rules for %s", rURL)
return Rule{}, false
Expand All @@ -65,13 +65,15 @@ func (r RulesDAO) GetByID(ctx context.Context, id primitive.ObjectID) (Rule, boo

// Save upserts the rule, matching on its domain
func (r RulesDAO) Save(ctx context.Context, rule Rule) (Rule, error) {
ch, err := r.Collection.UpdateOne(ctx, bson.M{"domain": rule.Domain}, bson.M{"$set": rule}, options.Update().SetUpsert(true))
ch, err := r.UpdateOne(ctx, bson.M{"domain": rule.Domain}, bson.M{"$set": rule}, options.Update().SetUpsert(true))
if err != nil {
log.Printf("[WARN] failed to save, error=%v, article=%v", err, rule)
return rule, err
}
if ch.UpsertedID != nil {
rule.ID = ch.UpsertedID.(primitive.ObjectID)
if oid, ok := ch.UpsertedID.(primitive.ObjectID); ok {
rule.ID = oid
}
}
// if rule was updated, we have no id, so try to find it by domain
if rule.ID == primitive.NilObjectID {
Expand All @@ -86,13 +88,13 @@ func (r RulesDAO) Save(ctx context.Context, rule Rule) (Rule, error) {

// Disable sets enabled=false on the rule with the given id
func (r RulesDAO) Disable(ctx context.Context, id primitive.ObjectID) error {
_, err := r.Collection.UpdateOne(ctx, bson.M{"_id": id}, bson.M{"$set": bson.M{"enabled": false}})
_, err := r.UpdateOne(ctx, bson.M{"_id": id}, bson.M{"$set": bson.M{"enabled": false}})
return err
}

// All returns a list of all rules, both enabled and disabled
func (r RulesDAO) All(ctx context.Context) []Rule {
cursor, err := r.Collection.Find(ctx, bson.M{})
cursor, err := r.Find(ctx, bson.M{})
if err != nil {
log.Printf("[WARN] failed to retrieve all rules, error=%v", err)
return []Rule{}
Expand Down
10 changes: 5 additions & 5 deletions backend/datastore/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestRules(t *testing.T) {

// save a rule
srule, err := rules.Save(context.Background(), rule)
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, rule.Domain, srule.Domain)
ruleID := srule.ID

Expand All @@ -46,7 +46,7 @@ func TestRules(t *testing.T) {

// disable the rule
err = rules.Disable(context.Background(), grule.ID)
assert.NoError(t, err)
require.NoError(t, err)
assert.NotContains(t, rules.All(context.Background()), grule)

// get the rule by ID, should be marked as disabled
Expand All @@ -64,7 +64,7 @@ func TestRules(t *testing.T) {

// save a rule once more, should result in the same ID
updatedRule, err := rules.Save(context.Background(), rule)
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, rule.Domain, updatedRule.Domain)
assert.Equal(t, ruleID, updatedRule.ID)
}
Expand All @@ -84,7 +84,7 @@ func TestRulesCanceledContext(t *testing.T) {
rule := Rule{Domain: "example.com", Enabled: true}
srule, err := rules.Save(ctx, rule)
assert.Equal(t, rule, srule)
assert.Error(t, err)
require.Error(t, err)

// retrieve a rule, wrong rule
grule, found := rules.Get(context.Background(), "http://user^:passwo^rd@foo.com/")
Expand All @@ -95,7 +95,7 @@ func TestRulesCanceledContext(t *testing.T) {
assert.Empty(t, grule, "canceled context")
assert.False(t, found, "canceled context")
assert.Empty(t, rules.All(ctx))
assert.Error(t, rules.Disable(ctx, rule.ID))
require.Error(t, rules.Disable(ctx, rule.ID))
// get a rule by ID with canceled context
grule, found = rules.GetByID(ctx, rule.ID)
assert.Empty(t, grule)
Expand Down
2 changes: 1 addition & 1 deletion backend/extractor/pics.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (f *UReadability) extractPics(iselect *goquery.Selection, url string) (main
// getImageSize loads the image at url to get its size
func (f *UReadability) getImageSize(url string) (size int) {
httpClient := &http.Client{Timeout: 30 * time.Second}
req, err := http.NewRequest("GET", url, nil)
req, err := http.NewRequest("GET", url, http.NoBody)
if err != nil {
log.Printf("[WARN] can't create request to get pic from %s", url)
return 0
Expand Down
14 changes: 7 additions & 7 deletions backend/extractor/pics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ func TestExtractPics(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fh, err := os.Open("testdata/poiezdka-s-apple-maps.html")
testHTML, err := io.ReadAll(fh)
require.NoError(t, err)
require.NoError(t, fh.Close())
assert.NoError(t, err)
assert.NoError(t, fh.Close())
_, err = w.Write(testHTML)
require.NoError(t, err)
assert.NoError(t, err)
}))
defer ts.Close()

Expand Down Expand Up @@ -51,7 +51,7 @@ func TestExtractPicsDirectly(t *testing.T) {
sel := d.Find("img")
im, allImages, ok := lr.extractPics(sel, "url")
assert.True(t, ok)
assert.Equal(t, 1, len(allImages))
assert.Len(t, allImages, 1)
assert.Equal(t, "https://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg", im)
})

Expand All @@ -73,7 +73,7 @@ func TestExtractPicsDirectly(t *testing.T) {
sel := d.Find("img")
im, allImages, ok := lr.extractPics(sel, "url")
assert.True(t, ok)
assert.Equal(t, 1, len(allImages))
assert.Len(t, allImages, 1)
assert.Equal(t, "http://bad_url", im)
})

Expand All @@ -82,13 +82,13 @@ func TestExtractPicsDirectly(t *testing.T) {
w.Header().Set("Content-Length", "1") // error on reading body
}))
defer ts.Close()
data := fmt.Sprintf(`<body><img src="%s"></body>`, ts.URL)
data := fmt.Sprintf(`<body><img src=%q></body>`, ts.URL)
d, err := goquery.NewDocumentFromReader(strings.NewReader(data))
require.NoError(t, err)
sel := d.Find("img")
im, allImages, ok := lr.extractPics(sel, "url")
assert.True(t, ok)
assert.Equal(t, 1, len(allImages))
assert.Len(t, allImages, 1)
assert.Equal(t, ts.URL, im)
})
}
8 changes: 4 additions & 4 deletions backend/extractor/readability.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule
rb := &Response{}

httpClient := &http.Client{Timeout: f.TimeOut}
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, http.NoBody)
if err != nil {
log.Printf("[WARN] failed to create request for %s, error=%v", reqURL, err)
return nil, err
Expand Down Expand Up @@ -180,7 +180,7 @@ func (f *UReadability) getContent(ctx context.Context, body, reqURL string, rule
r := f.Rules
if rule, found := r.Get(ctx, reqURL); found {
if content, rich, err = customParser(body, reqURL, rule); err == nil {
return content, rich, err
return content, rich, nil
}
log.Printf("[WARN] custom extractor failed for %s, error=%v", reqURL, err) // back to general parser
}
Expand Down Expand Up @@ -208,8 +208,8 @@ func (f *UReadability) normalizeLinks(data string, reqContext *http.Request) (re
dstLink := srcLink
if absLink, changed := absoluteLink(srcLink); changed {
dstLink = absLink
srcLink = fmt.Sprintf(`"%s"`, srcLink)
absLink = fmt.Sprintf(`"%s"`, absLink)
srcLink = fmt.Sprintf("%q", srcLink)
absLink = fmt.Sprintf("%q", absLink)
result = strings.ReplaceAll(result, srcLink, absLink)
log.Printf("[DEBUG] normalized %s -> %s", srcLink, dstLink)
normalizedCount++
Expand Down
30 changes: 15 additions & 15 deletions backend/extractor/readability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.mongodb.org/mongo-driver/bson/primitive"

"github.com/ukeeper/ukeeper-redabilty/backend/datastore"
Expand Down Expand Up @@ -83,15 +84,15 @@ func TestExtractURL(t *testing.T) {
rb, err := lr.Extract(context.Background(), tt.url)

if tt.wantErr {
assert.Error(t, err)
require.Error(t, err)
assert.Nil(t, rb)
return
}

assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, tt.wantURL, rb.URL)
assert.Equal(t, tt.wantTitle, rb.Title)
assert.Equal(t, tt.wantContentLen, len(rb.Content))
assert.Len(t, rb.Content, tt.wantContentLen)
})
}
}
Expand Down Expand Up @@ -125,20 +126,20 @@ func TestExtractGeneral(t *testing.T) {

lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
a, err := lr.Extract(context.Background(), ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/")
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, "Всем миром для общей пользы • Umputun тут был", a.Title)
assert.Equal(t, ts.URL+"/2015/11/26/vsiem-mirom-dlia-obshchiei-polzy/", a.URL)
assert.Equal(t, "Не первый раз я практикую идею “а давайте, ребята, сделаем для общего блага …”, и вот опять. В нашем подкасте радио-т есть незаменимый инструмент, позволяющий собирать новости, готовить их к выпуску, ...", a.Excerpt)
assert.Contains(t, ts.URL, a.Domain)

a, err = lr.Extract(context.Background(), ts.URL+"/v48b6Q")
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, "UWP - Выпуск 369", a.Title)
assert.Equal(t, ts.URL+"/p/2015/11/22/podcast-369/", a.URL)
assert.Equal(t, "2015-11-22 Нагло ходил в гости. Табличка на двери сработала на 50%Никогда нас школа не хвалила. Девочка осваивает новый прибор. Мое неприятие их логики. И разошлись по будкам …Отбиваюсь от опасных ...", a.Excerpt)
assert.Equal(t, "https://podcast.umputun.com/images/uwp/uwp369.jpg", a.Image)
assert.Contains(t, ts.URL, a.Domain)
assert.Equal(t, 13, len(a.AllLinks))
assert.Len(t, a.AllLinks, 13)
assert.Contains(t, a.AllLinks, "https://podcast.umputun.com/media/ump_podcast369.mp3")
assert.Contains(t, a.AllLinks, "https://podcast.umputun.com/images/uwp/uwp369.jpg")
log.Printf("links=%v", a.AllLinks)
Expand All @@ -150,20 +151,19 @@ func TestNormalizeLinks(t *testing.T) {
u, _ := url.Parse("http://ukeeper.com/blah")
out, links := lr.normalizeLinks(inp, &http.Request{URL: u})
assert.Equal(t, `blah <img src="http://ukeeper.com/aaa.png"/> sdfasd <a href="http://ukeeper.com/blah2/aa.link">something</a> blah33 <img src="http://aaa.com/xyz.jpg">xx</img>`, out)
assert.Equal(t, 3, len(links))
assert.Len(t, links, 3)

inp = `<body>
<img class="alignright size-full wp-image-944214 lazyloadableImage lazyLoad-fadeIn" alt="View Page Source" width="308" height="508" data-original="http://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg" src="http://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg"></body>`
_, links = lr.normalizeLinks(inp, &http.Request{URL: u})
assert.Equal(t, 1, len(links))
assert.Len(t, links, 1)
assert.Equal(t, "http://cdn1.tnwcdn.com/wp-content/blogs.dir/1/files/2016/01/page-source.jpg", links[0])

}

func TestNormalizeLinksIssue(t *testing.T) {
lr := UReadability{TimeOut: 30 * time.Second, SnippetSize: 200}
_, err := lr.Extract(context.Background(), "https://git-scm.com/book/en/v2/Git-Tools-Submodules")
assert.NoError(t, err)
require.NoError(t, err)
}

type RulesMock struct{}
Expand Down Expand Up @@ -196,14 +196,14 @@ func TestGetContentCustom(t *testing.T) {
}))
defer ts.Close()
resp, err := httpClient.Get(ts.URL + "/2015/09/25/poiezdka-s-apple-maps/")
assert.NoError(t, err)
require.NoError(t, err)
defer resp.Body.Close()
dataBytes, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
require.NoError(t, err)
body := string(dataBytes)

content, rich, err := lr.getContent(context.Background(), body, ts.URL+"/2015/09/25/poiezdka-s-apple-maps/", nil)
assert.NoError(t, err)
assert.Equal(t, 6988, len(content))
assert.Equal(t, 7169, len(rich))
require.NoError(t, err)
assert.Len(t, content, 6988)
assert.Len(t, rich, 7169)
}
Loading
Loading