Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 104 additions & 3 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"net"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
Expand Down Expand Up @@ -467,13 +468,14 @@ func TestSSRFProtection(t *testing.T) {
out, err := cmd.CombinedOutput()
require.NoError(t, err, "add should succeed: %s", string(out))

// Scan WITHOUT --unsafe-client — the safe client should block loopback and fail.
// Scan WITHOUT --unsafe-client — the safe client should block loopback.
// The scan command exits 0 (partial success) but reports the per-blog error.
cmd = exec.CommandContext(context.Background(), binaryPath,
"--db", dbPath, "scan")
cmd.Env = cleanEnv
out, err = cmd.CombinedOutput()
require.Error(t, err, "scan should fail when SSRF protection blocks loopback")
require.Contains(t, string(out), "failed to fetch feed:", "expected our wrapped error message")
require.NoError(t, err, "scan should exit 0 even when blogs fail: %s", string(out))
require.Contains(t, string(out), "failed to fetch feed:", "expected per-blog error message in output")
}

func filterEnv(env []string, key string) []string {
Expand All @@ -487,6 +489,105 @@ func filterEnv(env []string, key string) []string {
return filtered
}

// TestScanPartialFailure verifies that scanning one healthy blog and one blog
// whose server always answers 500 exits 0 (partial success), prints the
// per-blog error, and still reports the healthy blog's new articles.
func TestScanPartialFailure(t *testing.T) {
	baseURL := startTestServer(t)

	// A server whose every response is 500 Internal Server Error.
	failing := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer failing.Close()

	for _, mode := range []string{"flags", "env"} {
		t.Run(mode, func(t *testing.T) {
			cli := &cliOpts{
				mode:   mode,
				dbPath: filepath.Join(t.TempDir(), "test.db"),
			}

			// One blog that works...
			cli.ok(t, []string{"add", "good-blog", baseURL + "/go/"}, map[string]string{
				"feed-url": baseURL + "/go/feed.atom",
			})
			// ...and one whose feed fetch will always fail with 500.
			cli.ok(t, []string{"add", "bad-blog", failing.URL}, map[string]string{
				"feed-url": failing.URL + "/feed",
			})

			// Partial success: the scan command must still exit 0.
			stdout := cli.ok(t, []string{"scan"}, nil)

			// Output must mention the good blog's success, the bad blog's
			// error, and the summary lines.
			for _, want := range []string{
				"good-blog",
				"Source: RSS",
				"bad-blog",
				"Error: failed to fetch feed: status 500",
				"2 blog(s): 1 succeeded, 1 failed",
				"Found 3 new article(s) total!",
			} {
				assert.Contains(t, stdout, want)
			}
		})
	}
}

// TestScanSilentPartialFailure verifies that in silent mode a partially
// failing scan still exits 0, prints "scan done" to stdout, and reports the
// failure count on stderr.
func TestScanSilentPartialFailure(t *testing.T) {
	baseURL := startTestServer(t)

	// Every request to this server fails with 500.
	failing := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer failing.Close()

	for _, mode := range []string{"flags", "env"} {
		t.Run(mode, func(t *testing.T) {
			cli := &cliOpts{
				mode:   mode,
				dbPath: filepath.Join(t.TempDir(), "test.db"),
			}

			cli.ok(t, []string{"add", "good-blog", baseURL + "/go/"}, map[string]string{
				"feed-url": baseURL + "/go/feed.atom",
			})
			cli.ok(t, []string{"add", "bad-blog", failing.URL}, map[string]string{
				"feed-url": failing.URL + "/feed",
			})

			// Partial failure in silent mode: should still print "scan done" and exit 0.
			stdout, stderr, code := cli.run(t, []string{"scan"}, map[string]string{"silent": ""})
			assert.Equal(t, 0, code, "partial failure should exit 0")
			assert.Contains(t, stdout, "scan done")
			assert.Contains(t, stderr, "1/2 blog(s) failed")
		})
	}
}

// TestScanSilentTotalFailure verifies that in silent mode a scan in which
// every blog fails exits non-zero and reports the failure count on stderr.
func TestScanSilentTotalFailure(t *testing.T) {
	// Every request to this server fails with 500.
	failing := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer failing.Close()

	for _, mode := range []string{"flags", "env"} {
		t.Run(mode, func(t *testing.T) {
			cli := &cliOpts{
				mode:   mode,
				dbPath: filepath.Join(t.TempDir(), "test.db"),
			}

			// Two blogs, both pointing at the always-failing server.
			cli.ok(t, []string{"add", "bad-blog-1", failing.URL + "/a"}, map[string]string{
				"feed-url": failing.URL + "/feed1",
			})
			cli.ok(t, []string{"add", "bad-blog-2", failing.URL + "/b"}, map[string]string{
				"feed-url": failing.URL + "/feed2",
			})

			// Total failure in silent mode: should exit non-zero.
			_, stderr, code := cli.run(t, []string{"scan"}, map[string]string{"silent": ""})
			assert.NotEqual(t, 0, code, "total failure should exit non-zero")
			assert.Contains(t, stderr, "2/2 blog(s) failed")
		})
	}
}

func extractFirstID(t *testing.T, output string) string {
t.Helper()
re := regexp.MustCompile(`\[(\d+)\]`)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/DataDog/go-secure-sdk v0.0.7
github.com/Masterminds/squirrel v1.5.4
github.com/PuerkitoBio/goquery v1.12.0
github.com/cenkalti/backoff/v5 v5.0.3
github.com/fatih/color v1.19.0
github.com/golang-migrate/migrate/v4 v4.19.1
github.com/mmcdole/gofeed v1.3.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
32 changes: 26 additions & 6 deletions internal/cli/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,19 +177,35 @@ func newScanCommand() *cobra.Command {
return err
}
totalNew := 0
failed := 0
for _, result := range results {
if !silent {
printScanResult(result)
}
totalNew += result.NewArticles
if result.Error != "" {
failed++
} else {
totalNew += result.NewArticles
}
}
if !silent {
fmt.Println()
succeeded := len(results) - failed
if failed > 0 {
cprintf([]color.Attribute{color.FgYellow}, "Scanned %d blog(s): %d succeeded, %d failed\n", len(results), succeeded, failed)
}
if totalNew > 0 {
cprintf([]color.Attribute{color.FgGreen, color.Bold}, "Found %d new article(s) total!\n", totalNew)
} else {
} else if failed == 0 {
cprintln([]color.Attribute{color.FgYellow}, "No new articles found.")
}
} else {
if failed > 0 {
fmt.Fprintf(os.Stderr, "scan: %d/%d blog(s) failed\n", failed, len(results))
}
if failed == len(results) {
return fmt.Errorf("scan failed: all %d blog(s) failed", failed)
}
}
}

Expand Down Expand Up @@ -381,15 +397,19 @@ func newImportCommand() *cobra.Command {
}

func printScanResult(result scanner.ScanResult) {
statusColor := []color.Attribute{color.FgWhite}
if result.NewArticles > 0 {
statusColor = []color.Attribute{color.FgGreen}
}
cprintf([]color.Attribute{color.FgWhite, color.Bold}, " %s\n", result.BlogName)
if result.Error != "" {
cprintf([]color.Attribute{color.FgRed}, " Error: %s\n", result.Error)
return
}
if result.Source == "none" {
cprintln([]color.Attribute{color.FgYellow}, " No feed or scraper configured")
return
}
statusColor := []color.Attribute{color.FgWhite}
if result.NewArticles > 0 {
statusColor = []color.Attribute{color.FgGreen}
}
sourceLabel := "HTML"
if result.Source == "rss" {
sourceLabel = "RSS"
Expand Down
5 changes: 4 additions & 1 deletion internal/rss/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ func (f *Fetcher) DiscoverFeedURL(ctx context.Context, blogURL string) (string,
fmt.Fprintf(os.Stderr, "close: %v\n", err)
}
}()
if response.StatusCode >= 500 {
return "", FeedParseError{Message: fmt.Sprintf("discover feed: server error status %d", response.StatusCode)}
}
if response.StatusCode < 200 || response.StatusCode >= 300 {
// Not-found / bad status is not an error — just means no feed at this URL.
// Client errors (4xx) are not transient — just means no feed at this URL.
return "", nil
Comment on lines +99 to 104
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Broaden retryable discovery failures beyond 5xx.

Only the 5xx path becomes a typed FeedParseError here. DiscoverFeedURL still returns plain wrapped errors for client.Do / body-read failures elsewhere in this function, and 408/429 still fall through to ("", nil). Because internal/scanner/scanner.go Lines 45-56 only folds rss.IsFeedError into ScanResult.Error, a single blog without a stored FeedURL can still abort the entire scan or be silently treated as "no feed found." Please surface transient discovery failures through a typed error that preserves the underlying cause so cancellation/timeouts remain fatal.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/rss/rss.go` around lines 99 - 104, DiscoverFeedURL currently only
returns FeedParseError for 5xx responses; expand this so transient failures
(HTTP 408, 429 and any errors from client.Do or reading the body) also return a
FeedParseError that wraps the underlying error so callers using rss.IsFeedError
see them as fatal; modify DiscoverFeedURL to return FeedParseError for
response.StatusCode >=500 OR response.StatusCode == 408 || response.StatusCode
== 429, and when client.Do or ioutil.ReadAll (or equivalent) returns an error
wrap that error in FeedParseError (preserving the original error via
fmt.Errorf("%w", err) or similar) so timeouts/cancellations propagate correctly.

}

Expand Down
23 changes: 23 additions & 0 deletions internal/rss/rss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,29 @@ func TestDiscoverFeedURL_XMLContentType(t *testing.T) {
require.Equal(t, server.URL+"/tag/AI/feed/", feedURL, "should return URL directly for feed content-type")
}

// TestDiscoverFeedURL_ServerError checks that a 5xx discovery response
// surfaces as a typed FeedParseError so callers can treat it as retryable.
func TestDiscoverFeedURL_ServerError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	defer srv.Close()

	_, err := newTestFetcher().DiscoverFeedURL(context.Background(), srv.URL)
	require.Error(t, err, "should return error for 5xx")
	require.Contains(t, err.Error(), "server error status 503")
	require.True(t, IsFeedError(err), "should be a FeedParseError so it's retryable")
}

// TestDiscoverFeedURL_NotFound checks that a 404 during discovery is not an
// error: it simply means no feed exists at that URL, so the result is empty.
func TestDiscoverFeedURL_NotFound(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}))
	defer srv.Close()

	feedURL, err := newTestFetcher().DiscoverFeedURL(context.Background(), srv.URL)
	require.NoError(t, err, "404 should not be an error")
	require.Empty(t, feedURL, "should return empty for 404")
}

func TestDiscoverFeedURL_RelSelf(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
Expand Down
Loading