Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions internal/rss/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"mime"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -89,6 +90,16 @@ func DiscoverFeedURL(ctx context.Context, blogURL string, timeout time.Duration)
return "", nil
}

// If the URL already returns a feed content-type, return it directly.
contentType := response.Header.Get("Content-Type")
mediaType, _, err := mime.ParseMediaType(contentType)
if err == nil {
// Only accept explicit feed types, not generic XML (to avoid sitemap false positives).
if mediaType == "application/rss+xml" || mediaType == "application/atom+xml" || mediaType == "application/feed+json" {
return blogURL, nil
}
}

base, err := url.Parse(blogURL)
if err != nil {
return "", nil
Expand All @@ -109,6 +120,10 @@ func DiscoverFeedURL(ctx context.Context, blogURL string, timeout time.Duration)

for _, feedType := range feedTypes {
selection := doc.Find(fmt.Sprintf("link[rel='alternate'][type='%s']", feedType)).First()
if selection.Length() == 0 {
// Also check rel="self" for feeds that use self-referencing links.
selection = doc.Find(fmt.Sprintf("link[rel='self'][type='%s']", feedType)).First()
}
if selection.Length() == 0 {
continue
}
Expand Down
42 changes: 42 additions & 0 deletions internal/rss/rss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,45 @@ func TestDiscoverFeedURL(t *testing.T) {
require.NoError(t, err, "discover feed")
require.NotEmpty(t, feedURL, "expected feed url")
}

// TestDiscoverFeedURL_XMLContentType verifies that when the requested URL
// itself responds with an RSS content type, DiscoverFeedURL hands that same
// URL back as the feed URL.
func TestDiscoverFeedURL_XMLContentType(t *testing.T) {
	const feedPath = "/tag/AI/feed/"

	// Serve the sample feed with an explicit RSS media type; the charset
	// parameter exercises media-type parsing rather than a plain string match.
	serveFeed := func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/rss+xml; charset=UTF-8")
		if _, writeErr := w.Write([]byte(sampleFeed)); writeErr != nil {
			http.Error(w, writeErr.Error(), http.StatusInternalServerError)
		}
	}

	router := http.NewServeMux()
	router.HandleFunc(feedPath, serveFeed)
	srv := httptest.NewServer(router)
	defer srv.Close()

	target := srv.URL + feedPath
	got, err := DiscoverFeedURL(context.Background(), target, 2*time.Second)
	require.NoError(t, err)
	require.Equal(t, target, got, "should return URL directly for feed content-type")
}

// TestDiscoverFeedURL_RelSelf verifies that DiscoverFeedURL finds a feed that
// an HTML page advertises through a <link rel="self"> element, and resolves
// the link's relative href against the page URL.
func TestDiscoverFeedURL_RelSelf(t *testing.T) {
	// respondWith returns a handler that writes payload as the response body.
	// No Content-Type header is set explicitly by either route.
	respondWith := func(payload []byte) func(http.ResponseWriter, *http.Request) {
		return func(w http.ResponseWriter, r *http.Request) {
			if _, writeErr := w.Write(payload); writeErr != nil {
				http.Error(w, writeErr.Error(), http.StatusInternalServerError)
			}
		}
	}

	router := http.NewServeMux()
	// The landing page only exposes the feed via rel="self".
	router.HandleFunc("/", respondWith([]byte(`<html><head><link rel="self" type="application/rss+xml" href="/my-feed.xml" /></head></html>`)))
	router.HandleFunc("/my-feed.xml", respondWith([]byte(sampleFeed)))
	srv := httptest.NewServer(router)
	defer srv.Close()

	got, err := DiscoverFeedURL(context.Background(), srv.URL, 2*time.Second)
	require.NoError(t, err)
	require.Equal(t, srv.URL+"/my-feed.xml", got, "should discover feed from rel=self link")
}