diff --git a/internal/rss/rss.go b/internal/rss/rss.go index 1107eb9..3dd7b85 100644 --- a/internal/rss/rss.go +++ b/internal/rss/rss.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "mime" "net/http" "net/url" "os" @@ -89,6 +90,16 @@ func DiscoverFeedURL(ctx context.Context, blogURL string, timeout time.Duration) return "", nil } + // If the URL already returns a feed content-type, return it directly. + contentType := response.Header.Get("Content-Type") + mediaType, _, err := mime.ParseMediaType(contentType) + if err == nil { + // Only accept explicit feed types, not generic XML (to avoid sitemap false positives). + if mediaType == "application/rss+xml" || mediaType == "application/atom+xml" || mediaType == "application/feed+json" { + return blogURL, nil + } + } + base, err := url.Parse(blogURL) if err != nil { return "", nil @@ -109,6 +120,10 @@ func DiscoverFeedURL(ctx context.Context, blogURL string, timeout time.Duration) for _, feedType := range feedTypes { selection := doc.Find(fmt.Sprintf("link[rel='alternate'][type='%s']", feedType)).First() + if selection.Length() == 0 { + // Also check rel="self" for feeds that use self-referencing links. 
+ selection = doc.Find(fmt.Sprintf("link[rel='self'][type='%s']", feedType)).First() + } if selection.Length() == 0 { continue } diff --git a/internal/rss/rss_test.go b/internal/rss/rss_test.go index 7fefca9..69e8a22 100644 --- a/internal/rss/rss_test.go +++ b/internal/rss/rss_test.go @@ -62,3 +62,45 @@ func TestDiscoverFeedURL(t *testing.T) { require.NoError(t, err, "discover feed") require.NotEmpty(t, feedURL, "expected feed url") } + +func TestDiscoverFeedURL_XMLContentType(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/tag/AI/feed/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/rss+xml; charset=UTF-8") + _, writeErr := w.Write([]byte(sampleFeed)) + if writeErr != nil { + http.Error(w, writeErr.Error(), http.StatusInternalServerError) + return + } + }) + server := httptest.NewServer(mux) + defer server.Close() + + feedURL, err := DiscoverFeedURL(context.Background(), server.URL+"/tag/AI/feed/", 2*time.Second) + require.NoError(t, err) + require.Equal(t, server.URL+"/tag/AI/feed/", feedURL, "should return URL directly for feed content-type") +} + +func TestDiscoverFeedURL_RelSelf(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + _, writeErr := w.Write([]byte(`<html><head><link rel="self" type="application/rss+xml" href="/my-feed.xml"></head><body></body></html>`)) + if writeErr != nil { + http.Error(w, writeErr.Error(), http.StatusInternalServerError) + return + } + }) + mux.HandleFunc("/my-feed.xml", func(w http.ResponseWriter, r *http.Request) { + _, writeErr := w.Write([]byte(sampleFeed)) + if writeErr != nil { + http.Error(w, writeErr.Error(), http.StatusInternalServerError) + return + } + }) + server := httptest.NewServer(mux) + defer server.Close() + + feedURL, err := DiscoverFeedURL(context.Background(), server.URL, 2*time.Second) + require.NoError(t, err) + require.Equal(t, server.URL+"/my-feed.xml", feedURL, "should discover feed from rel=self link") +}