From cd96c08eb9dce1aaf7e5b4359075251d6a044e40 Mon Sep 17 00:00:00 2001 From: Julien Tant Date: Fri, 3 Apr 2026 15:08:38 -0700 Subject: [PATCH 1/2] Add OPML import command for bulk blog subscriptions Allow users to import blog subscriptions from OPML files exported by feed readers like Feedly, Inoreader, and NewsBlur. Duplicates are gracefully skipped and reported. Based on upstream PR Hyaxia/blogwatcher#12 by @pastukhov, adapted to our project patterns (context threading, testify, squirrel, etc.). Co-Authored-By: Claude Opus 4.6 (1M context) --- e2e/e2e_test.go | 43 ++++++ e2e/expected/30_import_opml.txt | 1 + e2e/expected/31_import_blogs_listed.txt | 10 ++ e2e/expected/32_import_opml_duplicates.txt | 1 + go.mod | 2 +- internal/cli/commands.go | 36 +++++ internal/cli/root.go | 1 + internal/controller/controller.go | 27 ++++ internal/controller/controller_test.go | 87 +++++++++++ internal/opml/opml.go | 67 ++++++++ internal/opml/opml_test.go | 171 +++++++++++++++++++++ 11 files changed, 445 insertions(+), 1 deletion(-) create mode 100644 e2e/expected/30_import_opml.txt create mode 100644 e2e/expected/31_import_blogs_listed.txt create mode 100644 e2e/expected/32_import_opml_duplicates.txt create mode 100644 internal/opml/opml.go create mode 100644 internal/opml/opml_test.go diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 29caf9b..4e71540 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -402,6 +402,49 @@ func TestE2E(t *testing.T) { } } +func TestImportOPML(t *testing.T) { + baseURL := startTestServer(t) + + for _, mode := range []string{"flags", "env"} { + t.Run(mode, func(t *testing.T) { + c := &cliOpts{ + mode: mode, + dbPath: filepath.Join(t.TempDir(), "test.db"), + } + + // Write an OPML file with feeds pointing at the test server. + opmlContent := fmt.Sprintf(` + + Test Subscriptions + + + + + + +`, baseURL, baseURL, baseURL, baseURL) + opmlPath := filepath.Join(t.TempDir(), "subs.opml") + err := os.WriteFile(opmlPath, []byte(opmlContent), 0o644) + require.NoError(t, err) + + // Import the OPML file. + out := c.ok(t, []string{"import", opmlPath}, nil) + checkOutput(t, "30_import_opml", out, baseURL) + + // Verify blogs appear in list. + out = c.ok(t, []string{"blogs"}, nil) + checkOutput(t, "31_import_blogs_listed", out, baseURL) + + // Re-import the same file -- all should be skipped as duplicates. + out = c.ok(t, []string{"import", opmlPath}, nil) + checkOutput(t, "32_import_opml_duplicates", out, baseURL) + + // Import a nonexistent file should fail. + c.fail(t, []string{"import", "/nonexistent/file.opml"}, nil) + }) + } +} + func extractFirstID(t *testing.T, output string) string { t.Helper() re := regexp.MustCompile(`\[(\d+)\]`) diff --git a/e2e/expected/30_import_opml.txt b/e2e/expected/30_import_opml.txt new file mode 100644 index 0000000..7e62e67 --- /dev/null +++ b/e2e/expected/30_import_opml.txt @@ -0,0 +1 @@ +Imported 2 blog(s), skipped 0 duplicate(s) diff --git a/e2e/expected/31_import_blogs_listed.txt b/e2e/expected/31_import_blogs_listed.txt new file mode 100644 index 0000000..19d8c0a --- /dev/null +++ b/e2e/expected/31_import_blogs_listed.txt @@ -0,0 +1,10 @@ +Tracked blogs (2): + + GitHub Blog + URL: {{SERVER}}/github/ + Feed: {{SERVER}}/github/feed/ + + Go Blog + URL: {{SERVER}}/go/ + Feed: {{SERVER}}/go/feed.atom + diff --git a/e2e/expected/32_import_opml_duplicates.txt b/e2e/expected/32_import_opml_duplicates.txt new file mode 100644 index 0000000..ffaeff6 --- /dev/null +++ b/e2e/expected/32_import_opml_duplicates.txt @@ -0,0 +1 @@ +Imported 0 blog(s), skipped 2 duplicate(s) diff --git a/go.mod b/go.mod index c2e1ea3..f1f18b5 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 + golang.org/x/net v0.52.0 modernc.org/sqlite v1.48.1 ) @@ -40,7 +41,6 @@ require ( github.com/spf13/pflag v1.0.10 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.52.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.35.0 // indirect golang.org/x/tools v0.43.0 // indirect diff --git a/internal/cli/commands.go b/internal/cli/commands.go index 3e4a504..fd0589c 100644 --- a/internal/cli/commands.go +++ b/internal/cli/commands.go @@ -379,6 +379,42 @@ func newUnreadCommand() *cobra.Command { return cmd } +func newImportCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "import ", + Short: "Import blogs from an OPML file.", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + f, err := os.Open(args[0]) + if err != nil { + return err + } + defer func() { + if err := f.Close(); err != nil { + fmt.Fprintf(os.Stderr, "close file: %v\n", err) + } + }() + db, err := storage.OpenDatabase(cmd.Context(), viper.GetString("db")) + if err != nil { + return err + } + defer func() { + if err := db.Close(); err != nil { + fmt.Fprintf(os.Stderr, "close db: %v\n", err) + } + }() + added, skipped, err := controller.ImportOPML(cmd.Context(), db, f) + if err != nil { + printError(err) + return markError(err) + } + cprintf([]color.Attribute{color.FgGreen}, "Imported %d blog(s), skipped %d duplicate(s)\n", added, skipped) + return nil + }, + } + return cmd +} + func printScanResult(result scanner.ScanResult) { statusColor := []color.Attribute{color.FgWhite} if result.NewArticles > 0 { diff --git a/internal/cli/root.go b/internal/cli/root.go index f822f7e..765ac5d 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -33,6 +33,7 @@ func NewRootCommand() *cobra.Command { rootCmd.AddCommand(newReadCommand()) rootCmd.AddCommand(newReadAllCommand()) rootCmd.AddCommand(newUnreadCommand()) + rootCmd.AddCommand(newImportCommand()) return rootCmd } diff --git a/internal/controller/controller.go b/internal/controller/controller.go index cea763a..fc4c0d5 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -2,9 +2,12 @@ package controller import ( "context" + "errors" "fmt" + "io" "github.com/JulienTant/blogwatcher-cli/internal/model" + "github.com/JulienTant/blogwatcher-cli/internal/opml" "github.com/JulienTant/blogwatcher-cli/internal/storage" ) @@ -140,6 +143,30 @@ func MarkAllArticlesRead(ctx context.Context, db *storage.Database, blogName str return articles, nil } +func ImportOPML(ctx context.Context, db *storage.Database, r io.Reader) (added int, skipped int, err error) { + feeds, err := opml.Parse(r) + if err != nil { + return 0, 0, err + } + for _, feed := range feeds { + siteURL := feed.SiteURL + if siteURL == "" { + siteURL = feed.FeedURL + } + _, err := AddBlog(ctx, db, feed.Title, siteURL, feed.FeedURL, "") + if err != nil { + var alreadyExists BlogAlreadyExistsError + if errors.As(err, &alreadyExists) { + skipped++ + continue + } + return added, skipped, err + } + added++ + } + return added, skipped, nil +} + func MarkArticleUnread(ctx context.Context, db *storage.Database, articleID int64) (model.Article, error) { article, err := db.GetArticle(ctx, articleID) if err != nil { diff --git a/internal/controller/controller_test.go b/internal/controller/controller_test.go index 7e6b5c0..166169b 100644 --- a/internal/controller/controller_test.go +++ b/internal/controller/controller_test.go @@ -3,10 +3,12 @@ package controller import ( "context" "path/filepath" + "strings" "testing" "github.com/JulienTant/blogwatcher-cli/internal/model" "github.com/JulienTant/blogwatcher-cli/internal/storage" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -66,6 +68,91 @@ func TestGetArticlesFilters(t *testing.T) { require.Error(t, err, "expected blog not found error") } +func TestImportOPML(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + opmlData := ` + + Subscriptions + + + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 2, added) + assert.Equal(t, 0, skipped) + + // Verify blogs were actually persisted. + blogs, err := db.ListBlogs(ctx) + require.NoError(t, err) + assert.Len(t, blogs, 2) +} + +func TestImportOPMLSkipsDuplicates(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + // Pre-add a blog that will conflict. + _, err := AddBlog(ctx, db, "Blog A", "http://a.com", "http://a.com/feed", "") + require.NoError(t, err) + + opmlData := ` + + Subscriptions + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 1, added) + assert.Equal(t, 1, skipped) +} + +func TestImportOPMLFallbackSiteURL(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + // Feed with no htmlUrl -- siteURL should fall back to feedURL. + opmlData := ` + + Test + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 1, added) + assert.Equal(t, 0, skipped) + + blog, err := db.GetBlogByName(ctx, "NoSite") + require.NoError(t, err) + require.NotNil(t, blog) + assert.Equal(t, "http://nosite.com/feed", blog.URL, "site URL should fall back to feed URL") +} + +func TestImportOPMLInvalidXML(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + _, _, err := ImportOPML(ctx, db, strings.NewReader("not xml")) + require.Error(t, err) +} + func openTestDB(t *testing.T) *storage.Database { t.Helper() path := filepath.Join(t.TempDir(), "blogwatcher-cli.db") diff --git a/internal/opml/opml.go b/internal/opml/opml.go new file mode 100644 index 0000000..16b0b6e --- /dev/null +++ b/internal/opml/opml.go @@ -0,0 +1,67 @@ +package opml + +import ( + "encoding/xml" + "io" + + "golang.org/x/net/html/charset" +) + +type document struct { + Body body `xml:"body"` +} + +type body struct { + Outlines []outline `xml:"outline"` +} + +type outline struct { + Type string `xml:"type,attr"` + Text string `xml:"text,attr"` + Title string `xml:"title,attr"` + XMLURL string `xml:"xmlUrl,attr"` + HTMLURL string `xml:"htmlUrl,attr"` + Children []outline `xml:"outline"` +} + +// Feed represents a single RSS/Atom feed parsed from an OPML document. +type Feed struct { + Title string + SiteURL string + FeedURL string +} + +// Parse reads an OPML document from r and returns all feeds found within it. +// It supports OPML 1.0/2.0 with nested categories and non-UTF-8 encodings. +func Parse(r io.Reader) ([]Feed, error) { + var doc document + dec := xml.NewDecoder(r) + dec.CharsetReader = charset.NewReaderLabel + if err := dec.Decode(&doc); err != nil { + return nil, err + } + + var feeds []Feed + for _, o := range doc.Body.Outlines { + feeds = collectFeeds(feeds, o) + } + return feeds, nil +} + +func collectFeeds(feeds []Feed, o outline) []Feed { + if o.XMLURL != "" { + title := o.Title + if title == "" { + title = o.Text + } + feeds = append(feeds, Feed{ + Title: title, + SiteURL: o.HTMLURL, + FeedURL: o.XMLURL, + }) + } + for _, child := range o.Children { + feeds = collectFeeds(feeds, child) + } + return feeds +} diff --git a/internal/opml/opml_test.go b/internal/opml/opml_test.go new file mode 100644 index 0000000..e603d2c --- /dev/null +++ b/internal/opml/opml_test.go @@ -0,0 +1,171 @@ +package opml + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + input := ` + + Test + + + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 3) + + expected := []Feed{ + {Title: "Blog One", SiteURL: "http://blog1.com", FeedURL: "http://blog1.com/feed"}, + {Title: "Blog Two", SiteURL: "http://blog2.com", FeedURL: "http://blog2.com/rss"}, + {Title: "Blog Three", SiteURL: "http://blog3.com", FeedURL: "http://blog3.com/atom.xml"}, + } + for i, f := range feeds { + assert.Equal(t, expected[i], f, "feed %d", i) + } +} + +func TestParseEmptyCategories(t *testing.T) { + input := ` + + Test + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Blog", feeds[0].Title) +} + +func TestParseUsesTextWhenTitleEmpty(t *testing.T) { + input := ` + + Test + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Fallback Name", feeds[0].Title) +} + +func TestParseSkipsOutlineWithoutXmlUrl(t *testing.T) { + input := ` + + Test + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Has Feed", feeds[0].Title) +} + +func TestParseSubscriptionList(t *testing.T) { + input := ` + + mySubscriptions.opml + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 3) + assert.Equal(t, "CNET News.com", feeds[0].Title) + assert.Equal(t, "http://news.com.com/2547-1_3-0-5.xml", feeds[0].FeedURL) + assert.Equal(t, "http://news.com.com/", feeds[0].SiteURL) +} + +func TestParseNoFeeds(t *testing.T) { + input := ` + + states.opml + + + + + + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseDirectoryLinks(t *testing.T) { + input := ` + + scriptingNewsDirectory.opml + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseCategoryAttribute(t *testing.T) { + input := ` + + Illustrating the category attribute + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseInvalidXML(t *testing.T) { + input := `not valid xml at all` + + _, err := Parse(strings.NewReader(input)) + require.Error(t, err) +} From 76c4e16cc4f88759775ca0c15f411b985625c212 Mon Sep 17 00:00:00 2001 From: Julien Tant Date: Fri, 3 Apr 2026 18:02:56 -0700 Subject: [PATCH 2/2] Fall back to site URL when OPML feed title is empty Prevents multiple untitled feeds from colliding on empty name. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/controller/controller.go | 7 +++++- internal/controller/controller_test.go | 30 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index f3a02a7..be786b2 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "strings" "github.com/JulienTant/blogwatcher-cli/internal/model" "github.com/JulienTant/blogwatcher-cli/internal/opml" @@ -158,7 +159,11 @@ func ImportOPML(ctx context.Context, db *storage.Database, r io.Reader) (added i if siteURL == "" { siteURL = feed.FeedURL } - _, err := AddBlog(ctx, db, feed.Title, siteURL, feed.FeedURL, "") + title := strings.TrimSpace(feed.Title) + if title == "" { + title = siteURL + } + _, err := AddBlog(ctx, db, title, siteURL, feed.FeedURL, "") if err != nil { var alreadyExists BlogAlreadyExistsError if errors.As(err, &alreadyExists) { diff --git a/internal/controller/controller_test.go b/internal/controller/controller_test.go index 36aae9a..3e0a005 100644 --- a/internal/controller/controller_test.go +++ b/internal/controller/controller_test.go @@ -153,6 +153,36 @@ func TestImportOPMLInvalidXML(t *testing.T) { require.Error(t, err) } +func TestImportOPMLEmptyTitleFallback(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + opmlData := ` + + Test + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 2, added, "both feeds should be added with fallback names") + assert.Equal(t, 0, skipped) + + blogA, err := db.GetBlogByURL(ctx, "http://a.com") + require.NoError(t, err) + require.NotNil(t, blogA) + assert.Equal(t, "http://a.com", blogA.Name, "name should fall back to site URL") + + blogB, err := db.GetBlogByURL(ctx, "http://b.com") + require.NoError(t, err) + require.NotNil(t, blogB) + assert.Equal(t, "http://b.com", blogB.Name, "name should fall back to site URL") +} + func TestGetArticlesFilterByCategory(t *testing.T) { ctx := context.Background() db := openTestDB(t)