diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 827d855..af4dae3 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -406,6 +406,49 @@ func TestE2E(t *testing.T) { } } +func TestImportOPML(t *testing.T) { + baseURL := startTestServer(t) + + for _, mode := range []string{"flags", "env"} { + t.Run(mode, func(t *testing.T) { + c := &cliOpts{ + mode: mode, + dbPath: filepath.Join(t.TempDir(), "test.db"), + } + + // Write an OPML file with feeds pointing at the test server. + opmlContent := fmt.Sprintf(` + + Test Subscriptions + + + + + + +`, baseURL, baseURL, baseURL, baseURL) + opmlPath := filepath.Join(t.TempDir(), "subs.opml") + err := os.WriteFile(opmlPath, []byte(opmlContent), 0o644) + require.NoError(t, err) + + // Import the OPML file. + out := c.ok(t, []string{"import", opmlPath}, nil) + checkOutput(t, "30_import_opml", out, baseURL) + + // Verify blogs appear in list. + out = c.ok(t, []string{"blogs"}, nil) + checkOutput(t, "31_import_blogs_listed", out, baseURL) + + // Re-import the same file -- all should be skipped as duplicates. + out = c.ok(t, []string{"import", opmlPath}, nil) + checkOutput(t, "32_import_opml_duplicates", out, baseURL) + + // Import a nonexistent file should fail. + c.fail(t, []string{"import", "/nonexistent/file.opml"}, nil) + }) + } +} + func TestSSRFProtection(t *testing.T) { baseURL := startTestServer(t) dbPath := filepath.Join(t.TempDir(), "test.db") diff --git a/e2e/expected/30_import_opml.txt b/e2e/expected/30_import_opml.txt new file mode 100644 index 0000000..7e62e67 --- /dev/null +++ b/e2e/expected/30_import_opml.txt @@ -0,0 +1 @@ +Imported 2 blog(s), skipped 0 duplicate(s) diff --git a/e2e/expected/31_import_blogs_listed.txt b/e2e/expected/31_import_blogs_listed.txt new file mode 100644 index 0000000..19d8c0a --- /dev/null +++ b/e2e/expected/31_import_blogs_listed.txt @@ -0,0 +1,10 @@ +Tracked blogs (2): + + GitHub Blog + URL: {{SERVER}}/github/ + Feed: {{SERVER}}/github/feed/ + + Go Blog + URL: {{SERVER}}/go/ + Feed: {{SERVER}}/go/feed.atom + diff --git a/e2e/expected/32_import_opml_duplicates.txt b/e2e/expected/32_import_opml_duplicates.txt new file mode 100644 index 0000000..ffaeff6 --- /dev/null +++ b/e2e/expected/32_import_opml_duplicates.txt @@ -0,0 +1 @@ +Imported 0 blog(s), skipped 2 duplicate(s) diff --git a/go.mod b/go.mod index 6738bbf..8a7e7a2 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 + golang.org/x/net v0.52.0 golang.org/x/sync v0.20.0 modernc.org/sqlite v1.48.1 ) @@ -42,7 +43,6 @@ require ( github.com/spf13/pflag v1.0.10 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.52.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.35.0 // indirect golang.org/x/tools v0.43.0 // indirect diff --git a/internal/cli/commands.go b/internal/cli/commands.go index 4c2fac3..7a3f14b 100644 --- a/internal/cli/commands.go +++ b/internal/cli/commands.go @@ -396,6 +396,42 @@ func newUnreadCommand() *cobra.Command { return cmd } +func newImportCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "import ", + Short: "Import blogs from an OPML file.", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + f, err := os.Open(args[0]) + if err != nil { + return err + } + defer func() { + if err := f.Close(); err != nil { + fmt.Fprintf(os.Stderr, "close file: %v\n", err) + } + }() + db, err := storage.OpenDatabase(cmd.Context(), viper.GetString("db")) + if err != nil { + return err + } + defer func() { + if err := db.Close(); err != nil { + fmt.Fprintf(os.Stderr, "close db: %v\n", err) + } + }() + added, skipped, err := controller.ImportOPML(cmd.Context(), db, f) + if err != nil { + printError(err) + return markError(err) + } + cprintf([]color.Attribute{color.FgGreen}, "Imported %d blog(s), skipped %d duplicate(s)\n", added, skipped) + return nil + }, + } + return cmd +} + func printScanResult(result scanner.ScanResult) { statusColor := []color.Attribute{color.FgWhite} if result.NewArticles > 0 { diff --git a/internal/cli/root.go b/internal/cli/root.go index 161840a..383aeeb 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -34,6 +34,7 @@ func NewRootCommand() *cobra.Command { rootCmd.AddCommand(newReadCommand()) rootCmd.AddCommand(newReadAllCommand()) rootCmd.AddCommand(newUnreadCommand()) + rootCmd.AddCommand(newImportCommand()) return rootCmd } diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 62b4c53..be786b2 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -2,9 +2,13 @@ package controller import ( "context" + "errors" "fmt" + "io" + "strings" "github.com/JulienTant/blogwatcher-cli/internal/model" + "github.com/JulienTant/blogwatcher-cli/internal/opml" "github.com/JulienTant/blogwatcher-cli/internal/storage" ) @@ -145,6 +149,34 @@ func MarkAllArticlesRead(ctx context.Context, db *storage.Database, blogName str return articles, nil } +func ImportOPML(ctx context.Context, db *storage.Database, r io.Reader) (added int, skipped int, err error) { + feeds, err := opml.Parse(r) + if err != nil { + return 0, 0, err + } + for _, feed := range feeds { + siteURL := feed.SiteURL + if siteURL == "" { + siteURL = feed.FeedURL + } + title := strings.TrimSpace(feed.Title) + if title == "" { + title = siteURL + } + _, err := AddBlog(ctx, db, title, siteURL, feed.FeedURL, "") + if err != nil { + var alreadyExists BlogAlreadyExistsError + if errors.As(err, &alreadyExists) { + skipped++ + continue + } + return added, skipped, err + } + added++ + } + return added, skipped, nil +} + func MarkArticleUnread(ctx context.Context, db *storage.Database, articleID int64) (model.Article, error) { article, err := db.GetArticle(ctx, articleID) if err != nil { diff --git a/internal/controller/controller_test.go b/internal/controller/controller_test.go index 5cea5a6..3e0a005 100644 --- a/internal/controller/controller_test.go +++ b/internal/controller/controller_test.go @@ -3,10 +3,12 @@ package controller import ( "context" "path/filepath" + "strings" "testing" "github.com/JulienTant/blogwatcher-cli/internal/model" "github.com/JulienTant/blogwatcher-cli/internal/storage" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -66,6 +68,121 @@ func TestGetArticlesFilters(t *testing.T) { require.Error(t, err, "expected blog not found error") } +func TestImportOPML(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + opmlData := ` + + Subscriptions + + + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 2, added) + assert.Equal(t, 0, skipped) + + // Verify blogs were actually persisted. + blogs, err := db.ListBlogs(ctx) + require.NoError(t, err) + assert.Len(t, blogs, 2) +} + +func TestImportOPMLSkipsDuplicates(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + // Pre-add a blog that will conflict. + _, err := AddBlog(ctx, db, "Blog A", "http://a.com", "http://a.com/feed", "") + require.NoError(t, err) + + opmlData := ` + + Subscriptions + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 1, added) + assert.Equal(t, 1, skipped) +} + +func TestImportOPMLFallbackSiteURL(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + // Feed with no htmlUrl -- siteURL should fall back to feedURL. + opmlData := ` + + Test + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 1, added) + assert.Equal(t, 0, skipped) + + blog, err := db.GetBlogByName(ctx, "NoSite") + require.NoError(t, err) + require.NotNil(t, blog) + assert.Equal(t, "http://nosite.com/feed", blog.URL, "site URL should fall back to feed URL") +} + +func TestImportOPMLInvalidXML(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + _, _, err := ImportOPML(ctx, db, strings.NewReader("not xml")) + require.Error(t, err) +} + +func TestImportOPMLEmptyTitleFallback(t *testing.T) { + ctx := context.Background() + db := openTestDB(t) + defer func() { require.NoError(t, db.Close()) }() + + opmlData := ` + + Test + + + + +` + + added, skipped, err := ImportOPML(ctx, db, strings.NewReader(opmlData)) + require.NoError(t, err) + assert.Equal(t, 2, added, "both feeds should be added with fallback names") + assert.Equal(t, 0, skipped) + + blogA, err := db.GetBlogByURL(ctx, "http://a.com") + require.NoError(t, err) + require.NotNil(t, blogA) + assert.Equal(t, "http://a.com", blogA.Name, "name should fall back to site URL") + + blogB, err := db.GetBlogByURL(ctx, "http://b.com") + require.NoError(t, err) + require.NotNil(t, blogB) + assert.Equal(t, "http://b.com", blogB.Name, "name should fall back to site URL") +} + func TestGetArticlesFilterByCategory(t *testing.T) { ctx := context.Background() db := openTestDB(t) diff --git a/internal/opml/opml.go b/internal/opml/opml.go new file mode 100644 index 0000000..16b0b6e --- /dev/null +++ b/internal/opml/opml.go @@ -0,0 +1,67 @@ +package opml + +import ( + "encoding/xml" + "io" + + "golang.org/x/net/html/charset" +) + +type document struct { + Body body `xml:"body"` +} + +type body struct { + Outlines []outline `xml:"outline"` +} + +type outline struct { + Type string `xml:"type,attr"` + Text string `xml:"text,attr"` + Title string `xml:"title,attr"` + XMLURL string `xml:"xmlUrl,attr"` + HTMLURL string `xml:"htmlUrl,attr"` + Children []outline `xml:"outline"` +} + +// Feed represents a single RSS/Atom feed parsed from an OPML document. +type Feed struct { + Title string + SiteURL string + FeedURL string +} + +// Parse reads an OPML document from r and returns all feeds found within it. +// It supports OPML 1.0/2.0 with nested categories and non-UTF-8 encodings. +func Parse(r io.Reader) ([]Feed, error) { + var doc document + dec := xml.NewDecoder(r) + dec.CharsetReader = charset.NewReaderLabel + if err := dec.Decode(&doc); err != nil { + return nil, err + } + + var feeds []Feed + for _, o := range doc.Body.Outlines { + feeds = collectFeeds(feeds, o) + } + return feeds, nil +} + +func collectFeeds(feeds []Feed, o outline) []Feed { + if o.XMLURL != "" { + title := o.Title + if title == "" { + title = o.Text + } + feeds = append(feeds, Feed{ + Title: title, + SiteURL: o.HTMLURL, + FeedURL: o.XMLURL, + }) + } + for _, child := range o.Children { + feeds = collectFeeds(feeds, child) + } + return feeds +} diff --git a/internal/opml/opml_test.go b/internal/opml/opml_test.go new file mode 100644 index 0000000..e603d2c --- /dev/null +++ b/internal/opml/opml_test.go @@ -0,0 +1,171 @@ +package opml + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + input := ` + + Test + + + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 3) + + expected := []Feed{ + {Title: "Blog One", SiteURL: "http://blog1.com", FeedURL: "http://blog1.com/feed"}, + {Title: "Blog Two", SiteURL: "http://blog2.com", FeedURL: "http://blog2.com/rss"}, + {Title: "Blog Three", SiteURL: "http://blog3.com", FeedURL: "http://blog3.com/atom.xml"}, + } + for i, f := range feeds { + assert.Equal(t, expected[i], f, "feed %d", i) + } +} + +func TestParseEmptyCategories(t *testing.T) { + input := ` + + Test + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Blog", feeds[0].Title) +} + +func TestParseUsesTextWhenTitleEmpty(t *testing.T) { + input := ` + + Test + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Fallback Name", feeds[0].Title) +} + +func TestParseSkipsOutlineWithoutXmlUrl(t *testing.T) { + input := ` + + Test + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 1) + assert.Equal(t, "Has Feed", feeds[0].Title) +} + +func TestParseSubscriptionList(t *testing.T) { + input := ` + + mySubscriptions.opml + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + require.Len(t, feeds, 3) + assert.Equal(t, "CNET News.com", feeds[0].Title) + assert.Equal(t, "http://news.com.com/2547-1_3-0-5.xml", feeds[0].FeedURL) + assert.Equal(t, "http://news.com.com/", feeds[0].SiteURL) +} + +func TestParseNoFeeds(t *testing.T) { + input := ` + + states.opml + + + + + + + + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseDirectoryLinks(t *testing.T) { + input := ` + + scriptingNewsDirectory.opml + + + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseCategoryAttribute(t *testing.T) { + input := ` + + Illustrating the category attribute + + + +` + + feeds, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + assert.Empty(t, feeds) +} + +func TestParseInvalidXML(t *testing.T) { + input := `not valid xml at all` + + _, err := Parse(strings.NewReader(input)) + require.Error(t, err) +}