diff --git a/CLAUDE.md b/CLAUDE.md index df7f54d..40087de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -57,6 +57,12 @@ make lint # Run linter ./msgvault build-cache --full-rebuild # Full rebuild ./msgvault stats # Show archive stats +# Apple Mail import +./msgvault import-emlx # Auto-discover accounts +./msgvault import-emlx ~/Library/Mail # Explicit mail directory +./msgvault import-emlx --account me@gmail.com # Specific account(s) +./msgvault import-emlx /path/to/dir --identifier me@gmail.com # Manual fallback + # Maintenance ./msgvault repair-encoding # Fix UTF-8 encoding issues ``` @@ -71,6 +77,8 @@ make lint # Run linter - `build_cache.go` - Parquet cache builder (DuckDB) - `repair_encoding.go` - UTF-8 encoding repair +- `import_emlx.go` - Apple Mail .emlx import command + ### Core (`internal/`) - `tui/model.go` - Bubble Tea TUI model and update logic - `tui/view.go` - View rendering with lipgloss styling diff --git a/cmd/msgvault/cmd/import_emlx.go b/cmd/msgvault/cmd/import_emlx.go index 531ae9a..c416ae7 100644 --- a/cmd/msgvault/cmd/import_emlx.go +++ b/cmd/msgvault/cmd/import_emlx.go @@ -3,11 +3,15 @@ package cmd import ( "context" "fmt" + "io" "os" "os/signal" + "path/filepath" + "strings" "syscall" "github.com/spf13/cobra" + "github.com/wesm/msgvault/internal/applemail" "github.com/wesm/msgvault/internal/importer" "github.com/wesm/msgvault/internal/store" ) @@ -17,29 +21,60 @@ var ( importEmlxNoResume bool importEmlxCheckpointInterval int importEmlxNoAttachments bool + importEmlxAccountsDB string + importEmlxAccounts []string + importEmlxIdentifier string ) var importEmlxCmd = &cobra.Command{ - Use: "import-emlx ", + Use: "import-emlx [mail-dir]", Short: "Import Apple Mail .emlx files into msgvault", Long: `Import Apple Mail .emlx files into msgvault. -The mail directory should be an Apple Mail mailbox tree containing -.mbox or .imapmbox directories, each with a Messages/ subdirectory -of .emlx files. You can also point directly at a single .mbox directory. +By default, auto-discovers accounts from Apple Mail's V10 directory layout +by reading ~/Library/Accounts/Accounts4.sqlite to map account GUIDs to +email addresses. + +If mail-dir is omitted, defaults to ~/Library/Mail. Labels are derived from directory names. Messages that appear in multiple mailboxes are deduplicated and given labels from each. Examples: - msgvault import-emlx me@gmail.com ~/Downloads/mail-2009/Mail/ - msgvault import-emlx me@gmail.com ~/Mail/INBOX.mbox/ + # Auto-discover accounts from default Apple Mail location + msgvault import-emlx + + # Auto-discover accounts from explicit mail directory + msgvault import-emlx ~/Library/Mail + + # Import only specific account(s) + msgvault import-emlx --account me@gmail.com + msgvault import-emlx --account me@gmail.com --account work@company.com + + # Manual fallback: import a single directory with explicit identifier + msgvault import-emlx ~/Library/Mail/V10/SOME-GUID --identifier me@gmail.com + msgvault import-emlx ~/Mail/INBOX.mbox/ --identifier me@gmail.com `, - Args: cobra.ExactArgs(2), + Args: cobra.MaximumNArgs(1), SilenceUsage: true, RunE: func(cmd *cobra.Command, args []string) error { - identifier := args[0] - mailDir := args[1] + // Determine mail directory. + var mailDir string + if len(args) > 0 { + mailDir = args[0] + } else { + home, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("determine home directory: %w", err) + } + mailDir = filepath.Join(home, "Library", "Mail") + } + + // Expand ~ if present. + if strings.HasPrefix(mailDir, "~/") { + home, _ := os.UserHomeDir() + mailDir = filepath.Join(home, mailDir[2:]) + } ctx, cancel := context.WithCancel(cmd.Context()) defer cancel() @@ -103,8 +138,131 @@ Examples: attachmentsDir = "" } + if importEmlxIdentifier != "" { + // Manual fallback: single import with explicit identifier. + return importSingleAccount(ctx, cmd, st, mailDir, importEmlxIdentifier, attachmentsDir) + } + + // Auto mode: discover accounts from V10 layout + Accounts4.sqlite. + return importAutoAccounts(ctx, cmd, st, mailDir, attachmentsDir) + }, +} + +func importSingleAccount( + ctx context.Context, + cmd *cobra.Command, + st *store.Store, + mailDir, identifier, attachmentsDir string, +) error { + summary, err := importer.ImportEmlxDir( + ctx, st, mailDir, importer.EmlxImportOptions{ + SourceType: importEmlxSourceType, + Identifier: identifier, + NoResume: importEmlxNoResume, + CheckpointInterval: importEmlxCheckpointInterval, + AttachmentsDir: attachmentsDir, + Logger: logger, + }, + ) + if err != nil { + return err + } + + printImportSummary(cmd, ctx, *summary) + return importResultError(ctx, *summary) +} + +func importAutoAccounts( + ctx context.Context, + cmd *cobra.Command, + st *store.Store, + mailDir, attachmentsDir string, +) error { + accountsDBPath := importEmlxAccountsDB + if strings.HasPrefix(accountsDBPath, "~/") { + home, _ := os.UserHomeDir() + accountsDBPath = filepath.Join(home, accountsDBPath[2:]) + } + + out := cmd.OutOrStdout() + + accounts, err := applemail.DiscoverV10Accounts(mailDir, accountsDBPath, logger) + if err != nil { + return fmt.Errorf("discover accounts: %w", err) + } + + if len(accounts) == 0 { + return fmt.Errorf( + "no V10 accounts found in %s\n\n"+ + "If this is not an Apple Mail V10 directory, use --identifier to specify\n"+ + "the account email manually:\n\n"+ + " msgvault import-emlx %s --identifier you@gmail.com", + mailDir, mailDir, + ) + } + + // Filter by --account flags if set. + if len(importEmlxAccounts) > 0 { + filter := make(map[string]bool) + for _, a := range importEmlxAccounts { + filter[strings.ToLower(a)] = true + } + + var filtered []applemail.AccountInfo + for _, a := range accounts { + if filter[strings.ToLower(a.Email)] || filter[strings.ToLower(a.Identifier())] { + filtered = append(filtered, a) + } + } + + if len(filtered) == 0 { + var available []string + for _, a := range accounts { + available = append(available, a.Identifier()) + } + return fmt.Errorf( + "no matching accounts found for --account filter\n"+ + "Available accounts: %s", + strings.Join(available, ", "), + ) + } + accounts = filtered + } + + fmt.Fprintf(out, "Discovered %d account(s):\n", len(accounts)) + for _, a := range accounts { + if a.Email != "" { + fmt.Fprintf(out, " - %s (%s)\n", a.Email, a.Description) + } else { + fmt.Fprintf(out, " - %s\n", a.Description) + } + } + fmt.Fprintln(out) + + var grandTotal importer.EmlxImportSummary + var importErrors []error + + for _, account := range accounts { + if ctx.Err() != nil { + fmt.Fprintln(out, "Import interrupted between accounts.") + break + } + + identifier := account.Identifier() + accountDir, err := applemail.V10AccountDir(mailDir, account.GUID) + if err != nil { + fmt.Fprintf(out, "Skipping %s: %v\n", identifier, err) + continue + } + + if account.Email != "" { + fmt.Fprintf(out, "Importing %s (%s)...\n", account.Email, account.Description) + } else { + fmt.Fprintf(out, "Importing %s...\n", account.Description) + } + summary, err := importer.ImportEmlxDir( - ctx, st, mailDir, importer.EmlxImportOptions{ + ctx, st, accountDir, importer.EmlxImportOptions{ SourceType: importEmlxSourceType, Identifier: identifier, NoResume: importEmlxNoResume, @@ -114,54 +272,98 @@ Examples: }, ) if err != nil { - return err + importErrors = append(importErrors, fmt.Errorf("%s: %w", identifier, err)) + continue } - out := cmd.OutOrStdout() - if ctx.Err() != nil { - fmt.Fprintln(out, "Import interrupted. Run again to resume.") - } else if summary.Errors > 0 { - fmt.Fprintln(out, "Import complete (with errors).") - } else { - fmt.Fprintln(out, "Import complete.") + printImportSummary(cmd, ctx, *summary) + fmt.Fprintln(out) + + // Accumulate totals. + grandTotal.MailboxesTotal += summary.MailboxesTotal + grandTotal.MailboxesImported += summary.MailboxesImported + grandTotal.MessagesProcessed += summary.MessagesProcessed + grandTotal.MessagesAdded += summary.MessagesAdded + grandTotal.MessagesUpdated += summary.MessagesUpdated + grandTotal.MessagesSkipped += summary.MessagesSkipped + grandTotal.Errors += summary.Errors + if summary.HardErrors { + grandTotal.HardErrors = true } + } - fmt.Fprintf(out, - " Mailboxes: %d discovered, %d imported\n", - summary.MailboxesTotal, summary.MailboxesImported, - ) - fmt.Fprintf(out, - " Processed: %d messages\n", - summary.MessagesProcessed, - ) - fmt.Fprintf(out, - " Added: %d messages\n", - summary.MessagesAdded, - ) - fmt.Fprintf(out, - " Updated: %d messages\n", - summary.MessagesUpdated, - ) - fmt.Fprintf(out, - " Skipped (dup): %d messages\n", - summary.MessagesSkipped, - ) - fmt.Fprintf(out, - " Errors: %d\n", - summary.Errors, - ) + if len(accounts) > 1 { + fmt.Fprintln(out, "=== Grand Total ===") + printImportStats(out, grandTotal) + } - if ctx.Err() == nil && summary.HardErrors { - return fmt.Errorf( - "import completed with %d errors", - summary.Errors, - ) + if len(importErrors) > 0 { + for _, e := range importErrors { + fmt.Fprintf(cmd.ErrOrStderr(), "Error: %v\n", e) } - if ctx.Err() != nil { - return context.Canceled - } - return nil - }, + return fmt.Errorf("import completed with %d account error(s)", len(importErrors)) + } + + if ctx.Err() != nil { + return context.Canceled + } + + if grandTotal.HardErrors { + return fmt.Errorf("import completed with %d errors", grandTotal.Errors) + } + + return nil +} + +func importResultError(ctx context.Context, summary importer.EmlxImportSummary) error { + if ctx.Err() != nil { + return context.Canceled + } + if summary.HardErrors { + return fmt.Errorf("import completed with %d errors", summary.Errors) + } + return nil +} + +func printImportSummary(cmd *cobra.Command, ctx context.Context, summary importer.EmlxImportSummary) { + out := cmd.OutOrStdout() + + if ctx.Err() != nil { + fmt.Fprintln(out, "Import interrupted. Run again to resume.") + } else if summary.Errors > 0 { + fmt.Fprintln(out, "Import complete (with errors).") + } else { + fmt.Fprintln(out, "Import complete.") + } + + printImportStats(out, summary) +} + +func printImportStats(out io.Writer, summary importer.EmlxImportSummary) { + fmt.Fprintf(out, + " Mailboxes: %d discovered, %d imported\n", + summary.MailboxesTotal, summary.MailboxesImported, + ) + fmt.Fprintf(out, + " Processed: %d messages\n", + summary.MessagesProcessed, + ) + fmt.Fprintf(out, + " Added: %d messages\n", + summary.MessagesAdded, + ) + fmt.Fprintf(out, + " Updated: %d messages\n", + summary.MessagesUpdated, + ) + fmt.Fprintf(out, + " Skipped (dup): %d messages\n", + summary.MessagesSkipped, + ) + fmt.Fprintf(out, + " Errors: %d\n", + summary.Errors, + ) } func init() { @@ -183,4 +385,16 @@ func init() { &importEmlxNoAttachments, "no-attachments", false, "Do not store attachments on disk", ) + importEmlxCmd.Flags().StringVar( + &importEmlxAccountsDB, "accounts-db", applemail.DefaultAccountsDBPath(), + "Path to Apple's Accounts4.sqlite database", + ) + importEmlxCmd.Flags().StringSliceVar( + &importEmlxAccounts, "account", nil, + "Filter to specific account email(s) (repeatable)", + ) + importEmlxCmd.Flags().StringVar( + &importEmlxIdentifier, "identifier", "", + "Explicit email/identifier for single-directory import (manual fallback)", + ) } diff --git a/internal/applemail/accounts.go b/internal/applemail/accounts.go new file mode 100644 index 0000000..88d537d --- /dev/null +++ b/internal/applemail/accounts.go @@ -0,0 +1,202 @@ +package applemail + +import ( + "database/sql" + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + + _ "github.com/mattn/go-sqlite3" + "github.com/wesm/msgvault/internal/emlx" +) + +// AccountInfo describes an Apple Mail account resolved from Accounts4.sqlite. +type AccountInfo struct { + // GUID is the V10 directory UUID. + GUID string + + // Email is the resolved email address. Empty for local accounts + // like "On My Mac". + Email string + + // Description is the account description (e.g. "Google", "Yahoo!", + // "On My Mac"). + Description string +} + +// Identifier returns the best identifier for this account: the email +// address if available, otherwise the description. +func (a AccountInfo) Identifier() string { + if a.Email != "" { + return a.Email + } + return a.Description +} + +// DefaultAccountsDBPath returns the default path to Apple's +// Accounts4.sqlite database. +func DefaultAccountsDBPath() string { + home, err := os.UserHomeDir() + if err != nil { + return "" + } + return filepath.Join(home, "Library", "Accounts", "Accounts4.sqlite") +} + +// ResolveAccounts opens the Accounts4.sqlite database at dbPath and +// resolves the given GUIDs to account information. Returns a map of +// GUID → AccountInfo for each GUID that was found. +func ResolveAccounts(dbPath string, guids []string) (map[string]AccountInfo, error) { + if len(guids) == 0 { + return nil, nil + } + + db, err := sql.Open("sqlite3", dbPath+"?mode=ro") + if err != nil { + return nil, fmt.Errorf("open accounts db: %w", err) + } + defer db.Close() + + // Build placeholders for IN clause. + placeholders := make([]string, len(guids)) + args := make([]interface{}, len(guids)) + for i, g := range guids { + placeholders[i] = "?" + args[i] = g + } + + query := ` + SELECT + child.ZIDENTIFIER, + COALESCE(child.ZUSERNAME, parent.ZUSERNAME, '') AS email, + COALESCE(parent.ZACCOUNTDESCRIPTION, child.ZACCOUNTDESCRIPTION, '') AS description + FROM ZACCOUNT child + LEFT JOIN ZACCOUNT parent ON parent.Z_PK = child.ZPARENTACCOUNT + WHERE child.ZIDENTIFIER IN (` + strings.Join(placeholders, ",") + `) + ` + + rows, err := db.Query(query, args...) + if err != nil { + return nil, fmt.Errorf("query accounts: %w", err) + } + defer rows.Close() + + result := make(map[string]AccountInfo) + for rows.Next() { + var guid, email, description string + if err := rows.Scan(&guid, &email, &description); err != nil { + return nil, fmt.Errorf("scan account row: %w", err) + } + result[guid] = AccountInfo{ + GUID: guid, + Email: email, + Description: description, + } + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate account rows: %w", err) + } + + return result, nil +} + +// DiscoverV10Accounts scans mailDir for V10-style directories containing +// UUID subdirectories and resolves them to account information using the +// Accounts4.sqlite database at accountsDBPath. +func DiscoverV10Accounts(mailDir, accountsDBPath string, logger *slog.Logger) ([]AccountInfo, error) { + if logger == nil { + logger = slog.Default() + } + + // Scan for V* directories containing UUID subdirectories. + guids, err := findV10GUIDs(mailDir) + if err != nil { + return nil, fmt.Errorf("scan V10 directories: %w", err) + } + + if len(guids) == 0 { + return nil, nil + } + + resolved, err := ResolveAccounts(accountsDBPath, guids) + if err != nil { + return nil, fmt.Errorf("resolve accounts: %w", err) + } + + var accounts []AccountInfo + for _, guid := range guids { + info, ok := resolved[guid] + if !ok { + logger.Warn("GUID not found in Accounts4.sqlite, skipping", + "guid", guid) + continue + } + accounts = append(accounts, info) + } + + return accounts, nil +} + +// findV10GUIDs scans mailDir for V*/ directories containing UUID +// subdirectories and returns the unique GUIDs found. +func findV10GUIDs(mailDir string) ([]string, error) { + entries, err := os.ReadDir(mailDir) + if err != nil { + return nil, err + } + + var guids []string + seen := make(map[string]bool) + + for _, e := range entries { + if !e.IsDir() { + continue + } + name := e.Name() + // Look for V* directories (V2, V10, etc.). + if !strings.HasPrefix(name, "V") { + continue + } + + vDir := filepath.Join(mailDir, name) + subEntries, err := os.ReadDir(vDir) + if err != nil { + continue + } + + for _, sub := range subEntries { + if !sub.IsDir() { + continue + } + if emlx.IsUUID(sub.Name()) && !seen[sub.Name()] { + seen[sub.Name()] = true + guids = append(guids, sub.Name()) + } + } + } + + return guids, nil +} + +// V10AccountDir returns the path to a V10 account directory for the +// given GUID within mailDir. It searches all V* directories. +func V10AccountDir(mailDir, guid string) (string, error) { + entries, err := os.ReadDir(mailDir) + if err != nil { + return "", err + } + + for _, e := range entries { + if !e.IsDir() || !strings.HasPrefix(e.Name(), "V") { + continue + } + candidate := filepath.Join(mailDir, e.Name(), guid) + if info, err := os.Stat(candidate); err == nil && info.IsDir() { + return candidate, nil + } + } + + return "", fmt.Errorf("no V10 directory found for GUID %s in %s", guid, mailDir) +} diff --git a/internal/applemail/accounts_test.go b/internal/applemail/accounts_test.go new file mode 100644 index 0000000..ef00c86 --- /dev/null +++ b/internal/applemail/accounts_test.go @@ -0,0 +1,331 @@ +package applemail + +import ( + "database/sql" + "os" + "path/filepath" + "testing" + + _ "github.com/mattn/go-sqlite3" +) + +// createTestAccountsDB creates a temporary Accounts4.sqlite with the +// minimal schema and populates it with the given accounts. +func createTestAccountsDB(t *testing.T, accounts []testAccount) string { + t.Helper() + + dbPath := filepath.Join(t.TempDir(), "Accounts4.sqlite") + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("create test db: %v", err) + } + defer db.Close() + + _, err = db.Exec(` + CREATE TABLE ZACCOUNT ( + Z_PK INTEGER PRIMARY KEY, + ZIDENTIFIER TEXT, + ZUSERNAME TEXT, + ZACCOUNTDESCRIPTION TEXT, + ZPARENTACCOUNT INTEGER + ) + `) + if err != nil { + t.Fatalf("create schema: %v", err) + } + + for _, a := range accounts { + _, err := db.Exec( + `INSERT INTO ZACCOUNT (Z_PK, ZIDENTIFIER, ZUSERNAME, ZACCOUNTDESCRIPTION, ZPARENTACCOUNT) + VALUES (?, ?, ?, ?, ?)`, + a.pk, a.identifier, a.username, a.description, a.parentAccount, + ) + if err != nil { + t.Fatalf("insert account: %v", err) + } + } + + return dbPath +} + +type testAccount struct { + pk int + identifier string + username *string + description *string + parentAccount *int +} + +func strPtr(s string) *string { return &s } +func intPtr(i int) *int { return &i } + +func TestResolveAccounts(t *testing.T) { + // Set up accounts mimicking real Accounts4.sqlite: + // - PK 1: Google parent (has email, description "Google") + // - PK 2: IMAP child of Google (GUID, no email, inherits from parent) + // - PK 3: Yahoo parent (has email, description "Yahoo!") + // - PK 4: IMAP child of Yahoo (GUID, no email, inherits from parent) + // - PK 5: Exchange account (GUID, has own email) + // - PK 6: "On My Mac" (GUID, no email, description only) + accounts := []testAccount{ + {pk: 1, identifier: "google-parent-id", username: strPtr("user@gmail.com"), description: strPtr("Google"), parentAccount: nil}, + {pk: 2, identifier: "13C9A646-1234-5678-9ABC-E07FFBDDEED3", username: nil, description: nil, parentAccount: intPtr(1)}, + {pk: 3, identifier: "yahoo-parent-id", username: strPtr("user@yahoo.com"), description: strPtr("Yahoo!"), parentAccount: nil}, + {pk: 4, identifier: "AABBCCDD-1111-2222-3333-445566778899", username: nil, description: nil, parentAccount: intPtr(3)}, + {pk: 5, identifier: "EXCHANGE1-AAAA-BBBB-CCCC-DDDDEEEEEEEE", username: strPtr("user@exchange.com"), description: strPtr("Exchange"), parentAccount: nil}, + {pk: 6, identifier: "LOCALONLY-0000-0000-0000-000000000000", username: nil, description: strPtr("On My Mac"), parentAccount: nil}, + } + + dbPath := createTestAccountsDB(t, accounts) + + tests := []struct { + name string + guids []string + wantLen int + wantEmail map[string]string // guid → expected email + wantDesc map[string]string // guid → expected description + wantMissing []string // guids not in result + }{ + { + name: "IMAP child resolves parent email (Google)", + guids: []string{"13C9A646-1234-5678-9ABC-E07FFBDDEED3"}, + wantLen: 1, + wantEmail: map[string]string{ + "13C9A646-1234-5678-9ABC-E07FFBDDEED3": "user@gmail.com", + }, + wantDesc: map[string]string{ + "13C9A646-1234-5678-9ABC-E07FFBDDEED3": "Google", + }, + }, + { + name: "IMAP child resolves parent email (Yahoo)", + guids: []string{"AABBCCDD-1111-2222-3333-445566778899"}, + wantLen: 1, + wantEmail: map[string]string{ + "AABBCCDD-1111-2222-3333-445566778899": "user@yahoo.com", + }, + wantDesc: map[string]string{ + "AABBCCDD-1111-2222-3333-445566778899": "Yahoo!", + }, + }, + { + name: "Exchange account with own email", + guids: []string{"EXCHANGE1-AAAA-BBBB-CCCC-DDDDEEEEEEEE"}, + wantLen: 1, + wantEmail: map[string]string{ + "EXCHANGE1-AAAA-BBBB-CCCC-DDDDEEEEEEEE": "user@exchange.com", + }, + wantDesc: map[string]string{ + "EXCHANGE1-AAAA-BBBB-CCCC-DDDDEEEEEEEE": "Exchange", + }, + }, + { + name: "On My Mac has no email", + guids: []string{"LOCALONLY-0000-0000-0000-000000000000"}, + wantLen: 1, + wantEmail: map[string]string{ + "LOCALONLY-0000-0000-0000-000000000000": "", + }, + wantDesc: map[string]string{ + "LOCALONLY-0000-0000-0000-000000000000": "On My Mac", + }, + }, + { + name: "Missing GUID returns no entry", + guids: []string{"NOTEXIST-0000-0000-0000-000000000000"}, + wantLen: 0, + wantMissing: []string{"NOTEXIST-0000-0000-0000-000000000000"}, + }, + { + name: "Multiple GUIDs resolved at once", + guids: []string{"13C9A646-1234-5678-9ABC-E07FFBDDEED3", "AABBCCDD-1111-2222-3333-445566778899"}, + wantLen: 2, + wantEmail: map[string]string{ + "13C9A646-1234-5678-9ABC-E07FFBDDEED3": "user@gmail.com", + "AABBCCDD-1111-2222-3333-445566778899": "user@yahoo.com", + }, + }, + { + name: "Empty GUID list", + guids: nil, + wantLen: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ResolveAccounts(dbPath, tt.guids) + if err != nil { + t.Fatalf("ResolveAccounts: %v", err) + } + + if len(result) != tt.wantLen { + t.Errorf("got %d results, want %d", len(result), tt.wantLen) + } + + for guid, wantEmail := range tt.wantEmail { + info, ok := result[guid] + if !ok { + t.Errorf("GUID %s not found in result", guid) + continue + } + if info.Email != wantEmail { + t.Errorf("GUID %s: email = %q, want %q", guid, info.Email, wantEmail) + } + } + + for guid, wantDesc := range tt.wantDesc { + info, ok := result[guid] + if !ok { + continue // already reported above + } + if info.Description != wantDesc { + t.Errorf("GUID %s: description = %q, want %q", guid, info.Description, wantDesc) + } + } + + for _, guid := range tt.wantMissing { + if _, ok := result[guid]; ok { + t.Errorf("GUID %s should not be in result", guid) + } + } + }) + } +} + +func TestResolveAccounts_BadPath(t *testing.T) { + _, err := ResolveAccounts("/nonexistent/path/Accounts4.sqlite", []string{"some-guid"}) + if err == nil { + t.Fatal("expected error for bad DB path") + } +} + +func TestAccountInfo_Identifier(t *testing.T) { + tests := []struct { + name string + info AccountInfo + want string + }{ + { + name: "has email", + info: AccountInfo{Email: "user@gmail.com", Description: "Google"}, + want: "user@gmail.com", + }, + { + name: "no email uses description", + info: AccountInfo{Email: "", Description: "On My Mac"}, + want: "On My Mac", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.info.Identifier(); got != tt.want { + t.Errorf("Identifier() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestDiscoverV10Accounts(t *testing.T) { + // Create a fake Mail directory with V10 layout. + mailDir := t.TempDir() + v10Dir := filepath.Join(mailDir, "V10") + guid1 := "13C9A646-1234-5678-9ABC-E07FFBDDEED3" + guid2 := "AABBCCDD-1111-2222-3333-445566778899" + + // Create UUID dirs under V10. + if err := os.MkdirAll(filepath.Join(v10Dir, guid1), 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(v10Dir, guid2), 0o755); err != nil { + t.Fatal(err) + } + // Also create a non-UUID dir that should be ignored. + if err := os.MkdirAll(filepath.Join(v10Dir, "MailData"), 0o755); err != nil { + t.Fatal(err) + } + + // Create accounts DB with these GUIDs. + accounts := []testAccount{ + {pk: 1, identifier: "google-parent", username: strPtr("user@gmail.com"), description: strPtr("Google"), parentAccount: nil}, + {pk: 2, identifier: guid1, username: nil, description: nil, parentAccount: intPtr(1)}, + {pk: 3, identifier: "yahoo-parent", username: strPtr("user@yahoo.com"), description: strPtr("Yahoo!"), parentAccount: nil}, + {pk: 4, identifier: guid2, username: nil, description: nil, parentAccount: intPtr(3)}, + } + dbPath := createTestAccountsDB(t, accounts) + + result, err := DiscoverV10Accounts(mailDir, dbPath, nil) + if err != nil { + t.Fatalf("DiscoverV10Accounts: %v", err) + } + + if len(result) != 2 { + t.Fatalf("got %d accounts, want 2", len(result)) + } + + // Check both accounts resolved. + byGUID := make(map[string]AccountInfo) + for _, a := range result { + byGUID[a.GUID] = a + } + + if info, ok := byGUID[guid1]; !ok { + t.Errorf("GUID %s not found", guid1) + } else if info.Email != "user@gmail.com" { + t.Errorf("GUID %s: email = %q, want %q", guid1, info.Email, "user@gmail.com") + } + + if info, ok := byGUID[guid2]; !ok { + t.Errorf("GUID %s not found", guid2) + } else if info.Email != "user@yahoo.com" { + t.Errorf("GUID %s: email = %q, want %q", guid2, info.Email, "user@yahoo.com") + } +} + +func TestFindV10GUIDs(t *testing.T) { + mailDir := t.TempDir() + + // Create V10 with UUID dirs plus non-UUID. + v10 := filepath.Join(mailDir, "V10") + guid := "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE" + if err := os.MkdirAll(filepath.Join(v10, guid), 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(v10, "MailData"), 0o755); err != nil { + t.Fatal(err) + } + + // Create V2 with another UUID. + v2 := filepath.Join(mailDir, "V2") + guid2 := "11111111-2222-3333-4444-555555555555" + if err := os.MkdirAll(filepath.Join(v2, guid2), 0o755); err != nil { + t.Fatal(err) + } + + // Non-V directory should be ignored. + if err := os.MkdirAll(filepath.Join(mailDir, "Other", "FFFFFFFF-0000-0000-0000-000000000000"), 0o755); err != nil { + t.Fatal(err) + } + + guids, err := findV10GUIDs(mailDir) + if err != nil { + t.Fatalf("findV10GUIDs: %v", err) + } + + if len(guids) != 2 { + t.Fatalf("got %d GUIDs, want 2: %v", len(guids), guids) + } + + seen := make(map[string]bool) + for _, g := range guids { + seen[g] = true + } + + if !seen[guid] { + t.Errorf("expected GUID %s", guid) + } + if !seen[guid2] { + t.Errorf("expected GUID %s", guid2) + } +} diff --git a/internal/emlx/discover.go b/internal/emlx/discover.go index 4aed607..da3370a 100644 --- a/internal/emlx/discover.go +++ b/internal/emlx/discover.go @@ -13,7 +13,7 @@ type Mailbox struct { // Path is the absolute path to the .mbox or .imapmbox directory. Path string - // MsgDir is the absolute path to the Messages/ directory + // MsgDir is the absolute path to the primary Messages/ directory // containing .emlx files. In legacy layouts this is Path/Messages; // in modern V10 layouts it is Path//Data/Messages. MsgDir string @@ -21,8 +21,27 @@ type Mailbox struct { // Label is the derived label for messages in this mailbox. Label string - // Files contains sorted .emlx filenames within MsgDir. + // Files contains sorted .emlx filenames within MsgDir plus any + // files discovered in numeric partition subdirectories. Files []string + + // FileIndex maps filename → absolute path of the Messages/ subdirectory + // within a numeric partition directory, for V10 partitioned layouts. + // Files in MsgDir itself are absent from this map. Nil when no + // partition files exist. + FileIndex map[string]string +} + +// FilePath returns the absolute path to a .emlx file within this mailbox. +// For files in numeric partition directories, the path is resolved via +// FileIndex; all other files are resolved relative to MsgDir. +func (m *Mailbox) FilePath(fileName string) string { + if m.FileIndex != nil { + if sub, ok := m.FileIndex[fileName]; ok { + return filepath.Join(sub, fileName) + } + } + return filepath.Join(m.MsgDir, fileName) } // DiscoverMailboxes walks an Apple Mail directory tree and returns all @@ -49,7 +68,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { // Auto-detect: if the path itself is a mailbox, import just that one. if isMailboxDir(abs) { - msgDir, files, err := listEmlxFiles(abs) + msgDir, files, fileIndex, err := listEmlxFiles(abs) if err != nil { return nil, err } @@ -58,6 +77,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { return []Mailbox{{ Path: abs, MsgDir: msgDir, Label: label, Files: files, + FileIndex: fileIndex, }}, nil } } @@ -81,7 +101,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { return nil } - msgDir, files, listErr := listEmlxFiles(path) + msgDir, files, fileIndex, listErr := listEmlxFiles(path) if listErr != nil || len(files) == 0 { return nil } @@ -90,6 +110,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { mailboxes = append(mailboxes, Mailbox{ Path: path, MsgDir: msgDir, Label: label, Files: files, + FileIndex: fileIndex, }) return nil @@ -132,7 +153,7 @@ func LabelFromPath(rootDir, mailboxPath string) string { continue } // V10 account GUID directories (e.g. 13C9A646-...-E07FFBDDEED3). - if isUUID(p) { + if IsUUID(p) { continue } filtered = append(filtered, p) @@ -164,7 +185,7 @@ func isMailboxDir(path string) bool { // findMessagesDir locates the Messages/ directory within a .mbox. // Returns "" if none found. Checks both legacy (Messages/) and // modern V10 (/Data/Messages/) layouts. When both exist, -// prefers whichever contains .emlx files. +// prefers whichever contains .emlx files (directly or in partitions). func findMessagesDir(mailboxPath string) string { var candidates []string @@ -175,17 +196,24 @@ func findMessagesDir(mailboxPath string) string { } // Modern V10: /Data/Messages/ subdirectory. + // Also handles partition-only layouts where Data/Messages/ doesn't exist. entries, err := os.ReadDir(mailboxPath) if err == nil { for _, e := range entries { if !e.IsDir() || e.Name() == "Messages" { continue } - modern := filepath.Join( - mailboxPath, e.Name(), "Data", "Messages", - ) - info, statErr := os.Stat(modern) - if statErr == nil && info.IsDir() { + dataDir := filepath.Join(mailboxPath, e.Name(), "Data") + dataStat, statErr := os.Stat(dataDir) + if statErr != nil || !dataStat.IsDir() { + continue + } + modern := filepath.Join(dataDir, "Messages") + msgStat, statErr := os.Stat(modern) + if statErr == nil && msgStat.IsDir() { + candidates = append(candidates, modern) + } else if hasEmlxFilesInPartitions(dataDir) { + // Partition-only: Data/Messages/ absent but partitions exist. candidates = append(candidates, modern) } } @@ -195,11 +223,18 @@ func findMessagesDir(mailboxPath string) string { return "" } - // Prefer the first candidate that has .emlx files. + // Prefer the first candidate that has .emlx files directly or + // within numeric partition subdirectories (V10 only). for _, dir := range candidates { if hasEmlxFiles(dir) { return dir } + // For V10 layout the parent is Data/; check partitions there. + dataDir := filepath.Dir(dir) + if filepath.Base(dataDir) == "Data" && + hasEmlxFilesInPartitions(dataDir) { + return dir + } } // No candidate has files; return first for isMailboxDir. @@ -214,20 +249,40 @@ func hasEmlxFiles(dir string) bool { return false } for _, e := range entries { - if e.IsDir() { + if !e.IsDir() && isEmlxFile(e.Name()) { + return true + } + } + return false +} + +// hasEmlxFilesInPartitions returns true if dir contains .emlx files +// within Messages/ subdirectories or nested numeric partition dirs (0-9). +func hasEmlxFilesInPartitions(dir string) bool { + entries, err := os.ReadDir(dir) + if err != nil { + return false + } + for _, e := range entries { + if !e.IsDir() { continue } - lower := strings.ToLower(e.Name()) - if strings.HasSuffix(lower, ".emlx") && - !strings.HasSuffix(lower, ".partial.emlx") { - return true + name := e.Name() + if name == "Messages" { + if hasEmlxFiles(filepath.Join(dir, name)) { + return true + } + } else if isDigitDir(name) { + if hasEmlxFilesInPartitions(filepath.Join(dir, name)) { + return true + } } } return false } -// isUUID returns true if s matches UUID format (8-4-4-4-12 hex). -func isUUID(s string) bool { +// IsUUID returns true if s matches UUID format (8-4-4-4-12 hex). +func IsUUID(s string) bool { if len(s) != 36 { return false } @@ -249,6 +304,16 @@ func isUUID(s string) bool { return true } +func isDigitDir(name string) bool { + return len(name) == 1 && name[0] >= '0' && name[0] <= '9' +} + +func isEmlxFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".emlx") && + !strings.HasSuffix(lower, ".partial.emlx") +} + func stripMailboxSuffix(name string) string { lower := strings.ToLower(name) if strings.HasSuffix(lower, ".imapmbox") { @@ -260,43 +325,91 @@ func stripMailboxSuffix(name string) string { return name } -// listEmlxFiles returns the Messages directory path and sorted .emlx -// filenames within it, excluding .partial.emlx. Returns ("", nil, nil) -// if no Messages directory is found. +// listEmlxFiles returns the Messages directory path, sorted .emlx +// filenames (from both the primary Messages/ dir and numeric partition +// subdirectories), and a FileIndex mapping partition filenames to their +// containing subdirectory. Returns ("", nil, nil, nil) if no Messages +// directory is found. func listEmlxFiles( mailboxPath string, -) (string, []string, error) { +) (string, []string, map[string]string, error) { msgDir := findMessagesDir(mailboxPath) if msgDir == "" { - return "", nil, nil + return "", nil, nil, nil } entries, err := os.ReadDir(msgDir) if err != nil { - if os.IsNotExist(err) { - return "", nil, nil + if !os.IsNotExist(err) { + return "", nil, nil, fmt.Errorf("read Messages dir: %w", err) } - return "", nil, fmt.Errorf("read Messages dir: %w", err) + // Primary Messages/ dir absent (partition-only layout); continue + // so that partition files are still collected below. + entries = nil } var files []string for _, e := range entries { - if e.IsDir() { - continue + if !e.IsDir() && isEmlxFile(e.Name()) { + files = append(files, e.Name()) } - name := e.Name() - if !strings.HasSuffix(strings.ToLower(name), ".emlx") { - continue + } + + // Walk numeric partition dirs in Data/ (parent of Messages/). + // Only enter digit dirs (0-9) to avoid re-collecting from the + // primary Messages/ dir which was already handled above. + var fileIndex map[string]string + dataDir := filepath.Dir(msgDir) + if filepath.Base(dataDir) == "Data" { + result := make(map[string]string) + topEntries, readErr := os.ReadDir(dataDir) + if readErr == nil { + for _, e := range topEntries { + if e.IsDir() && isDigitDir(e.Name()) { + collectPartitionFiles( + filepath.Join(dataDir, e.Name()), result, + ) + } + } } - // Skip .partial.emlx files (Apple Mail temp files). - if strings.HasSuffix( - strings.ToLower(name), ".partial.emlx", - ) { - continue + if len(result) > 0 { + fileIndex = result + for name := range result { + files = append(files, name) + } } - files = append(files, name) } sort.Strings(files) - return msgDir, files, nil + return msgDir, files, fileIndex, nil +} + +// collectPartitionFiles recursively walks dir for Messages/ subdirs and +// numeric partition dirs (0-9), collecting .emlx files into result +// (filename → absolute Messages/ dir path). +func collectPartitionFiles(dir string, result map[string]string) { + entries, err := os.ReadDir(dir) + if err != nil { + return + } + for _, e := range entries { + if !e.IsDir() { + continue + } + name := e.Name() + if name == "Messages" { + msgDir := filepath.Join(dir, name) + msgs, err := os.ReadDir(msgDir) + if err != nil { + continue + } + for _, m := range msgs { + if !m.IsDir() && isEmlxFile(m.Name()) { + result[m.Name()] = msgDir + } + } + } else if isDigitDir(name) { + collectPartitionFiles(filepath.Join(dir, name), result) + } + } } diff --git a/internal/emlx/discover_test.go b/internal/emlx/discover_test.go index 1e907cf..9277762 100644 --- a/internal/emlx/discover_test.go +++ b/internal/emlx/discover_test.go @@ -422,6 +422,191 @@ func TestDiscoverMailboxes_MixedLegacyAndV10(t *testing.T) { } } +// mkV10PartitionedMailbox creates a V10 mailbox with .emlx files in +// both the primary Messages/ directory and in numeric partition +// subdirectories at various nesting depths. +// +// Layout created: +// +// base//Data/Messages/1.emlx (top-level) +// base//Data/0/3/Messages/123.emlx (2-level partition) +// base//Data/9/Messages/456.emlx (1-level partition) +func mkV10PartitionedMailbox(t *testing.T, base, guid string) { + t.Helper() + dataDir := filepath.Join(base, guid, "Data") + + writeEmlxFile := func(dir, name string) { + t.Helper() + if err := os.MkdirAll(dir, 0700); err != nil { + t.Fatalf("mkdir %q: %v", dir, err) + } + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + writeEmlxFile(filepath.Join(dataDir, "Messages"), "1.emlx") + writeEmlxFile(filepath.Join(dataDir, "0", "3", "Messages"), "123.emlx") + writeEmlxFile(filepath.Join(dataDir, "9", "Messages"), "456.emlx") +} + +func TestDiscoverMailboxes_V10Partitioned(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "INBOX.mbox") + mkV10PartitionedMailbox(t, mboxDir, guid) + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1", len(mailboxes)) + } + + mb := mailboxes[0] + if mb.Label != "INBOX" { + t.Errorf("Label = %q, want %q", mb.Label, "INBOX") + } + + // Should find all 3 files: 1 top-level + 2 from partitions. + if len(mb.Files) != 3 { + t.Fatalf("Files = %v (len %d), want 3 files", mb.Files, len(mb.Files)) + } + + // Verify all expected filenames are present. + fileSet := make(map[string]bool) + for _, f := range mb.Files { + fileSet[f] = true + } + for _, want := range []string{"1.emlx", "123.emlx", "456.emlx"} { + if !fileSet[want] { + t.Errorf("missing file %q in Files: %v", want, mb.Files) + } + } + + // Verify FilePath resolves to an existing file for each entry. + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } + + // Top-level file should NOT be in FileIndex. + if mb.FileIndex != nil { + if _, inIndex := mb.FileIndex["1.emlx"]; inIndex { + t.Errorf("top-level 1.emlx should not be in FileIndex") + } + } + + // Partition files should be in FileIndex. + if mb.FileIndex == nil { + t.Fatal("FileIndex is nil but partition files were found") + } + for _, pf := range []string{"123.emlx", "456.emlx"} { + if _, ok := mb.FileIndex[pf]; !ok { + t.Errorf("partition file %q missing from FileIndex", pf) + } + } +} + +func TestDiscoverMailboxes_V10PartitionedOnly(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "INBOX.mbox") + + // Create the primary Messages/ dir but leave it empty. + // (Tests the case where Messages/ exists but is empty.) + primaryMsg := filepath.Join(mboxDir, guid, "Data", "Messages") + if err := os.MkdirAll(primaryMsg, 0700); err != nil { + t.Fatalf("mkdir %q: %v", primaryMsg, err) + } + + // Place files only in partition dirs. + partDir := filepath.Join(mboxDir, guid, "Data", "3", "Messages") + if err := os.MkdirAll(partDir, 0700); err != nil { + t.Fatalf("mkdir %q: %v", partDir, err) + } + for _, name := range []string{"100.emlx", "200.emlx"} { + path := filepath.Join(partDir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1 (partitioned-only mailbox should be detected)", len(mailboxes)) + } + + mb := mailboxes[0] + if len(mb.Files) != 2 { + t.Fatalf("Files = %v (len %d), want 2", mb.Files, len(mb.Files)) + } + + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } +} + +// TestDiscoverMailboxes_V10NoTopLevelMessages tests the case where +// Data/Messages/ does not exist at all — only numeric partition dirs. +// This matches real Apple Mail behavior for large mailboxes. +func TestDiscoverMailboxes_V10NoTopLevelMessages(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "Sent Messages.mbox") + + // Do NOT create Data/Messages/ — only create partition dirs. + for _, partPath := range []string{ + filepath.Join(mboxDir, guid, "Data", "9", "9", "Messages"), + filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + } { + if err := os.MkdirAll(partPath, 0700); err != nil { + t.Fatalf("mkdir %q: %v", partPath, err) + } + } + files := map[string]string{ + "500.emlx": filepath.Join(mboxDir, guid, "Data", "9", "9", "Messages"), + "600.emlx": filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + "700.emlx": filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + } + for name, dir := range files { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1 (no Data/Messages/ dir)", len(mailboxes)) + } + + mb := mailboxes[0] + if len(mb.Files) != 3 { + t.Fatalf("Files = %v, want 3", mb.Files) + } + + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } +} + func TestIsUUID(t *testing.T) { tests := []struct { input string @@ -436,9 +621,9 @@ func TestIsUUID(t *testing.T) { {"not-a-uuid-at-all-nope-definitely", false}, } for _, tc := range tests { - got := isUUID(tc.input) + got := IsUUID(tc.input) if got != tc.want { - t.Errorf("isUUID(%q) = %v, want %v", + t.Errorf("IsUUID(%q) = %v, want %v", tc.input, got, tc.want) } } diff --git a/internal/importer/emlx_import.go b/internal/importer/emlx_import.go index 5542b40..3709894 100644 --- a/internal/importer/emlx_import.go +++ b/internal/importer/emlx_import.go @@ -416,7 +416,7 @@ func ImportEmlxDir( } } - filePath := filepath.Join(mb.MsgDir, fileName) + filePath := mb.FilePath(fileName) // Check file size before reading to avoid OOM on oversized files. fi, statErr := os.Stat(filePath)