diff --git a/internal/emlx/discover.go b/internal/emlx/discover.go index 4aed607..c17cca1 100644 --- a/internal/emlx/discover.go +++ b/internal/emlx/discover.go @@ -13,7 +13,7 @@ type Mailbox struct { // Path is the absolute path to the .mbox or .imapmbox directory. Path string - // MsgDir is the absolute path to the Messages/ directory + // MsgDir is the absolute path to the primary Messages/ directory // containing .emlx files. In legacy layouts this is Path/Messages; // in modern V10 layouts it is Path//Data/Messages. MsgDir string @@ -21,8 +21,27 @@ type Mailbox struct { // Label is the derived label for messages in this mailbox. Label string - // Files contains sorted .emlx filenames within MsgDir. + // Files contains sorted .emlx filenames within MsgDir plus any + // files discovered in numeric partition subdirectories. Files []string + + // FileIndex maps filename → absolute path of the Messages/ subdirectory + // within a numeric partition directory, for V10 partitioned layouts. + // Files in MsgDir itself are absent from this map. Nil when no + // partition files exist. + FileIndex map[string]string +} + +// FilePath returns the absolute path to a .emlx file within this mailbox. +// For files in numeric partition directories, the path is resolved via +// FileIndex; all other files are resolved relative to MsgDir. +func (m *Mailbox) FilePath(fileName string) string { + if m.FileIndex != nil { + if sub, ok := m.FileIndex[fileName]; ok { + return filepath.Join(sub, fileName) + } + } + return filepath.Join(m.MsgDir, fileName) } // DiscoverMailboxes walks an Apple Mail directory tree and returns all @@ -49,7 +68,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { // Auto-detect: if the path itself is a mailbox, import just that one. if isMailboxDir(abs) { - msgDir, files, err := listEmlxFiles(abs) + msgDir, files, fileIndex, err := listEmlxFiles(abs) if err != nil { return nil, err } @@ -58,6 +77,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { return []Mailbox{{ Path: abs, MsgDir: msgDir, Label: label, Files: files, + FileIndex: fileIndex, }}, nil } } @@ -81,7 +101,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { return nil } - msgDir, files, listErr := listEmlxFiles(path) + msgDir, files, fileIndex, listErr := listEmlxFiles(path) if listErr != nil || len(files) == 0 { return nil } @@ -90,6 +110,7 @@ func DiscoverMailboxes(rootDir string) ([]Mailbox, error) { mailboxes = append(mailboxes, Mailbox{ Path: path, MsgDir: msgDir, Label: label, Files: files, + FileIndex: fileIndex, }) return nil @@ -164,7 +185,7 @@ func isMailboxDir(path string) bool { // findMessagesDir locates the Messages/ directory within a .mbox. // Returns "" if none found. Checks both legacy (Messages/) and // modern V10 (/Data/Messages/) layouts. When both exist, -// prefers whichever contains .emlx files. +// prefers whichever contains .emlx files (directly or in partitions). func findMessagesDir(mailboxPath string) string { var candidates []string @@ -175,17 +196,24 @@ func findMessagesDir(mailboxPath string) string { } // Modern V10: /Data/Messages/ subdirectory. + // Also handles partition-only layouts where Data/Messages/ doesn't exist. entries, err := os.ReadDir(mailboxPath) if err == nil { for _, e := range entries { if !e.IsDir() || e.Name() == "Messages" { continue } - modern := filepath.Join( - mailboxPath, e.Name(), "Data", "Messages", - ) - info, statErr := os.Stat(modern) - if statErr == nil && info.IsDir() { + dataDir := filepath.Join(mailboxPath, e.Name(), "Data") + dataStat, statErr := os.Stat(dataDir) + if statErr != nil || !dataStat.IsDir() { + continue + } + modern := filepath.Join(dataDir, "Messages") + msgStat, statErr := os.Stat(modern) + if statErr == nil && msgStat.IsDir() { + candidates = append(candidates, modern) + } else if hasEmlxFilesInPartitions(dataDir) { + // Partition-only: Data/Messages/ absent but partitions exist. candidates = append(candidates, modern) } } @@ -195,11 +223,18 @@ func findMessagesDir(mailboxPath string) string { return "" } - // Prefer the first candidate that has .emlx files. + // Prefer the first candidate that has .emlx files directly or + // within numeric partition subdirectories (V10 only). for _, dir := range candidates { if hasEmlxFiles(dir) { return dir } + // For V10 layout the parent is Data/; check partitions there. + dataDir := filepath.Dir(dir) + if filepath.Base(dataDir) == "Data" && + hasEmlxFilesInPartitions(dataDir) { + return dir + } } // No candidate has files; return first for isMailboxDir. @@ -214,13 +249,33 @@ func hasEmlxFiles(dir string) bool { return false } for _, e := range entries { - if e.IsDir() { + if !e.IsDir() && isEmlxFile(e.Name()) { + return true + } + } + return false +} + +// hasEmlxFilesInPartitions returns true if dir contains .emlx files +// within Messages/ subdirectories or nested numeric partition dirs (0-9). +func hasEmlxFilesInPartitions(dir string) bool { + entries, err := os.ReadDir(dir) + if err != nil { + return false + } + for _, e := range entries { + if !e.IsDir() { continue } - lower := strings.ToLower(e.Name()) - if strings.HasSuffix(lower, ".emlx") && - !strings.HasSuffix(lower, ".partial.emlx") { - return true + name := e.Name() + if name == "Messages" { + if hasEmlxFiles(filepath.Join(dir, name)) { + return true + } + } else if isDigitDir(name) { + if hasEmlxFilesInPartitions(filepath.Join(dir, name)) { + return true + } } } return false @@ -249,6 +304,16 @@ func isUUID(s string) bool { return true } +func isDigitDir(name string) bool { + return len(name) == 1 && name[0] >= '0' && name[0] <= '9' +} + +func isEmlxFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".emlx") && + !strings.HasSuffix(lower, ".partial.emlx") +} + func stripMailboxSuffix(name string) string { lower := strings.ToLower(name) if strings.HasSuffix(lower, ".imapmbox") { @@ -260,43 +325,91 @@ func stripMailboxSuffix(name string) string { return name } -// listEmlxFiles returns the Messages directory path and sorted .emlx -// filenames within it, excluding .partial.emlx. Returns ("", nil, nil) -// if no Messages directory is found. +// listEmlxFiles returns the Messages directory path, sorted .emlx +// filenames (from both the primary Messages/ dir and numeric partition +// subdirectories), and a FileIndex mapping partition filenames to their +// containing subdirectory. Returns ("", nil, nil, nil) if no Messages +// directory is found. func listEmlxFiles( mailboxPath string, -) (string, []string, error) { +) (string, []string, map[string]string, error) { msgDir := findMessagesDir(mailboxPath) if msgDir == "" { - return "", nil, nil + return "", nil, nil, nil } entries, err := os.ReadDir(msgDir) if err != nil { - if os.IsNotExist(err) { - return "", nil, nil + if !os.IsNotExist(err) { + return "", nil, nil, fmt.Errorf("read Messages dir: %w", err) } - return "", nil, fmt.Errorf("read Messages dir: %w", err) + // Primary Messages/ dir absent (partition-only layout); continue + // so that partition files are still collected below. + entries = nil } var files []string for _, e := range entries { - if e.IsDir() { - continue + if !e.IsDir() && isEmlxFile(e.Name()) { + files = append(files, e.Name()) } - name := e.Name() - if !strings.HasSuffix(strings.ToLower(name), ".emlx") { - continue + } + + // Walk numeric partition dirs in Data/ (parent of Messages/). + // Only enter digit dirs (0-9) to avoid re-collecting from the + // primary Messages/ dir which was already handled above. + var fileIndex map[string]string + dataDir := filepath.Dir(msgDir) + if filepath.Base(dataDir) == "Data" { + result := make(map[string]string) + topEntries, readErr := os.ReadDir(dataDir) + if readErr == nil { + for _, e := range topEntries { + if e.IsDir() && isDigitDir(e.Name()) { + collectPartitionFiles( + filepath.Join(dataDir, e.Name()), result, + ) + } + } } - // Skip .partial.emlx files (Apple Mail temp files). - if strings.HasSuffix( - strings.ToLower(name), ".partial.emlx", - ) { - continue + if len(result) > 0 { + fileIndex = result + for name := range result { + files = append(files, name) + } } - files = append(files, name) } sort.Strings(files) - return msgDir, files, nil + return msgDir, files, fileIndex, nil +} + +// collectPartitionFiles recursively walks dir for Messages/ subdirs and +// numeric partition dirs (0-9), collecting .emlx files into result +// (filename → absolute Messages/ dir path). +func collectPartitionFiles(dir string, result map[string]string) { + entries, err := os.ReadDir(dir) + if err != nil { + return + } + for _, e := range entries { + if !e.IsDir() { + continue + } + name := e.Name() + if name == "Messages" { + msgDir := filepath.Join(dir, name) + msgs, err := os.ReadDir(msgDir) + if err != nil { + continue + } + for _, m := range msgs { + if !m.IsDir() && isEmlxFile(m.Name()) { + result[m.Name()] = msgDir + } + } + } else if isDigitDir(name) { + collectPartitionFiles(filepath.Join(dir, name), result) + } + } } diff --git a/internal/emlx/discover_test.go b/internal/emlx/discover_test.go index 1e907cf..48ef319 100644 --- a/internal/emlx/discover_test.go +++ b/internal/emlx/discover_test.go @@ -422,6 +422,191 @@ func TestDiscoverMailboxes_MixedLegacyAndV10(t *testing.T) { } } +// mkV10PartitionedMailbox creates a V10 mailbox with .emlx files in +// both the primary Messages/ directory and in numeric partition +// subdirectories at various nesting depths. +// +// Layout created: +// +// base//Data/Messages/1.emlx (top-level) +// base//Data/0/3/Messages/123.emlx (2-level partition) +// base//Data/9/Messages/456.emlx (1-level partition) +func mkV10PartitionedMailbox(t *testing.T, base, guid string) { + t.Helper() + dataDir := filepath.Join(base, guid, "Data") + + writeEmlxFile := func(dir, name string) { + t.Helper() + if err := os.MkdirAll(dir, 0700); err != nil { + t.Fatalf("mkdir %q: %v", dir, err) + } + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + writeEmlxFile(filepath.Join(dataDir, "Messages"), "1.emlx") + writeEmlxFile(filepath.Join(dataDir, "0", "3", "Messages"), "123.emlx") + writeEmlxFile(filepath.Join(dataDir, "9", "Messages"), "456.emlx") +} + +func TestDiscoverMailboxes_V10Partitioned(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "INBOX.mbox") + mkV10PartitionedMailbox(t, mboxDir, guid) + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1", len(mailboxes)) + } + + mb := mailboxes[0] + if mb.Label != "INBOX" { + t.Errorf("Label = %q, want %q", mb.Label, "INBOX") + } + + // Should find all 3 files: 1 top-level + 2 from partitions. + if len(mb.Files) != 3 { + t.Fatalf("Files = %v (len %d), want 3 files", mb.Files, len(mb.Files)) + } + + // Verify all expected filenames are present. + fileSet := make(map[string]bool) + for _, f := range mb.Files { + fileSet[f] = true + } + for _, want := range []string{"1.emlx", "123.emlx", "456.emlx"} { + if !fileSet[want] { + t.Errorf("missing file %q in Files: %v", want, mb.Files) + } + } + + // Verify FilePath resolves to an existing file for each entry. + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } + + // Top-level file should NOT be in FileIndex. + if mb.FileIndex != nil { + if _, inIndex := mb.FileIndex["1.emlx"]; inIndex { + t.Errorf("top-level 1.emlx should not be in FileIndex") + } + } + + // Partition files should be in FileIndex. + if mb.FileIndex == nil { + t.Fatal("FileIndex is nil but partition files were found") + } + for _, pf := range []string{"123.emlx", "456.emlx"} { + if _, ok := mb.FileIndex[pf]; !ok { + t.Errorf("partition file %q missing from FileIndex", pf) + } + } +} + +func TestDiscoverMailboxes_V10PartitionedOnly(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "INBOX.mbox") + + // Create the primary Messages/ dir but leave it empty. + // (Tests the case where Messages/ exists but is empty.) + primaryMsg := filepath.Join(mboxDir, guid, "Data", "Messages") + if err := os.MkdirAll(primaryMsg, 0700); err != nil { + t.Fatalf("mkdir %q: %v", primaryMsg, err) + } + + // Place files only in partition dirs. + partDir := filepath.Join(mboxDir, guid, "Data", "3", "Messages") + if err := os.MkdirAll(partDir, 0700); err != nil { + t.Fatalf("mkdir %q: %v", partDir, err) + } + for _, name := range []string{"100.emlx", "200.emlx"} { + path := filepath.Join(partDir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1 (partitioned-only mailbox should be detected)", len(mailboxes)) + } + + mb := mailboxes[0] + if len(mb.Files) != 2 { + t.Fatalf("Files = %v (len %d), want 2", mb.Files, len(mb.Files)) + } + + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } +} + +// TestDiscoverMailboxes_V10NoTopLevelMessages tests the case where +// Data/Messages/ does not exist at all — only numeric partition dirs. +// This matches real Apple Mail behavior for large mailboxes. +func TestDiscoverMailboxes_V10NoTopLevelMessages(t *testing.T) { + root := t.TempDir() + guid := "9F0F15DD-4CBC-448A-9EBF-C385A47A3A67" + mboxDir := filepath.Join(root, "Sent Messages.mbox") + + // Do NOT create Data/Messages/ — only create partition dirs. + for _, partPath := range []string{ + filepath.Join(mboxDir, guid, "Data", "9", "9", "Messages"), + filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + } { + if err := os.MkdirAll(partPath, 0700); err != nil { + t.Fatalf("mkdir %q: %v", partPath, err) + } + } + files := map[string]string{ + "500.emlx": filepath.Join(mboxDir, guid, "Data", "9", "9", "Messages"), + "600.emlx": filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + "700.emlx": filepath.Join(mboxDir, guid, "Data", "0", "0", "1", "Messages"), + } + for name, dir := range files { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("10\nFrom: x\r\n\r\n"), 0600); err != nil { + t.Fatalf("write %q: %v", path, err) + } + } + + mailboxes, err := DiscoverMailboxes(mboxDir) + if err != nil { + t.Fatalf("DiscoverMailboxes: %v", err) + } + if len(mailboxes) != 1 { + t.Fatalf("got %d mailboxes, want 1 (no Data/Messages/ dir)", len(mailboxes)) + } + + mb := mailboxes[0] + if len(mb.Files) != 3 { + t.Fatalf("Files = %v, want 3", mb.Files) + } + + for _, fileName := range mb.Files { + path := mb.FilePath(fileName) + if _, err := os.Stat(path); err != nil { + t.Errorf("FilePath(%q) = %q: stat failed: %v", fileName, path, err) + } + } +} + func TestIsUUID(t *testing.T) { tests := []struct { input string diff --git a/internal/importer/emlx_import.go b/internal/importer/emlx_import.go index 5542b40..3709894 100644 --- a/internal/importer/emlx_import.go +++ b/internal/importer/emlx_import.go @@ -416,7 +416,7 @@ func ImportEmlxDir( } } - filePath := filepath.Join(mb.MsgDir, fileName) + filePath := mb.FilePath(fileName) // Check file size before reading to avoid OOM on oversized files. fi, statErr := os.Stat(filePath)