Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions internal/applescript/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"context"
"embed"
"fmt"
"os"
"path/filepath"
"strings"

"go.uber.org/zap"

Expand All @@ -23,6 +26,11 @@ type NoteExtractor interface {
// If accounts is nil, all accounts are included.
// If folders is nil, all folders are included.
GetAllNotes(ctx context.Context, accounts []string, folders []string) ([]model.Note, error)

// ResolveAttachments locates attachment files in the Apple Notes media
// directory and populates the Data field for each attachment. Attachments
// larger than maxSizeMB are skipped.
ResolveAttachments(ctx context.Context, notes []model.Note, maxSizeMB int) error
}

// AppleScriptExtractor extracts notes from Apple Notes by executing
Expand Down Expand Up @@ -133,3 +141,128 @@ func toSet(items []string) map[string]bool {
}
return set
}

// notesMediaDir is the directory where Apple Notes stores attachment files,
// expressed relative to the user's home directory: ResolveAttachments joins
// it onto the result of os.UserHomeDir.
const notesMediaDir = "Library/Group Containers/group.com.apple.notes"

// ResolveAttachments fills in the Data field of the attachments in notes by
// looking up files with matching names under the Apple Notes media directory
// in the current user's home directory. Attachments larger than maxSizeMB are
// skipped.
//
// A missing media directory is not an error: a warning is logged and nil is
// returned. An error is returned when the home directory cannot be
// determined or when the underlying resolution fails.
func (e *AppleScriptExtractor) ResolveAttachments(ctx context.Context, notes []model.Note, maxSizeMB int) error {
	home, homeErr := os.UserHomeDir()
	if homeErr != nil {
		return fmt.Errorf("getting home directory: %w", homeErr)
	}

	root := filepath.Join(home, notesMediaDir)

	// Only a definite "does not exist" short-circuits; any other stat
	// outcome falls through to the directory walk.
	_, statErr := os.Stat(root)
	if missing := os.IsNotExist(statErr); missing {
		e.logger.Warn("Apple Notes media directory not found", zap.String("path", root))
		return nil
	}

	return e.resolveAttachmentsFromDir(ctx, root, notes, maxSizeMB)
}

// resolveAttachmentsFromDir is the core implementation of ResolveAttachments,
// separated to allow testing with a custom directory.
//
// It indexes every file under mediaRoot by base name, then, for each named
// attachment in notes, reads the matching file into the attachment's Data
// field. When several files share the name, the first path containing the
// attachment's ContentID is preferred; otherwise the first indexed path is
// used. Attachments that have no name, have no matching file, are not regular
// files, are larger than maxSizeMB mebibytes, or cannot be read are logged
// and left unresolved.
//
// An error is returned only when indexing fails or ctx is cancelled. On
// cancellation, attachments resolved so far keep their Data.
func (e *AppleScriptExtractor) resolveAttachmentsFromDir(ctx context.Context, mediaRoot string, notes []model.Note, maxSizeMB int) error {
	fileIndex, err := buildFileIndex(ctx, mediaRoot)
	if err != nil {
		return fmt.Errorf("indexing Apple Notes media: %w", err)
	}

	e.logger.Debug("built attachment file index", zap.Int("files", len(fileIndex)))

	maxBytes := int64(maxSizeMB) * 1024 * 1024
	resolved := 0

	for i := range notes {
		for j := range notes[i].Attachments {
			// Reading files can dominate the run time, so honour
			// cancellation between attachments, not only while indexing.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return fmt.Errorf("context cancelled: %w", ctxErr)
			}

			// Take a pointer so the assignment to Data lands in the
			// caller's slice element rather than in a copy.
			att := &notes[i].Attachments[j]
			if att.Name == "" {
				continue
			}

			paths := fileIndex[att.Name]
			if len(paths) == 0 {
				e.logger.Debug("attachment file not found in media directory",
					zap.String("name", att.Name),
					zap.String("note", notes[i].Name),
				)
				continue
			}

			// Use the first match. If multiple exist, prefer the one whose
			// path contains the content identifier, if any does.
			filePath := paths[0]
			if att.ContentID != "" && len(paths) > 1 {
				for _, p := range paths {
					if strings.Contains(p, att.ContentID) {
						filePath = p
						break
					}
				}
			}

			info, err := os.Stat(filePath)
			if err != nil {
				e.logger.Debug("cannot stat attachment file", zap.String("path", filePath), zap.Error(err))
				continue
			}

			// The index holds every non-directory entry. Reading a FIFO
			// could block indefinitely and a symlink may resolve to a
			// directory, so only regular files are read.
			if !info.Mode().IsRegular() {
				e.logger.Debug("skipping non-regular attachment file", zap.String("path", filePath))
				continue
			}

			if info.Size() > maxBytes {
				e.logger.Debug("skipping oversized attachment",
					zap.String("name", att.Name),
					zap.Int64("size_bytes", info.Size()),
					zap.Int("max_mb", maxSizeMB),
				)
				continue
			}

			data, err := os.ReadFile(filePath)
			if err != nil {
				e.logger.Warn("failed to read attachment file",
					zap.String("path", filePath),
					zap.Error(err),
				)
				continue
			}

			att.Data = data
			resolved++
		}
	}

	e.logger.Info("resolved attachments", zap.Int("count", resolved))
	return nil
}

// buildFileIndex walks the directory tree rooted at root and returns a map
// from base filename to every path at which a non-directory entry with that
// name was found. Paths are built from root, so they are absolute only when
// root itself is absolute.
//
// Entries that cannot be accessed are skipped rather than reported, so a
// missing or unreadable root yields an empty index and a nil error. If ctx is
// cancelled, the walk stops and a nil index is returned together with an
// error wrapping ctx.Err().
func buildFileIndex(ctx context.Context, root string) (map[string][]string, error) {
	index := make(map[string][]string)

	err := filepath.Walk(root, func(path string, info os.FileInfo, walkErr error) error {
		// Check cancellation before anything else so a cancelled walk stops
		// even when the current entry is inaccessible.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return fmt.Errorf("context cancelled: %w", ctxErr)
		}

		if walkErr != nil {
			return nil // Skip inaccessible files; info may be nil here.
		}

		if info.IsDir() {
			return nil
		}

		name := info.Name()
		index[name] = append(index[name], path)
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("walking %q: %w", root, err)
	}

	return index, nil
}
122 changes: 116 additions & 6 deletions internal/applescript/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package applescript

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -76,8 +78,8 @@ func TestAppleScriptExtractor_GetAllNotes(t *testing.T) {
mockExec := new(MockCommandExecutor)
extractor := NewAppleScriptExtractor(mockExec, newTestLogger())

output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||"
output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||"

mockExec.On("Execute", mock.Anything, "osascript", mock.Anything).
Return(&shell.CommandResult{Stdout: output}, nil)
Expand All @@ -96,8 +98,8 @@ func TestAppleScriptExtractor_GetAllNotes_FilterByAccount(t *testing.T) {
mockExec := new(MockCommandExecutor)
extractor := NewAppleScriptExtractor(mockExec, newTestLogger())

output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||"
output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||"

mockExec.On("Execute", mock.Anything, "osascript", mock.Anything).
Return(&shell.CommandResult{Stdout: output}, nil)
Expand All @@ -114,8 +116,8 @@ func TestAppleScriptExtractor_GetAllNotes_FilterByFolder(t *testing.T) {
mockExec := new(MockCommandExecutor)
extractor := NewAppleScriptExtractor(mockExec, newTestLogger())

output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||"
output := "id1|||FIELD|||Note 1|||FIELD|||<p>body1</p>|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" +
"id2|||FIELD|||Note 2|||FIELD|||<p>body2</p>|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||"

mockExec.On("Execute", mock.Anything, "osascript", mock.Anything).
Return(&shell.CommandResult{Stdout: output}, nil)
Expand Down Expand Up @@ -195,3 +197,111 @@ func TestToSet(t *testing.T) {
assert.True(t, s["b"])
assert.False(t, s["c"])
}

// TestBuildFileIndex checks that files sharing a base name in different
// directories are grouped under one key and that unknown names are absent.
func TestBuildFileIndex(t *testing.T) {
	root := t.TempDir()
	nested := filepath.Join(root, "sub1", "sub2")
	require.NoError(t, os.MkdirAll(nested, 0755))

	// Two files named photo.jpg at different depths, plus one unique file.
	fixtures := []struct {
		path    string
		content string
	}{
		{filepath.Join(root, "photo.jpg"), "img1"},
		{filepath.Join(nested, "photo.jpg"), "img2"},
		{filepath.Join(nested, "doc.pdf"), "pdf"},
	}
	for _, f := range fixtures {
		require.NoError(t, os.WriteFile(f.path, []byte(f.content), 0644))
	}

	got, err := buildFileIndex(context.Background(), root)
	require.NoError(t, err)

	assert.Len(t, got["photo.jpg"], 2)
	assert.Len(t, got["doc.pdf"], 1)
	assert.Empty(t, got["missing.txt"])
}

// TestBuildFileIndex_EmptyDir checks that indexing a directory with no files
// succeeds and produces an empty index.
func TestBuildFileIndex_EmptyDir(t *testing.T) {
	got, err := buildFileIndex(context.Background(), t.TempDir())

	require.NoError(t, err)
	assert.Empty(t, got)
}

// TestResolveAttachmentsFromDir covers the three basic outcomes for an
// attachment: resolved, skipped for exceeding the size limit, and not found.
func TestResolveAttachmentsFromDir(t *testing.T) {
	// 1 MB limit so the 2 MiB fixture below is rejected.
	const limitMB = 1

	// Build a fake Notes media directory.
	mediaRoot := t.TempDir()
	bucket := filepath.Join(mediaRoot, "uuid-123")
	require.NoError(t, os.MkdirAll(bucket, 0755))
	require.NoError(t, os.WriteFile(filepath.Join(bucket, "photo.jpg"), []byte("fake-image"), 0644))
	oversized := make([]byte, 2*1024*1024)
	require.NoError(t, os.WriteFile(filepath.Join(bucket, "huge.bin"), oversized, 0644))

	extractor := NewAppleScriptExtractor(new(MockCommandExecutor), newTestLogger())

	attachments := []model.Attachment{
		{Name: "photo.jpg", ContentID: "cid-1"},
		{Name: "huge.bin", ContentID: "cid-2"},
		{Name: "missing.png", ContentID: "cid-3"},
	}
	notes := []model.Note{
		{Name: "Test Note", Attachments: attachments},
	}

	require.NoError(t, extractor.resolveAttachmentsFromDir(context.Background(), mediaRoot, notes, limitMB))

	result := notes[0].Attachments
	// photo.jpg is under the limit and must be loaded.
	assert.Equal(t, []byte("fake-image"), result[0].Data)
	// huge.bin is over the limit and must be left empty.
	assert.Nil(t, result[1].Data)
	// missing.png has no file on disk.
	assert.Nil(t, result[2].Data)
}

// TestResolveAttachmentsFromDir_ContentIDMatch checks that, when two files
// share a name, the one whose path contains the attachment's ContentID wins.
func TestResolveAttachmentsFromDir_ContentIDMatch(t *testing.T) {
	mediaRoot := t.TempDir()

	// Same file name in two sibling directories; only the second directory
	// name contains the content identifier used below.
	candidates := []struct {
		dirName string
		content string
	}{
		{"uuid-AAA", "wrong"},
		{"uuid-BBB", "correct"},
	}
	for _, c := range candidates {
		dir := filepath.Join(mediaRoot, c.dirName)
		require.NoError(t, os.MkdirAll(dir, 0755))
		require.NoError(t, os.WriteFile(filepath.Join(dir, "image.png"), []byte(c.content), 0644))
	}

	extractor := NewAppleScriptExtractor(new(MockCommandExecutor), newTestLogger())

	notes := []model.Note{{
		Name:        "Note",
		Attachments: []model.Attachment{{Name: "image.png", ContentID: "BBB"}},
	}}

	require.NoError(t, extractor.resolveAttachmentsFromDir(context.Background(), mediaRoot, notes, 50))

	// The file under the directory containing "BBB" must be chosen.
	assert.Equal(t, []byte("correct"), notes[0].Attachments[0].Data)
}

func TestResolveAttachmentsFromDir_EmptyName(t *testing.T) {
dir := t.TempDir()
mockExec := new(MockCommandExecutor)
extractor := NewAppleScriptExtractor(mockExec, newTestLogger())

notes := []model.Note{
{
Name: "Note",
Attachments: []model.Attachment{
{Name: "", ContentID: "cid"},
},
},
}

err := extractor.resolveAttachmentsFromDir(context.Background(), dir, notes, 50)
require.NoError(t, err)
assert.Nil(t, notes[0].Attachments[0].Data)
}
Loading
Loading