From dea61f87d177dbb3e8bc1c2d14a7de80b9cf6df3 Mon Sep 17 00:00:00 2001 From: Agni Date: Mon, 23 Mar 2026 16:41:55 +0530 Subject: [PATCH 1/2] feat: implement attachment extraction and sync pipeline - Update AppleScript to extract attachment name and content identifier - Add attachment parsing in the delimiter protocol (10th field) - Add ResolveAttachments to NoteExtractor interface, which walks the Apple Notes media directory to locate and read attachment files - Wire attachment saving into the syncer pipeline (between write and clean orphan steps) - Respect attachments.enabled and attachments.max_size_mb config - Add ContentID field to Attachment model - Add tests for attachment parsing, file index, and resolution Co-Authored-By: Claude Opus 4.6 --- internal/applescript/extractor.go | 133 ++++++++++++++++++ internal/applescript/extractor_test.go | 122 +++++++++++++++- internal/applescript/parser.go | 80 +++++++++-- internal/applescript/parser_test.go | 74 +++++++++- .../scripts/get_all_notes.applescript | 24 +++- internal/model/model.go | 3 + internal/syncer/syncer.go | 32 +++++ internal/syncer/syncer_test.go | 79 +++++++++++ 8 files changed, 522 insertions(+), 25 deletions(-) diff --git a/internal/applescript/extractor.go b/internal/applescript/extractor.go index 594e827..fb0172d 100644 --- a/internal/applescript/extractor.go +++ b/internal/applescript/extractor.go @@ -4,6 +4,9 @@ import ( "context" "embed" "fmt" + "os" + "path/filepath" + "strings" "go.uber.org/zap" @@ -23,6 +26,11 @@ type NoteExtractor interface { // If accounts is nil, all accounts are included. // If folders is nil, all folders are included. GetAllNotes(ctx context.Context, accounts []string, folders []string) ([]model.Note, error) + + // ResolveAttachments locates attachment files in the Apple Notes media + // directory and populates the Data field for each attachment. Attachments + // larger than maxSizeMB are skipped. 
+ ResolveAttachments(ctx context.Context, notes []model.Note, maxSizeMB int) error } // AppleScriptExtractor extracts notes from Apple Notes by executing @@ -133,3 +141,128 @@ func toSet(items []string) map[string]bool { } return set } + +// notesMediaDir is the directory where Apple Notes stores attachment files. +const notesMediaDir = "Library/Group Containers/group.com.apple.notes" + +// ResolveAttachments walks the Apple Notes media directory and populates +// attachment Data fields by matching filenames. Attachments larger than +// maxSizeMB are skipped. +func (e *AppleScriptExtractor) ResolveAttachments(ctx context.Context, notes []model.Note, maxSizeMB int) error { + homeDir, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("getting home directory: %w", err) + } + + mediaRoot := filepath.Join(homeDir, notesMediaDir) + if _, err := os.Stat(mediaRoot); os.IsNotExist(err) { + e.logger.Warn("Apple Notes media directory not found", zap.String("path", mediaRoot)) + return nil + } + + return e.resolveAttachmentsFromDir(ctx, mediaRoot, notes, maxSizeMB) +} + +// resolveAttachmentsFromDir is the core implementation of ResolveAttachments, +// separated to allow testing with a custom directory. 
+func (e *AppleScriptExtractor) resolveAttachmentsFromDir(ctx context.Context, mediaRoot string, notes []model.Note, maxSizeMB int) error { + fileIndex, err := buildFileIndex(ctx, mediaRoot) + if err != nil { + return fmt.Errorf("indexing Apple Notes media: %w", err) + } + + e.logger.Debug("built attachment file index", zap.Int("files", len(fileIndex))) + + maxBytes := int64(maxSizeMB) * 1024 * 1024 + resolved := 0 + + for i := range notes { + for j := range notes[i].Attachments { + att := &notes[i].Attachments[j] + if att.Name == "" { + continue + } + + paths, ok := fileIndex[att.Name] + if !ok || len(paths) == 0 { + e.logger.Debug("attachment file not found in media directory", + zap.String("name", att.Name), + zap.String("note", notes[i].Name), + ) + continue + } + + // Use the first match. If multiple exist, prefer the one matching + // the content identifier if possible. + filePath := paths[0] + if att.ContentID != "" && len(paths) > 1 { + for _, p := range paths { + if strings.Contains(p, att.ContentID) { + filePath = p + break + } + } + } + + info, err := os.Stat(filePath) + if err != nil { + e.logger.Debug("cannot stat attachment file", zap.String("path", filePath), zap.Error(err)) + continue + } + + if info.Size() > maxBytes { + e.logger.Debug("skipping oversized attachment", + zap.String("name", att.Name), + zap.Int64("size_bytes", info.Size()), + zap.Int("max_mb", maxSizeMB), + ) + continue + } + + data, err := os.ReadFile(filePath) + if err != nil { + e.logger.Warn("failed to read attachment file", + zap.String("path", filePath), + zap.Error(err), + ) + continue + } + + att.Data = data + resolved++ + } + } + + e.logger.Info("resolved attachments", zap.Int("count", resolved)) + return nil +} + +// buildFileIndex walks a directory tree and returns a map from filename to +// all absolute paths where that filename exists. 
+func buildFileIndex(ctx context.Context, root string) (map[string][]string, error) { + index := make(map[string][]string) + + err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil // Skip inaccessible files. + } + + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled: %w", ctx.Err()) + default: + } + + if info.IsDir() { + return nil + } + + index[info.Name()] = append(index[info.Name()], path) + return nil + }) + if err != nil { + return nil, fmt.Errorf("walking %q: %w", root, err) + } + + return index, nil +} diff --git a/internal/applescript/extractor_test.go b/internal/applescript/extractor_test.go index 7b0f821..1eb48e2 100644 --- a/internal/applescript/extractor_test.go +++ b/internal/applescript/extractor_test.go @@ -2,6 +2,8 @@ package applescript import ( "context" + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -76,8 +78,8 @@ func TestAppleScriptExtractor_GetAllNotes(t *testing.T) { mockExec := new(MockCommandExecutor) extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) - output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" + - "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||" + output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" + + "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" mockExec.On("Execute", mock.Anything, "osascript", mock.Anything). Return(&shell.CommandResult{Stdout: output}, nil) @@ -96,8 +98,8 @@ func TestAppleScriptExtractor_GetAllNotes_FilterByAccount(t *testing.T) { mockExec := new(MockCommandExecutor) extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) - output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" + - "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||" + output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" + + "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" mockExec.On("Execute", mock.Anything, "osascript", mock.Anything). Return(&shell.CommandResult{Stdout: output}, nil) @@ -114,8 +116,8 @@ func TestAppleScriptExtractor_GetAllNotes_FilterByFolder(t *testing.T) { mockExec := new(MockCommandExecutor) extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) - output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" + - "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||" + output := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" + + "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" mockExec.On("Execute", mock.Anything, "osascript", mock.Anything). Return(&shell.CommandResult{Stdout: output}, nil) @@ -195,3 +197,111 @@ func TestToSet(t *testing.T) { assert.True(t, s["b"]) assert.False(t, s["c"]) } + +func TestBuildFileIndex(t *testing.T) { + dir := t.TempDir() + + // Create some files in subdirectories. + subDir := filepath.Join(dir, "sub1", "sub2") + require.NoError(t, os.MkdirAll(subDir, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "photo.jpg"), []byte("img1"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(subDir, "photo.jpg"), []byte("img2"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(subDir, "doc.pdf"), []byte("pdf"), 0644)) + + index, err := buildFileIndex(context.Background(), dir) + require.NoError(t, err) + + assert.Len(t, index["photo.jpg"], 2) + assert.Len(t, index["doc.pdf"], 1) + assert.Empty(t, index["missing.txt"]) +} + +func TestBuildFileIndex_EmptyDir(t *testing.T) { + dir := t.TempDir() + + index, err := buildFileIndex(context.Background(), dir) + require.NoError(t, err) + assert.Empty(t, index) +} + +func TestResolveAttachmentsFromDir(t *testing.T) { + // Create a fake Notes media directory. 
+ dir := t.TempDir() + subDir := filepath.Join(dir, "uuid-123") + require.NoError(t, os.MkdirAll(subDir, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(subDir, "photo.jpg"), []byte("fake-image"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(subDir, "huge.bin"), make([]byte, 2*1024*1024), 0644)) + + mockExec := new(MockCommandExecutor) + extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) + + notes := []model.Note{ + { + Name: "Test Note", + Attachments: []model.Attachment{ + {Name: "photo.jpg", ContentID: "cid-1"}, + {Name: "huge.bin", ContentID: "cid-2"}, + {Name: "missing.png", ContentID: "cid-3"}, + }, + }, + } + + // Use 1 MB max to test size filtering. + err := extractor.resolveAttachmentsFromDir(context.Background(), dir, notes, 1) + require.NoError(t, err) + + // photo.jpg should be resolved (< 1 MB). + assert.Equal(t, []byte("fake-image"), notes[0].Attachments[0].Data) + // huge.bin should be skipped (> 1 MB). + assert.Nil(t, notes[0].Attachments[1].Data) + // missing.png should be nil. + assert.Nil(t, notes[0].Attachments[2].Data) +} + +func TestResolveAttachmentsFromDir_ContentIDMatch(t *testing.T) { + // Create two files with the same name in different dirs. 
+ dir := t.TempDir() + dir1 := filepath.Join(dir, "uuid-AAA") + dir2 := filepath.Join(dir, "uuid-BBB") + require.NoError(t, os.MkdirAll(dir1, 0755)) + require.NoError(t, os.MkdirAll(dir2, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(dir1, "image.png"), []byte("wrong"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(dir2, "image.png"), []byte("correct"), 0644)) + + mockExec := new(MockCommandExecutor) + extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) + + notes := []model.Note{ + { + Name: "Note", + Attachments: []model.Attachment{ + {Name: "image.png", ContentID: "BBB"}, + }, + }, + } + + err := extractor.resolveAttachmentsFromDir(context.Background(), dir, notes, 50) + require.NoError(t, err) + + // Should match the one containing "BBB" in its path. + assert.Equal(t, []byte("correct"), notes[0].Attachments[0].Data) +} + +func TestResolveAttachmentsFromDir_EmptyName(t *testing.T) { + dir := t.TempDir() + mockExec := new(MockCommandExecutor) + extractor := NewAppleScriptExtractor(mockExec, newTestLogger()) + + notes := []model.Note{ + { + Name: "Note", + Attachments: []model.Attachment{ + {Name: "", ContentID: "cid"}, + }, + }, + } + + err := extractor.resolveAttachmentsFromDir(context.Background(), dir, notes, 50) + require.NoError(t, err) + assert.Nil(t, notes[0].Attachments[0].Data) +} diff --git a/internal/applescript/parser.go b/internal/applescript/parser.go index 5e0d02f..c161406 100644 --- a/internal/applescript/parser.go +++ b/internal/applescript/parser.go @@ -17,10 +17,14 @@ const ( noteDelimiter = "|||NOTE|||" // folderDelimiter separates folder records in AppleScript output. folderDelimiter = "|||FOLDER|||" + // attachDelimiter separates attachment records within a note's attachment field. + attachDelimiter = "|||ATTACH|||" + // attachFieldDelimiter separates fields within an attachment record. + attachFieldDelimiter = "|||AFIELD|||" ) // noteFieldCount is the number of fields expected per note record. 
-const noteFieldCount = 9 +const noteFieldCount = 10 // folderFieldCount is the number of fields expected per folder record. const folderFieldCount = 4 @@ -60,15 +64,16 @@ func ParseNotesOutput(raw string) ([]model.Note, error) { } note := model.Note{ - ID: strings.TrimSpace(fields[0]), - Name: strings.TrimSpace(fields[1]), - BodyHTML: strings.TrimSpace(fields[2]), - FolderPath: strings.TrimSpace(fields[6]), - Account: strings.TrimSpace(fields[5]), - CreatedAt: createdAt, - ModifiedAt: modifiedAt, - Protected: strings.TrimSpace(fields[7]) == "true", - Shared: strings.TrimSpace(fields[8]) == "true", + ID: strings.TrimSpace(fields[0]), + Name: strings.TrimSpace(fields[1]), + BodyHTML: strings.TrimSpace(fields[2]), + FolderPath: strings.TrimSpace(fields[6]), + Account: strings.TrimSpace(fields[5]), + CreatedAt: createdAt, + ModifiedAt: modifiedAt, + Protected: strings.TrimSpace(fields[7]) == "true", + Shared: strings.TrimSpace(fields[8]) == "true", + Attachments: parseAttachments(strings.TrimSpace(fields[9])), } notes = append(notes, note) @@ -177,6 +182,61 @@ func normalizeWhitespace(s string) string { return b.String() } +// parseAttachments parses the attachment field from AppleScript output into +// Attachment structs. Each attachment has name and content identifier separated +// by attachFieldDelimiter, and multiple attachments are separated by attachDelimiter. +func parseAttachments(raw string) []model.Attachment { + if raw == "" { + return nil + } + + records := strings.Split(raw, attachDelimiter) + var attachments []model.Attachment + + for _, record := range records { + record = strings.TrimSpace(record) + if record == "" { + continue + } + + fields := strings.Split(record, attachFieldDelimiter) + if len(fields) < 1 { + continue + } + + att := model.Attachment{ + Name: strings.TrimSpace(fields[0]), + } + if len(fields) >= 2 { + att.ContentID = strings.TrimSpace(fields[1]) + } + + // Infer type from file extension. 
+ att.Type = inferAttachmentType(att.Name) + + attachments = append(attachments, att) + } + + return attachments +} + +// inferAttachmentType guesses the attachment type from the file extension. +func inferAttachmentType(name string) model.AttachmentType { + lower := strings.ToLower(name) + switch { + case strings.HasSuffix(lower, ".jpg"), strings.HasSuffix(lower, ".jpeg"), + strings.HasSuffix(lower, ".png"), strings.HasSuffix(lower, ".gif"), + strings.HasSuffix(lower, ".heic"), strings.HasSuffix(lower, ".webp"), + strings.HasSuffix(lower, ".tiff"), strings.HasSuffix(lower, ".bmp"): + return model.AttachmentImage + case strings.HasSuffix(lower, ".mp4"), strings.HasSuffix(lower, ".mov"), + strings.HasSuffix(lower, ".m4v"), strings.HasSuffix(lower, ".avi"): + return model.AttachmentVideo + default: + return model.AttachmentFile + } +} + // truncate shortens a string to maxLen characters for display in error messages. func truncate(s string, maxLen int) string { if len(s) <= maxLen { diff --git a/internal/applescript/parser_test.go b/internal/applescript/parser_test.go index 6213f51..08ad151 100644 --- a/internal/applescript/parser_test.go +++ b/internal/applescript/parser_test.go @@ -5,10 +5,12 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/PyAgni/apple-notes-syncer/internal/model" ) func TestParseNotesOutput_SingleNote(t *testing.T) { - raw := "x-coredata://123|||FIELD|||My Note|||FIELD|||

My Note

|||FIELD|||Monday, March 18, 2026 at 4:39:41 PM|||FIELD|||Monday, March 18, 2026 at 4:40:05 PM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" + raw := "x-coredata://123|||FIELD|||My Note|||FIELD|||

My Note

|||FIELD|||Monday, March 18, 2026 at 4:39:41 PM|||FIELD|||Monday, March 18, 2026 at 4:40:05 PM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" notes, err := ParseNotesOutput(raw) require.NoError(t, err) @@ -23,11 +25,12 @@ func TestParseNotesOutput_SingleNote(t *testing.T) { assert.False(t, notes[0].Shared) assert.Equal(t, 2026, notes[0].CreatedAt.Year()) assert.Equal(t, 2026, notes[0].ModifiedAt.Year()) + assert.Empty(t, notes[0].Attachments) } func TestParseNotesOutput_MultipleNotes(t *testing.T) { - raw := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||NOTE|||" + - "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Personal|||FIELD|||true|||FIELD|||true|||NOTE|||" + raw := "id1|||FIELD|||Note 1|||FIELD|||

body1

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Work|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" + + "id2|||FIELD|||Note 2|||FIELD|||

body2

|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Tuesday, January 2, 2026 at 11:00:00 AM|||FIELD|||Gmail|||FIELD|||Personal|||FIELD|||true|||FIELD|||true|||FIELD||||||NOTE|||" notes, err := ParseNotesOutput(raw) require.NoError(t, err) @@ -59,18 +62,18 @@ func TestParseNotesOutput_InvalidFieldCount(t *testing.T) { raw := "id|||FIELD|||name|||FIELD|||body|||NOTE|||" _, err := ParseNotesOutput(raw) require.Error(t, err) - assert.Contains(t, err.Error(), "expected 9 fields") + assert.Contains(t, err.Error(), "expected 10 fields") } func TestParseNotesOutput_InvalidDate(t *testing.T) { - raw := "id1|||FIELD|||Note|||FIELD|||

body

|||FIELD|||not-a-date|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||NOTE|||" + raw := "id1|||FIELD|||Note|||FIELD|||

body

|||FIELD|||not-a-date|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" _, err := ParseNotesOutput(raw) require.Error(t, err) assert.Contains(t, err.Error(), "parsing creation date") } func TestParseNotesOutput_ProtectedNote(t *testing.T) { - raw := "id1|||FIELD|||Secret|||FIELD||||||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Private|||FIELD|||true|||FIELD|||false|||NOTE|||" + raw := "id1|||FIELD|||Secret|||FIELD||||||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Private|||FIELD|||true|||FIELD|||false|||FIELD||||||NOTE|||" notes, err := ParseNotesOutput(raw) require.NoError(t, err) @@ -82,7 +85,7 @@ func TestParseNotesOutput_ProtectedNote(t *testing.T) { } func TestParseNotesOutput_NestedFolderPath(t *testing.T) { - raw := "id1|||FIELD|||Note|||FIELD|||

body

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Work/Projects/Go|||FIELD|||false|||FIELD|||false|||NOTE|||" + raw := "id1|||FIELD|||Note|||FIELD|||

body

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Work/Projects/Go|||FIELD|||false|||FIELD|||false|||FIELD||||||NOTE|||" notes, err := ParseNotesOutput(raw) require.NoError(t, err) @@ -203,6 +206,63 @@ func TestParseAppleScriptDate_InvalidFormat(t *testing.T) { assert.Contains(t, err.Error(), "no matching format found") } +func TestParseNotesOutput_WithAttachments(t *testing.T) { + raw := "id1|||FIELD|||Note|||FIELD|||

body

|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||Monday, January 1, 2026 at 10:00:00 AM|||FIELD|||iCloud|||FIELD|||Notes|||FIELD|||false|||FIELD|||false|||FIELD|||photo.jpg|||AFIELD|||ABC-123|||ATTACH|||doc.pdf|||AFIELD|||DEF-456|||NOTE|||" + + notes, err := ParseNotesOutput(raw) + require.NoError(t, err) + require.Len(t, notes, 1) + require.Len(t, notes[0].Attachments, 2) + + assert.Equal(t, "photo.jpg", notes[0].Attachments[0].Name) + assert.Equal(t, "ABC-123", notes[0].Attachments[0].ContentID) + assert.Equal(t, model.AttachmentImage, notes[0].Attachments[0].Type) + + assert.Equal(t, "doc.pdf", notes[0].Attachments[1].Name) + assert.Equal(t, "DEF-456", notes[0].Attachments[1].ContentID) + assert.Equal(t, model.AttachmentFile, notes[0].Attachments[1].Type) +} + +func TestParseAttachments_Empty(t *testing.T) { + result := parseAttachments("") + assert.Nil(t, result) +} + +func TestParseAttachments_Single(t *testing.T) { + result := parseAttachments("image.png|||AFIELD|||CID-123") + require.Len(t, result, 1) + assert.Equal(t, "image.png", result[0].Name) + assert.Equal(t, "CID-123", result[0].ContentID) + assert.Equal(t, model.AttachmentImage, result[0].Type) +} + +func TestParseAttachments_Multiple(t *testing.T) { + result := parseAttachments("a.jpg|||AFIELD|||cid1|||ATTACH|||b.mov|||AFIELD|||cid2") + require.Len(t, result, 2) + assert.Equal(t, model.AttachmentImage, result[0].Type) + assert.Equal(t, model.AttachmentVideo, result[1].Type) +} + +func TestInferAttachmentType(t *testing.T) { + tests := []struct { + name string + expected model.AttachmentType + }{ + {"photo.jpg", model.AttachmentImage}, + {"PHOTO.JPEG", model.AttachmentImage}, + {"image.png", model.AttachmentImage}, + {"pic.heic", model.AttachmentImage}, + {"video.mp4", model.AttachmentVideo}, + {"clip.mov", model.AttachmentVideo}, + {"document.pdf", model.AttachmentFile}, + {"notes.txt", model.AttachmentFile}, + } + + for _, tt := range tests { + assert.Equal(t, tt.expected, 
inferAttachmentType(tt.name), tt.name) + } +} + func TestTruncate(t *testing.T) { assert.Equal(t, "hello", truncate("hello", 10)) assert.Equal(t, "hel...", truncate("hello world", 3)) diff --git a/internal/applescript/scripts/get_all_notes.applescript b/internal/applescript/scripts/get_all_notes.applescript index e20cf2c..ddc9268 100644 --- a/internal/applescript/scripts/get_all_notes.applescript +++ b/internal/applescript/scripts/get_all_notes.applescript @@ -1,10 +1,13 @@ -- get_all_notes.applescript -- Extracts all notes from Apple Notes. -- Output format: fields separated by |||FIELD|||, records separated by |||NOTE||| --- Fields: id, name, body, creation_date, modification_date, account, folder_path, password_protected, shared +-- Fields: id, name, body, creation_date, modification_date, account, folder_path, password_protected, shared, attachments +-- Attachments field: name|||AFIELD|||content_identifier per attachment, separated by |||ATTACH||| set fieldSep to "|||FIELD|||" set recSep to "|||NOTE|||" +set attachSep to "|||ATTACH|||" +set attachFieldSep to "|||AFIELD|||" set output to "" tell application "Notes" @@ -38,7 +41,24 @@ tell application "Notes" set noteProtected to password protected of n as text set noteShared to shared of n as text - set output to output & noteID & fieldSep & noteName & fieldSep & noteBody & fieldSep & noteCreated & fieldSep & noteModified & fieldSep & acctName & fieldSep & fullPath & fieldSep & noteProtected & fieldSep & noteShared & recSep + -- Extract attachment metadata. 
+ set attachInfo to "" + try + set noteAttachments to attachments of n + repeat with att in noteAttachments + set attName to name of att + set attCID to "" + try + set attCID to content identifier of att + end try + if attachInfo is not "" then + set attachInfo to attachInfo & attachSep + end if + set attachInfo to attachInfo & attName & attachFieldSep & attCID + end repeat + end try + + set output to output & noteID & fieldSep & noteName & fieldSep & noteBody & fieldSep & noteCreated & fieldSep & noteModified & fieldSep & acctName & fieldSep & fullPath & fieldSep & noteProtected & fieldSep & noteShared & fieldSep & attachInfo & recSep end repeat end repeat end repeat diff --git a/internal/model/model.go b/internal/model/model.go index 6bc86ec..0e478c9 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -23,6 +23,9 @@ type Attachment struct { Type AttachmentType // Name is the original filename or link text. Name string + // ContentID is the Apple Notes content identifier used to reference this + // attachment in the note's HTML body (e.g. in cid: URLs). + ContentID string // URL is the hyperlink for link attachments or the file path for local files. URL string // MIMEType is the media type (e.g. "image/png"). diff --git a/internal/syncer/syncer.go b/internal/syncer/syncer.go index 4edf0c2..0dfb48b 100644 --- a/internal/syncer/syncer.go +++ b/internal/syncer/syncer.go @@ -73,6 +73,15 @@ func (s *Syncer) Sync(ctx context.Context) (*model.SyncResult, error) { } result.TotalNotes = len(notes) + // Step 1.5: Resolve attachment file data from the Notes media directory. 
+ if s.cfg.Attachments.Enabled { + s.logger.Info("resolving attachments") + if err := s.extractor.ResolveAttachments(ctx, notes, s.cfg.Attachments.MaxSizeMB); err != nil { + s.logger.Warn("failed to resolve attachments", zap.Error(err)) + result.Errors = append(result.Errors, fmt.Errorf("resolving attachments: %w", err)) + } + } + // Step 2: Apply filters (exclude folders, protected, shared). notes = s.applyFilters(notes) @@ -108,6 +117,29 @@ func (s *Syncer) Sync(ctx context.Context) (*model.SyncResult, error) { result.WrittenNotes = len(writtenPaths) result.SkippedNotes = result.TotalNotes - result.WrittenNotes + // Step 4.5: Save attachments for written notes. + if s.cfg.Attachments.Enabled { + for i, notePath := range writtenPaths { + for j := range notes[i].Attachments { + att := &notes[i].Attachments[j] + if att.Data == nil { + continue + } + savedPath, err := s.writer.SaveAttachment(ctx, notePath, att) + if err != nil { + s.logger.Warn("failed to save attachment", + zap.String("note", notes[i].Name), + zap.String("attachment", att.Name), + zap.Error(err), + ) + result.Errors = append(result.Errors, fmt.Errorf("saving attachment %q for note %q: %w", att.Name, notes[i].Name, err)) + continue + } + s.logger.Debug("saved attachment", zap.String("path", savedPath)) + } + } + } + // Step 5: Clean orphaned files. 
if s.cfg.CleanOrphans { removed, err := s.writer.CleanOrphanedFiles(ctx, writtenPaths) diff --git a/internal/syncer/syncer_test.go b/internal/syncer/syncer_test.go index 126da2e..3d230c7 100644 --- a/internal/syncer/syncer_test.go +++ b/internal/syncer/syncer_test.go @@ -28,6 +28,11 @@ func (m *mockExtractor) GetAllNotes(ctx context.Context, accounts []string, fold return args.Get(0).([]model.Note), args.Error(1) } +func (m *mockExtractor) ResolveAttachments(ctx context.Context, notes []model.Note, maxSizeMB int) error { + args := m.Called(ctx, notes, maxSizeMB) + return args.Error(0) +} + type mockConverter struct{ mock.Mock } func (m *mockConverter) Convert(html string) (string, error) { @@ -112,6 +117,11 @@ func defaultConfig() *config.Config { ExcludeFolders: []string{"Recently Deleted"}, SkipProtected: true, }, + Attachments: config.AttachmentConfig{ + Enabled: true, + MaxSizeMB: 50, + Dir: "_attachments", + }, } } @@ -145,6 +155,7 @@ func TestSyncer_Sync_FullPipeline(t *testing.T) { notes := testNotes() ext.On("GetAllNotes", mock.Anything, []string(nil), []string(nil)).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", "

body1

").Return("body1\n", nil) conv.On("Convert", "

body2

").Return("body2\n", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"Notes/Note 1.md", "Work/Note 2.md"}, nil) @@ -182,6 +193,7 @@ func TestSyncer_Sync_DryRun(t *testing.T) { notes := testNotes() ext.On("GetAllNotes", mock.Anything, []string(nil), []string(nil)).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", mock.Anything).Return("markdown\n", nil) result, err := s.Sync(context.Background()) @@ -206,6 +218,7 @@ func TestSyncer_Sync_NoChanges(t *testing.T) { notes := testNotes() ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", mock.Anything).Return("md\n", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"a.md", "b.md"}, nil) wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) @@ -233,6 +246,7 @@ func TestSyncer_Sync_GitDisabled(t *testing.T) { s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(testNotes(), nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", mock.Anything).Return("md\n", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"a.md"}, nil) wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) @@ -258,6 +272,7 @@ func TestSyncer_Sync_WithRclone(t *testing.T) { s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(testNotes(), nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", mock.Anything).Return("md\n", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"a.md"}, nil) wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) @@ -377,6 +392,7 @@ 
func TestSyncer_Sync_PushDisabled(t *testing.T) { s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(testNotes(), nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", mock.Anything).Return("md\n", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"a.md"}, nil) wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) @@ -408,6 +424,7 @@ func TestSyncer_Sync_ConvertError_NonFatal(t *testing.T) { } ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) conv.On("Convert", "

ok

").Return("ok\n", nil) conv.On("Convert", "

broken

").Return("", assert.AnError) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"Notes/Good.md", "Notes/Bad.md"}, nil) @@ -425,3 +442,65 @@ func TestSyncer_Sync_ConvertError_NonFatal(t *testing.T) { assert.Len(t, result.Errors, 1) assert.Contains(t, result.Errors[0].Error(), "converting note") } + +func TestSyncer_Sync_WithAttachments(t *testing.T) { + cfg := defaultConfig() + cfg.Git.Enabled = false + ext := new(mockExtractor) + conv := new(mockConverter) + wr := new(mockWriter) + git := new(mockGit) + rc := new(mockRclone) + + s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) + + notes := []model.Note{ + { + ID: "1", Name: "Note With Image", BodyHTML: "

body

", + FolderPath: "Notes", Account: "iCloud", + CreatedAt: time.Now(), ModifiedAt: time.Now(), + Attachments: []model.Attachment{ + {Name: "photo.jpg", ContentID: "CID-123", Type: model.AttachmentImage, Data: []byte("fake-image-data")}, + }, + }, + } + + ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) + conv.On("Convert", mock.Anything).Return("body\n", nil) + wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"Notes/Note With Image.md"}, nil) + wr.On("SaveAttachment", mock.Anything, "Notes/Note With Image.md", &notes[0].Attachments[0]).Return("Notes/_attachments/photo.jpg", nil) + wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) + + result, err := s.Sync(context.Background()) + require.NoError(t, err) + assert.Empty(t, result.Errors) + + wr.AssertCalled(t, "SaveAttachment", mock.Anything, "Notes/Note With Image.md", &notes[0].Attachments[0]) +} + +func TestSyncer_Sync_AttachmentsDisabled(t *testing.T) { + cfg := defaultConfig() + cfg.Attachments.Enabled = false + cfg.Git.Enabled = false + ext := new(mockExtractor) + conv := new(mockConverter) + wr := new(mockWriter) + git := new(mockGit) + rc := new(mockRclone) + + s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) + + ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(testNotes(), nil) + conv.On("Convert", mock.Anything).Return("md\n", nil) + wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"a.md", "b.md"}, nil) + wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) + + result, err := s.Sync(context.Background()) + require.NoError(t, err) + assert.Empty(t, result.Errors) + + // ResolveAttachments and SaveAttachment should NOT be called when disabled. 
+ ext.AssertNotCalled(t, "ResolveAttachments") + wr.AssertNotCalled(t, "SaveAttachment") +} From a80672da1b8cc5d2d1baf06741990bbb83470945 Mon Sep 17 00:00:00 2001 From: Agni Date: Mon, 23 Mar 2026 18:02:08 +0530 Subject: [PATCH 2/2] not usable right now --- internal/filesystem/writer.go | 133 ++++++++++++++++++++++++++++++- internal/syncer/syncer.go | 140 +++++++++++++++++++++++++++------ internal/syncer/syncer_test.go | 93 +++++++++++++++++++++- 3 files changed, 339 insertions(+), 27 deletions(-) diff --git a/internal/filesystem/writer.go b/internal/filesystem/writer.go index aabff75..8df4fe2 100644 --- a/internal/filesystem/writer.go +++ b/internal/filesystem/writer.go @@ -5,6 +5,7 @@ package filesystem import ( "bytes" "context" + "encoding/base64" "fmt" "os" "path/filepath" @@ -34,6 +35,10 @@ type NoteWriter interface { // SaveAttachment writes an attachment to disk alongside its note. // Returns the relative file path of the saved attachment. SaveAttachment(ctx context.Context, notePath string, attachment *model.Attachment) (string, error) + + // NoteRelPath returns the relative path a note would be written to, + // without writing the file. Used to pre-compute paths for attachment saving. + NoteRelPath(note *model.Note) string } // FSNoteWriter is the real filesystem implementation of NoteWriter. @@ -101,6 +106,10 @@ func (w *FSNoteWriter) WriteNote(ctx context.Context, note *model.Note) (string, fileName := note.SanitizedFileName() + ".md" fullPath := filepath.Join(dirPath, fileName) + // Extract inline base64 images from markdown body, save them as files, + // and replace with relative paths. + bodyMarkdown := w.extractInlineImages(dirPath, note.BodyMarkdown) + // Build file content: title heading, body, then metadata table at bottom. var content strings.Builder @@ -110,7 +119,7 @@ func (w *FSNoteWriter) WriteNote(ctx context.Context, note *model.Note) (string, content.WriteString("\n\n") // Note body. 
- content.WriteString(note.BodyMarkdown) + content.WriteString(bodyMarkdown) // Metadata table at the bottom after a divider. if w.frontMatter { @@ -160,6 +169,17 @@ func (w *FSNoteWriter) WriteAll(ctx context.Context, notes []model.Note) ([]stri return paths, nil } +// NoteRelPath returns the relative path a note would be written to, +// without actually writing the file. Used to compute attachment paths +// before the note is written. +func (w *FSNoteWriter) NoteRelPath(note *model.Note) string { + dirPath := filepath.Join(w.notesDir(), filepath.FromSlash(note.FolderPath)) + fileName := note.SanitizedFileName() + ".md" + fullPath := filepath.Join(dirPath, fileName) + relPath, _ := filepath.Rel(w.basePath, fullPath) + return relPath +} + // CleanOrphanedFiles removes .md files that are not in the currentNotePaths set. func (w *FSNoteWriter) CleanOrphanedFiles(ctx context.Context, currentNotePaths []string) ([]string, error) { currentSet := make(map[string]bool, len(currentNotePaths)) @@ -256,6 +276,117 @@ func (w *FSNoteWriter) SaveAttachment(ctx context.Context, notePath string, atta return relPath, nil } +// dataURIPrefix is the marker we scan for to find inline base64 images. +const dataURIPrefix = "](data:image/" + +// extractInlineImages finds base64-encoded data URI images in the markdown, +// saves each as a file in the _attachments subdirectory, and returns the +// markdown with data URIs replaced by relative file paths. +// +// Uses string scanning instead of regex because base64 payloads can be +// millions of characters, which causes regex backtracking issues. +func (w *FSNoteWriter) extractInlineImages(noteDir string, markdown string) string { + if !strings.Contains(markdown, dataURIPrefix) { + return markdown + } + + var buf strings.Builder + buf.Grow(len(markdown) / 2) // Result will be much smaller. + imageCount := 0 + pos := 0 + + for pos < len(markdown) { + // Find the next "](data:image/" marker. 
+ idx := strings.Index(markdown[pos:], dataURIPrefix) + if idx == -1 { + buf.WriteString(markdown[pos:]) + break + } + + markerStart := pos + idx // Position of "]" in "](data:image/..." + + // Find the "![" that starts this image tag by scanning backwards. + imgStart := strings.LastIndex(markdown[pos:markerStart], "![") + if imgStart == -1 { + // No opening "![" found, write up to past the marker and continue. + buf.WriteString(markdown[pos : markerStart+len(dataURIPrefix)]) + pos = markerStart + len(dataURIPrefix) + continue + } + imgStart += pos // Convert to absolute position. + + // Extract alt text from ![alt]. + altEnd := markerStart + alt := markdown[imgStart+2 : altEnd] + + // Extract image type: "](data:image/TYPE;base64,DATA)" + // Find ";base64," after the marker. + afterMarker := markerStart + 2 // Skip "](" + semicolonIdx := strings.Index(markdown[afterMarker:], ";base64,") + if semicolonIdx == -1 { + buf.WriteString(markdown[pos : markerStart+len(dataURIPrefix)]) + pos = markerStart + len(dataURIPrefix) + continue + } + ext := markdown[afterMarker+len("data:image/") : afterMarker+semicolonIdx] + + // Find the closing ")" — the base64 data runs until the next ")". + b64Start := afterMarker + semicolonIdx + len(";base64,") + closeParen := strings.Index(markdown[b64Start:], ")") + if closeParen == -1 { + buf.WriteString(markdown[pos : markerStart+len(dataURIPrefix)]) + pos = markerStart + len(dataURIPrefix) + continue + } + + b64data := markdown[b64Start : b64Start+closeParen] + fullEnd := b64Start + closeParen + 1 // Past the ")" + + // Write everything before this image tag. + buf.WriteString(markdown[pos:imgStart]) + + // Decode and save. 
+ data, err := base64.StdEncoding.DecodeString(b64data) + if err != nil { + w.logger.Debug("failed to decode base64 image", zap.Error(err)) + buf.WriteString(markdown[imgStart:fullEnd]) + pos = fullEnd + continue + } + + attachDir := filepath.Join(noteDir, w.attachmentDir) + if err := os.MkdirAll(attachDir, 0755); err != nil { + w.logger.Debug("failed to create attachment dir", zap.Error(err)) + buf.WriteString(markdown[imgStart:fullEnd]) + pos = fullEnd + continue + } + + imageCount++ + fileName := fmt.Sprintf("image_%d.%s", imageCount, ext) + filePath := filepath.Join(attachDir, fileName) + + if err := os.WriteFile(filePath, data, 0644); err != nil { + w.logger.Debug("failed to write inline image", zap.String("path", filePath), zap.Error(err)) + buf.WriteString(markdown[imgStart:fullEnd]) + pos = fullEnd + continue + } + + w.logger.Debug("extracted inline image", + zap.String("path", filePath), + zap.Int("bytes", len(data)), + ) + + // Write the replacement markdown. + relPath := filepath.Join(w.attachmentDir, fileName) + fmt.Fprintf(&buf, "![%s](%s)", alt, relPath) + pos = fullEnd + } + + return buf.String() +} + // removeEmptyDirs walks a directory tree bottom-up and removes empty directories. func removeEmptyDirs(root string) error { return filepath.Walk(root, func(path string, info os.FileInfo, err error) error { diff --git a/internal/syncer/syncer.go b/internal/syncer/syncer.go index 0dfb48b..06b93c3 100644 --- a/internal/syncer/syncer.go +++ b/internal/syncer/syncer.go @@ -6,6 +6,9 @@ import ( "bytes" "context" "fmt" + "path/filepath" + "regexp" + "strings" "text/template" "time" @@ -109,6 +112,14 @@ func (s *Syncer) Sync(ctx context.Context) (*model.SyncResult, error) { return result, nil } + // Step 4a: Save attachments as separate files and rewrite markdown + // references before writing the note files. 
+ if s.cfg.Attachments.Enabled { + for i := range notes { + s.saveAndRewriteAttachments(ctx, &notes[i], result) + } + } + s.logger.Info("writing notes to disk") writtenPaths, err := s.writer.WriteAll(ctx, notes) if err != nil { @@ -117,29 +128,6 @@ func (s *Syncer) Sync(ctx context.Context) (*model.SyncResult, error) { result.WrittenNotes = len(writtenPaths) result.SkippedNotes = result.TotalNotes - result.WrittenNotes - // Step 4.5: Save attachments for written notes. - if s.cfg.Attachments.Enabled { - for i, notePath := range writtenPaths { - for j := range notes[i].Attachments { - att := &notes[i].Attachments[j] - if att.Data == nil { - continue - } - savedPath, err := s.writer.SaveAttachment(ctx, notePath, att) - if err != nil { - s.logger.Warn("failed to save attachment", - zap.String("note", notes[i].Name), - zap.String("attachment", att.Name), - zap.Error(err), - ) - result.Errors = append(result.Errors, fmt.Errorf("saving attachment %q for note %q: %w", att.Name, notes[i].Name, err)) - continue - } - s.logger.Debug("saved attachment", zap.String("path", savedPath)) - } - } - } - // Step 5: Clean orphaned files. if s.cfg.CleanOrphans { removed, err := s.writer.CleanOrphanedFiles(ctx, writtenPaths) @@ -282,6 +270,112 @@ func (s *Syncer) buildCommitMessage(result *model.SyncResult) (string, error) { return buf.String(), nil } +// dataURIImageRegex matches markdown images with data: URIs (inline base64 images). +// Example: ![alt](data:image/png;base64,iVBOR...) +var dataURIImageRegex = regexp.MustCompile(`!\[([^\]]*)\]\(data:[^)]+\)`) + +// cidImageRegex matches markdown images with cid: URIs (Apple Notes content ID references). +// Example: ![alt](cid:ABC-123-DEF) +var cidImageRegex = regexp.MustCompile(`!\[([^\]]*)\]\(cid:([^)]+)\)`) + +// saveAndRewriteAttachments saves attachment files to disk and rewrites the +// note's markdown body to reference the saved files instead of inline data +// or cid: URIs. 
+func (s *Syncer) saveAndRewriteAttachments(ctx context.Context, note *model.Note, result *model.SyncResult) { + if len(note.Attachments) == 0 { + return + } + + notePath := s.writer.NoteRelPath(note) + noteDir := filepath.Dir(notePath) + + // Save each attachment and build a content ID → relative path map. + cidToPath := make(map[string]string) + var savedNames []string + + for j := range note.Attachments { + att := &note.Attachments[j] + if att.Data == nil { + continue + } + + savedPath, err := s.writer.SaveAttachment(ctx, notePath, att) + if err != nil { + s.logger.Warn("failed to save attachment", + zap.String("note", note.Name), + zap.String("attachment", att.Name), + zap.Error(err), + ) + result.Errors = append(result.Errors, fmt.Errorf("saving attachment %q for note %q: %w", att.Name, note.Name, err)) + continue + } + + // Compute relative path from note's directory to the saved attachment. + relFromNote, _ := filepath.Rel(noteDir, savedPath) + + if att.ContentID != "" { + cidToPath[att.ContentID] = relFromNote + } + savedNames = append(savedNames, relFromNote) + s.logger.Debug("saved attachment", zap.String("path", savedPath)) + } + + if len(savedNames) == 0 { + return + } + + // Rewrite cid: references in markdown with the actual file paths. + md := note.BodyMarkdown + md = cidImageRegex.ReplaceAllStringFunc(md, func(match string) string { + sub := cidImageRegex.FindStringSubmatch(match) + if len(sub) < 3 { + return match + } + alt, cid := sub[1], sub[2] + if path, ok := cidToPath[cid]; ok { + return fmt.Sprintf("![%s](%s)", alt, path) + } + return match + }) + + // Replace inline data: URI images with the first available saved attachment + // that hasn't been mapped via cid. This handles base64-embedded images. 
+ nameIdx := 0 + md = dataURIImageRegex.ReplaceAllStringFunc(md, func(match string) string { + sub := dataURIImageRegex.FindStringSubmatch(match) + if len(sub) < 2 { + return match + } + alt := sub[1] + + // Find the next saved attachment that wasn't already used for a cid ref. + for nameIdx < len(savedNames) { + path := savedNames[nameIdx] + nameIdx++ + // Skip paths already used as cid replacements. + alreadyUsed := false + for _, v := range cidToPath { + if v == path { + alreadyUsed = true + break + } + } + if !alreadyUsed { + return fmt.Sprintf("![%s](%s)", alt, path) + } + } + + // If we run out of saved attachments, keep the original. + return match + }) + + // Also replace any remaining raw data: URIs that might appear as plain + // links (not images) — e.g. + md = strings.ReplaceAll(md, "\n\n\n", "\n\n") + + note.BodyMarkdown = md +} + // rcloneSync performs the rclone sync operation. func (s *Syncer) rcloneSync(ctx context.Context) error { available, err := s.rclone.IsAvailable(ctx) diff --git a/internal/syncer/syncer_test.go b/internal/syncer/syncer_test.go index 3d230c7..7443c76 100644 --- a/internal/syncer/syncer_test.go +++ b/internal/syncer/syncer_test.go @@ -2,6 +2,7 @@ package syncer import ( "context" + "strings" "testing" "time" @@ -62,6 +63,11 @@ func (m *mockWriter) SaveAttachment(ctx context.Context, notePath string, attach return args.String(0), args.Error(1) } +func (m *mockWriter) NoteRelPath(note *model.Note) string { + args := m.Called(note) + return args.String(0) +} + type mockGit struct{ mock.Mock } func (m *mockGit) Init(ctx context.Context) error { @@ -467,16 +473,97 @@ func TestSyncer_Sync_WithAttachments(t *testing.T) { ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) - conv.On("Convert", mock.Anything).Return("body\n", nil) + conv.On("Convert", mock.Anything).Return("![image](cid:CID-123)\n", nil) + wr.On("NoteRelPath", 
mock.Anything).Return("Notes/Note With Image.md") + wr.On("SaveAttachment", mock.Anything, "Notes/Note With Image.md", mock.Anything).Return("Notes/_attachments/photo.jpg", nil) wr.On("WriteAll", mock.Anything, mock.Anything).Return([]string{"Notes/Note With Image.md"}, nil) - wr.On("SaveAttachment", mock.Anything, "Notes/Note With Image.md", &notes[0].Attachments[0]).Return("Notes/_attachments/photo.jpg", nil) wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) result, err := s.Sync(context.Background()) require.NoError(t, err) assert.Empty(t, result.Errors) - wr.AssertCalled(t, "SaveAttachment", mock.Anything, "Notes/Note With Image.md", &notes[0].Attachments[0]) + // Attachment should be saved before WriteAll. + wr.AssertCalled(t, "SaveAttachment", mock.Anything, "Notes/Note With Image.md", mock.Anything) +} + +func TestSyncer_Sync_AttachmentRewritesCidRefs(t *testing.T) { + cfg := defaultConfig() + cfg.Git.Enabled = false + ext := new(mockExtractor) + conv := new(mockConverter) + wr := new(mockWriter) + git := new(mockGit) + rc := new(mockRclone) + + s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) + + notes := []model.Note{ + { + ID: "1", Name: "Image Note", BodyHTML: "

body

", + FolderPath: "Notes", Account: "iCloud", + CreatedAt: time.Now(), ModifiedAt: time.Now(), + Attachments: []model.Attachment{ + {Name: "pic.png", ContentID: "ABC-123", Data: []byte("png-data")}, + }, + }, + } + + ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) + conv.On("Convert", mock.Anything).Return("Some text\n\n![photo](cid:ABC-123)\n\nMore text\n", nil) + wr.On("NoteRelPath", mock.Anything).Return("Notes/Image Note.md") + wr.On("SaveAttachment", mock.Anything, "Notes/Image Note.md", mock.Anything).Return("Notes/_attachments/pic.png", nil) + wr.On("WriteAll", mock.Anything, mock.MatchedBy(func(notes []model.Note) bool { + // The cid: reference should have been rewritten to the attachment path. + return strings.Contains(notes[0].BodyMarkdown, "_attachments/pic.png") && + !strings.Contains(notes[0].BodyMarkdown, "cid:") + })).Return([]string{"Notes/Image Note.md"}, nil) + wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) + + result, err := s.Sync(context.Background()) + require.NoError(t, err) + assert.Empty(t, result.Errors) + wr.AssertExpectations(t) +} + +func TestSyncer_Sync_AttachmentRewritesDataURI(t *testing.T) { + cfg := defaultConfig() + cfg.Git.Enabled = false + ext := new(mockExtractor) + conv := new(mockConverter) + wr := new(mockWriter) + git := new(mockGit) + rc := new(mockRclone) + + s := NewSyncer(cfg, ext, conv, wr, git, rc, zap.NewNop()) + + notes := []model.Note{ + { + ID: "1", Name: "Data URI Note", BodyHTML: "

body

", + FolderPath: "Work", Account: "iCloud", + CreatedAt: time.Now(), ModifiedAt: time.Now(), + Attachments: []model.Attachment{ + {Name: "screenshot.png", Data: []byte("png-data")}, + }, + }, + } + + ext.On("GetAllNotes", mock.Anything, mock.Anything, mock.Anything).Return(notes, nil) + ext.On("ResolveAttachments", mock.Anything, mock.Anything, 50).Return(nil) + conv.On("Convert", mock.Anything).Return("Text\n\n![](data:image/png;base64,aVeryLongBase64String)\n", nil) + wr.On("NoteRelPath", mock.Anything).Return("Work/Data URI Note.md") + wr.On("SaveAttachment", mock.Anything, "Work/Data URI Note.md", mock.Anything).Return("Work/_attachments/screenshot.png", nil) + wr.On("WriteAll", mock.Anything, mock.MatchedBy(func(notes []model.Note) bool { + return strings.Contains(notes[0].BodyMarkdown, "_attachments/screenshot.png") && + !strings.Contains(notes[0].BodyMarkdown, "data:image") + })).Return([]string{"Work/Data URI Note.md"}, nil) + wr.On("CleanOrphanedFiles", mock.Anything, mock.Anything).Return([]string{}, nil) + + result, err := s.Sync(context.Background()) + require.NoError(t, err) + assert.Empty(t, result.Errors) + wr.AssertExpectations(t) } func TestSyncer_Sync_AttachmentsDisabled(t *testing.T) {