diff --git a/nexanote/sync/client.py b/nexanote/sync/client.py index 3f56536..fecde0b 100644 --- a/nexanote/sync/client.py +++ b/nexanote/sync/client.py @@ -18,6 +18,7 @@ from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum +from pathlib import Path from typing import Optional from urllib.parse import urljoin, quote, unquote @@ -25,8 +26,9 @@ from requests.auth import HTTPBasicAuth from nexanote.models.note import InkStroke, Note, Notebook, NoteType, Page, Point, SyncStatus -from nexanote.storage.file_store import FileNoteStore +from nexanote.storage.file_store import FileNoteStore, PLAIN_MD_ID_PREFIX from nexanote.sync.conflict import ConflictResolver, ConflictStrategy +from nexanote.sync.sync_state import SyncState logger = logging.getLogger("nexanote.sync.client") @@ -35,6 +37,28 @@ DEFAULT_NOTEBOOK_SLUG = "uncategorized" +def _is_legacy_remote_id(note_id: Optional[str]) -> bool: + """ + EN: Return True when ``note_id`` looks like it was synthesised by the + WebDAV provider for a plain Markdown file with no NexaNote + frontmatter. Such ids start with ``md.`` (the + ``PLAIN_MD_ID_PREFIX``) and are not stable across renames — we + treat them as "legacy / manual" and avoid duplicating them on + every pull. + FR: Vrai si ``note_id`` ressemble à un id synthétisé pour un .md + sans frontmatter NexaNote (préfixe ``md.``). On considère ces + notes comme "héritées / manuelles". + """ + if not note_id: + return True + return note_id.startswith(PLAIN_MD_ID_PREFIX) + + +def _remote_path(nb_slug: str, note_slug: str) -> str: + """Stable string key combining notebook + note slug for the registry.""" + return f"{nb_slug}/{note_slug}" + + def _sanitize_request_error(exc: BaseException) -> str: """ EN: Render a requests/network exception into a short, user-safe reason. 
@@ -93,6 +117,13 @@ class SyncReport: notes_pulled: int = 0 notes_pushed: int = 0 conflicts_resolved: int = 0 + # EN: Remote .md files we deliberately skipped because they have no + # NexaNote frontmatter id and aren't safely mappable. Surfaces in + # the diagnostic summary so users can tell whether their remote + # folder still carries legacy hand-edited files. + # FR: .md distants délibérément ignorés (sans id NexaNote sûrement + # mappable). Exposé dans le résumé pour diagnostiquer. + notes_ignored_legacy: int = 0 errors: list[str] = field(default_factory=list) events: list[SyncEvent] = field(default_factory=list) @@ -112,6 +143,11 @@ def summary(self) -> str: f"Sync terminée en {self.duration_seconds():.1f}s — " f"{self.notes_pulled} reçues, {self.notes_pushed} envoyées, " f"{self.conflicts_resolved} conflits résolus" + + ( + f", {self.notes_ignored_legacy} héritées ignorées" + if self.notes_ignored_legacy + else "" + ) + (f", {len(self.errors)} erreurs" if self.errors else "") ) @@ -565,6 +601,12 @@ def __init__(self, db: FileNoteStore, config: SyncConfig) -> None: self.config = config self.client = WebDAVClient(config) self.resolver = ConflictResolver(config.conflict_strategy) + # EN: Per-data-dir registry of adopted/ignored remote paths. Loaded + # up-front so a re-pull skips legacy files we've already decided + # about; saved at the end of every ``sync()`` call. + # FR: Registre des chemins distants adoptés/ignorés. Chargé à l'init, + # sauvegardé à la fin de chaque ``sync()``. 
+ self.sync_state = SyncState.load(Path(db.data_dir)) # ------------------------------------------------------------------ # Point d'entrée principal @@ -594,6 +636,14 @@ def sync(self) -> SyncReport: except Exception as e: logger.exception("Erreur inattendue pendant la sync") report.errors.append(str(e)) + finally: + # Persist the registry even when sync errored — we still want + # to remember any "ignored" decisions made during the partial + # run so the next sync skips those remote paths immediately. + try: + self.sync_state.save() + except Exception: + logger.exception("could not persist sync state") report.finish() logger.info(report.summary()) @@ -639,16 +689,117 @@ def _pull_notebook(self, nb_slug: str, report: SyncReport) -> None: report.errors.append(msg) def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: - """Pull une note spécifique depuis le serveur.""" + """ + EN: Pull a single note from the server. Resolves the local target + in three stages — note id, then remote-path mapping, then + (only for non-legacy ids) a fresh adoption. Notes whose remote + id looks legacy/manual (no real NexaNote frontmatter id) get + recorded in the ignore registry on first encounter so we never + re-import them. This is the core of the duplicate-creation fix. + FR: Récupère une note précise depuis le serveur. Résolution en + trois étapes (id → remote_path → adoption). Les notes dont + l'id ressemble à un fichier .md hérité sont enregistrées dans + le registre "ignoré" pour ne pas être réimportées. + """ + remote_path = _remote_path(nb_slug, note_slug) + + # Step 1: short-circuit on previously ignored paths. Touch the entry + # so its `last_seen` timestamp reflects the latest sync — useful for + # "still seeing this file" diagnostics. 
+ if self.sync_state.is_ignored(remote_path): + self.sync_state.touch_ignored(remote_path) + report.notes_ignored_legacy += 1 + return + meta = self.client.get_note_meta(nb_slug, note_slug) if not meta: return note_id = meta.get("id") if not note_id: + # No id at all in the server payload — nothing safe to do but + # remember to skip it. Legacy/manual file with no NexaNote + # metadata; importing would invent a fresh id every time. + reason = "remote note.json carried no id" + self.sync_state.mark_ignored(remote_path, reason) + report.notes_ignored_legacy += 1 + logger.info( + " ⊘ Legacy/manual note ignored (%s): %s", + reason, + remote_path, + ) return - # Charger les pages manuscrites + legacy_id = _is_legacy_remote_id(note_id) + + # Step 2: try to find the local twin. First by id (frontmatter or + # previously-adopted synthetic id), then by the remote-path mapping + # we built up over previous sync sessions. + local_note = self.db.get_note(note_id, load_pages=True) + matched_via_remote_path = False + adopted_local_id: Optional[str] = None + if local_note is None: + adopted_local_id = self.sync_state.get_adopted_local_id(remote_path) + if adopted_local_id and adopted_local_id != note_id: + local_note = self.db.get_note( + adopted_local_id, load_pages=True + ) + if local_note is not None: + matched_via_remote_path = True + + # If a previous adoption mapped this remote_path to a local id that + # is no longer in the store, treat it as "user purged it" — we MUST + # NOT silently re-import, since that would be a duplicate. Record + # an ignore marker (with a clear reason) and bail out. 
+ if ( + local_note is None + and adopted_local_id is not None + and adopted_local_id != note_id + ): + reason = "previously adopted local note no longer present" + self.sync_state.mark_ignored(remote_path, reason) + report.notes_ignored_legacy += 1 + logger.info( + " ⊘ Legacy/manual note ignored (%s): %s [id=%s]", + reason, + remote_path, + note_id, + ) + return + + # Step 3: if we still have no match AND the remote id is legacy, + # do not adopt — record an ignore marker so future pulls bail out + # immediately, and surface it in the report. + if local_note is None and legacy_id: + reason = ( + "no NexaNote frontmatter id; legacy/manual Markdown file" + ) + self.sync_state.mark_ignored(remote_path, reason) + report.notes_ignored_legacy += 1 + logger.info( + " ⊘ Legacy/manual note ignored (%s): %s [id=%s]", + reason, + remote_path, + note_id, + ) + return + + # Remote_path match with a different id: we've already adopted this + # path under another local id. Refuse to merge content (the conflict + # resolver requires same-id notes), but refresh the mapping so the + # registry stays current. The local copy is canonical here. + if matched_via_remote_path and local_note is not None: + self.sync_state.mark_adopted(remote_path, local_note.id) + logger.info( + " ↺ Remote_path match (id mismatch — keeping local %s): %s", + local_note.id, + remote_path, + ) + return + + # From here we know we want to adopt. Pull the ink pages now — + # we skipped them earlier so the legacy-ignore branch never burned + # extra HTTP calls on files we won't import. 
ink_pages: dict[int, dict] = {} for page_data in meta.get("pages", []): num = page_data["page_number"] @@ -658,12 +809,11 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: remote_note = _deserialize_note(meta, ink_pages) - # Chercher la version locale - local_note = self.db.get_note(note_id, load_pages=True) - if local_note is None: - # Nouvelle note inconnue localement — import direct + # Fresh adoption. The id is non-legacy (filtered above) so it + # is safe to use as-is. self.db.save_note(remote_note) + self.sync_state.mark_adopted(remote_path, remote_note.id) report.notes_pulled += 1 report.events.append(SyncEvent( SyncEventType.NOTE_PULLED, @@ -680,6 +830,7 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: if result.conflict_copy: self.db.save_note(result.conflict_copy) + self.sync_state.mark_adopted(remote_path, result.winner.id) report.notes_pulled += 1 report.conflicts_resolved += 1 report.events.append(SyncEvent( @@ -695,6 +846,9 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: self.db.save_note(remote_note) report.notes_pulled += 1 logger.info(f" ↓ Mise à jour : {remote_note.title}") + # Always refresh the mapping so future pulls go fast even when + # the content hasn't changed. + self.sync_state.mark_adopted(remote_path, local_note.id) # ------------------------------------------------------------------ # PUSH — envoyer les notes modifiées localement diff --git a/nexanote/sync/sync_state.py b/nexanote/sync/sync_state.py new file mode 100644 index 0000000..d5ab4c8 --- /dev/null +++ b/nexanote/sync/sync_state.py @@ -0,0 +1,271 @@ +""" +NexaNote — Sync state registry / Registre d'état de synchronisation. + +EN: Tracks per-data-dir sync metadata that doesn't belong inside individual + notes: + * ``adopted`` — remote paths we've already imported and the local + note id they map to. 
Lets a re-import of an externally renamed + legacy file fall back from "match by id" to "match by remote path". + * ``ignored`` — remote paths we've decided not to import (typically + legacy / manually-added Markdown files without a NexaNote + frontmatter id). Required to keep pull idempotent — without this + registry the engine re-imports those files on every sync, which + produced the duplicate-creation bug this module fixes. + + The registry lives at ``<data_dir>/.nexanote_sync_state.json``. Failures + to read or write it never break the sync flow — at worst we lose the + "already-decided" knowledge for one session. + +FR: Registre d'état de synchro par data_dir. Mémorise les chemins distants + déjà adoptés (avec leur id local) et ceux ignorés (notes manuelles + sans frontmatter NexaNote). Sans ce registre, le moteur réimportait + les .md hérités à chaque sync et créait des doublons. + + Stocké dans ``<data_dir>/.nexanote_sync_state.json``. Toute erreur + de lecture/écriture est non-bloquante. +""" + +from __future__ import annotations + +import json +import logging +import os +import tempfile +import threading +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +logger = logging.getLogger("nexanote.sync.state") + +SYNC_STATE_FILENAME = ".nexanote_sync_state.json" +SYNC_STATE_VERSION = 1 + + +def _utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +@dataclass +class AdoptedEntry: + """A remote path we've imported into the local store.""" + + local_id: str + first_seen: str + last_seen: str + + def to_dict(self) -> dict: + return { + "local_id": self.local_id, + "first_seen": self.first_seen, + "last_seen": self.last_seen, + } + + @classmethod + def from_dict(cls, data: dict) -> "AdoptedEntry": + return cls( + local_id=str(data.get("local_id", "")), + first_seen=str(data.get("first_seen", "")), + last_seen=str(data.get("last_seen", "")), + ) + + +@dataclass +class
IgnoredEntry: + """A remote path we've explicitly decided to skip.""" + + reason: str + first_seen: str + last_seen: str + + def to_dict(self) -> dict: + return { + "reason": self.reason, + "first_seen": self.first_seen, + "last_seen": self.last_seen, + } + + @classmethod + def from_dict(cls, data: dict) -> "IgnoredEntry": + return cls( + reason=str(data.get("reason", "")), + first_seen=str(data.get("first_seen", "")), + last_seen=str(data.get("last_seen", "")), + ) + + +@dataclass +class SyncState: + """ + EN: In-memory view of the on-disk registry. Mutations are thread-safe + but the JSON file is rewritten only when ``save()`` is called — + callers typically save once per sync session. + FR: Vue mémoire du registre. Mutations thread-safe ; on n'écrit le + fichier que via ``save()`` — appelé à la fin d'une session de sync. + """ + + path: Path + adopted: dict[str, AdoptedEntry] = field(default_factory=dict) + ignored: dict[str, IgnoredEntry] = field(default_factory=dict) + _lock: threading.RLock = field(default_factory=threading.RLock) + + # ------------------------------------------------------------------ + # Loading / saving + # ------------------------------------------------------------------ + + @classmethod + def load(cls, data_dir: Path) -> "SyncState": + """ + EN: Load the registry from ``<data_dir>/.nexanote_sync_state.json``. + Returns an empty state when the file is missing or unreadable — + sync still works, just without the "previously-decided" memory. + FR: Charge le registre. Retourne un état vide si le fichier manque + ou est illisible — la sync fonctionne, simplement sans mémoire.
+ """ + path = Path(data_dir) / SYNC_STATE_FILENAME + state = cls(path=path) + if not path.exists(): + return state + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "sync state unreadable (%s): %s — starting fresh", path, exc + ) + return state + if not isinstance(payload, dict): + return state + + adopted_raw = payload.get("adopted") or {} + if isinstance(adopted_raw, dict): + for key, value in adopted_raw.items(): + if isinstance(key, str) and isinstance(value, dict): + state.adopted[key] = AdoptedEntry.from_dict(value) + + ignored_raw = payload.get("ignored") or {} + if isinstance(ignored_raw, dict): + for key, value in ignored_raw.items(): + if isinstance(key, str) and isinstance(value, dict): + state.ignored[key] = IgnoredEntry.from_dict(value) + + return state + + def save(self) -> None: + """Persist the registry to disk atomically. Failures are non-fatal.""" + with self._lock: + payload = { + "version": SYNC_STATE_VERSION, + "adopted": {k: v.to_dict() for k, v in self.adopted.items()}, + "ignored": {k: v.to_dict() for k, v in self.ignored.items()}, + } + data = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8") + + try: + self.path.parent.mkdir(parents=True, exist_ok=True) + tmp_fd, tmp_path = tempfile.mkstemp( + prefix=f".{self.path.name}.", + suffix=".tmp", + dir=str(self.path.parent), + ) + try: + with os.fdopen(tmp_fd, "wb") as f: + f.write(data) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, self.path) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except OSError as exc: + logger.warning("sync state write failed (%s): %s", self.path, exc) + + # ------------------------------------------------------------------ + # Adopted entries + # ------------------------------------------------------------------ + + def get_adopted_local_id(self, remote_path: str) -> Optional[str]: + """Return the local note id we 
previously bound to ``remote_path``.""" + with self._lock: + entry = self.adopted.get(remote_path) + return entry.local_id if entry else None + + def mark_adopted(self, remote_path: str, local_id: str) -> None: + """ + EN: Record that ``remote_path`` is bound to ``local_id``. If the + same path was previously ignored, drop the ignored marker — + the user (or NexaNote) has clearly decided to take it. + FR: Mémorise l'adoption. Annule un éventuel marquage "ignoré". + """ + now = _utc_now_iso() + with self._lock: + entry = self.adopted.get(remote_path) + if entry is None: + self.adopted[remote_path] = AdoptedEntry( + local_id=local_id, + first_seen=now, + last_seen=now, + ) + else: + entry.local_id = local_id + entry.last_seen = now + self.ignored.pop(remote_path, None) + + # ------------------------------------------------------------------ + # Ignored entries + # ------------------------------------------------------------------ + + def is_ignored(self, remote_path: str) -> bool: + with self._lock: + return remote_path in self.ignored + + def get_ignored_reason(self, remote_path: str) -> Optional[str]: + with self._lock: + entry = self.ignored.get(remote_path) + return entry.reason if entry else None + + def mark_ignored(self, remote_path: str, reason: str) -> None: + """Record (or refresh) the ignore marker for ``remote_path``.""" + now = _utc_now_iso() + with self._lock: + entry = self.ignored.get(remote_path) + if entry is None: + self.ignored[remote_path] = IgnoredEntry( + reason=reason, + first_seen=now, + last_seen=now, + ) + else: + entry.reason = reason + entry.last_seen = now + + def touch_ignored(self, remote_path: str) -> None: + """Bump the ``last_seen`` timestamp without changing the reason.""" + with self._lock: + entry = self.ignored.get(remote_path) + if entry is not None: + entry.last_seen = _utc_now_iso() + + def count_ignored(self) -> int: + with self._lock: + return len(self.ignored) + + def all_ignored_paths(self) -> list[str]: + with 
self._lock: + return list(self.ignored.keys()) + + def all_adopted_paths(self) -> list[str]: + with self._lock: + return list(self.adopted.keys()) + + +__all__ = [ + "AdoptedEntry", + "IgnoredEntry", + "SyncState", + "SYNC_STATE_FILENAME", + "SYNC_STATE_VERSION", +] diff --git a/tests/test_sync_legacy_ignore.py b/tests/test_sync_legacy_ignore.py new file mode 100644 index 0000000..ccc0e0b --- /dev/null +++ b/tests/test_sync_legacy_ignore.py @@ -0,0 +1,486 @@ +""" +NexaNote — Legacy/manual Markdown ignore tests. + +EN: Cover the duplicate-creation bug where the WebDAV sync engine kept + re-importing legacy `.md` files (manually dropped into the WebDAV + folder, no NexaNote frontmatter) on every pull. The fix records each + such file in a per-data-dir registry so subsequent syncs short-circuit + instead of producing fresh duplicates. + +FR: Couvre le bug où des .md hérités (ajoutés manuellement, sans + frontmatter NexaNote) étaient réimportés à chaque pull. La correction + enregistre ces chemins dans un registre par data_dir pour que les + sync suivantes les ignorent immédiatement. +""" + +from __future__ import annotations + +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from nexanote.models.note import Note, Notebook, NoteType, SyncStatus +from nexanote.storage import FileNoteStore +from nexanote.sync.client import ( + NexaNoteSyncEngine, + SyncConfig, + SyncReport, + _is_legacy_remote_id, + _remote_path, +) +from nexanote.sync.sync_state import SYNC_STATE_FILENAME, SyncState + + +# --------------------------------------------------------------------------- +# Stub WebDAV client — feeds canned responses to ``NexaNoteSyncEngine``. 
+# +# We can't reproduce the duplicate-creation bug against the real WsgiDAV +# fixture because the production WebDAV provider only lists notes whose +# `notebook_id` matches the requested notebook — synthesised plain-MD +# files have `notebook_id=None` and therefore stay invisible. The bug +# report concerns clients pointed at WebDAV folders curated by other +# tools (Obsidian, Synology, hand-edited NAS folders) where bare `.md` +# files do show up at the notebook level. The stub mirrors that shape. +# --------------------------------------------------------------------------- + + +class StubWebDAVClient: + """ + EN: Minimal in-memory replacement for ``WebDAVClient`` that returns + canned ``list_notebooks`` / ``list_notes`` / ``get_note_meta`` / + ``get_ink_page`` results from a dictionary handed in by the test. + FR: Remplaçant en mémoire pour ``WebDAVClient`` — réponses canned. + """ + + def __init__(self, layout: dict) -> None: + # layout = {nb_slug: {note_slug: meta_or_None}} + self.layout = layout + self.put_calls: list[tuple] = [] + + def ping(self) -> bool: + return True + + def list_notebooks(self) -> list[dict]: + return [ + {"name": nb_slug, "is_collection": True, "href": f"/{nb_slug}/"} + for nb_slug in self.layout + ] + + def list_notes(self, notebook_slug: str) -> list[dict]: + notes = self.layout.get(notebook_slug, {}) + return [ + {"name": note_slug, "is_collection": True, "href": f"/{notebook_slug}/{note_slug}/"} + for note_slug in notes + ] + + def get_note_meta( + self, notebook_slug: str, note_slug: str + ) -> Optional[dict]: + return self.layout.get(notebook_slug, {}).get(note_slug) + + def get_ink_page( + self, notebook_slug: str, note_slug: str, page_num: int + ) -> Optional[dict]: + return None + + # The push path isn't exercised by the legacy-ignore tests, but we + # stub it harmlessly so engine.sync() can complete cleanly. 
+ def create_notebook_dir(self, notebook_slug: str) -> bool: + return True + + def create_note_dir(self, notebook_slug: str, note_slug: str) -> bool: + return True + + def put_note_meta(self, *args, **kwargs): + self.put_calls.append(("meta", args, kwargs)) + return True, None + + def put_ink_page(self, *args, **kwargs): + self.put_calls.append(("ink", args, kwargs)) + return True, None + + +def _make_engine_with_stub(client_dir: Path, layout: dict) -> tuple[ + NexaNoteSyncEngine, StubWebDAVClient +]: + local_db = FileNoteStore(client_dir) + engine = NexaNoteSyncEngine( + local_db, + SyncConfig( + server_url="http://stub.invalid/", + username="u", + password="p", + timeout_seconds=1, + ), + ) + stub = StubWebDAVClient(layout) + engine.client = stub + return engine, stub + + +def _legacy_meta(note_id: str, title: str, body: str = "raw") -> dict: + """A note.json payload as the WebDAV provider would synthesise it.""" + now = datetime.now(timezone.utc).isoformat() + return { + "id": note_id, + "title": title, + "type": "typed", + "tags": [], + "is_pinned": False, + "created_at": now, + "updated_at": now, + "pages": [ + {"page_number": 1, "template": "blank", "typed_content": body}, + ], + } + + +def _local_note_count(db: FileNoteStore) -> int: + return len(db.list_notes(include_archived=True)) + + +# --------------------------------------------------------------------------- +# Pure-helper tests +# --------------------------------------------------------------------------- + + +class TestLegacyIdHelper: + def test_md_prefix_is_legacy(self): + assert _is_legacy_remote_id("md.aGVsbG8gd29ybGQ") + + def test_uuid_is_not_legacy(self): + assert not _is_legacy_remote_id("abc12345-6789-4abc-9def-0123456789ab") + + def test_empty_id_is_legacy(self): + # No id is the worst case — clearly not safely mappable. 
+ assert _is_legacy_remote_id(None) + assert _is_legacy_remote_id("") + + def test_remote_path_concatenates(self): + assert _remote_path("uncategorized", "foo__md.aGVs") == ( + "uncategorized/foo__md.aGVs" + ) + + +class TestSyncStateRoundtrip: + def test_roundtrip_persists_adopted_and_ignored(self, tmp_path): + state = SyncState.load(tmp_path) + state.mark_adopted("nb/foo__abcd1234", "abcd1234-uuid") + state.mark_ignored("nb/legacy__md.aGVs", "no NexaNote ID") + state.save() + + reloaded = SyncState.load(tmp_path) + assert reloaded.get_adopted_local_id("nb/foo__abcd1234") == "abcd1234-uuid" + assert reloaded.is_ignored("nb/legacy__md.aGVs") + assert reloaded.count_ignored() == 1 + assert ( + reloaded.get_ignored_reason("nb/legacy__md.aGVs") + == "no NexaNote ID" + ) + + def test_mark_adopted_clears_previous_ignore(self, tmp_path): + state = SyncState.load(tmp_path) + state.mark_ignored("nb/foo", "...") + state.mark_adopted("nb/foo", "uuid") + assert not state.is_ignored("nb/foo") + assert state.count_ignored() == 0 + + def test_load_handles_missing_file(self, tmp_path): + state = SyncState.load(tmp_path / "nonexistent") + assert state.adopted == {} + assert state.ignored == {} + + def test_load_handles_corrupt_file(self, tmp_path): + (tmp_path / SYNC_STATE_FILENAME).write_text("{ not json", encoding="utf-8") + state = SyncState.load(tmp_path) + # Corruption is logged but never fatal. + assert state.adopted == {} + assert state.ignored == {} + + +# --------------------------------------------------------------------------- +# End-to-end pull tests +# --------------------------------------------------------------------------- + + +class TestPullIgnoresLegacyManualMarkdown: + """ + EN: A remote ``note.json`` whose id was synthesised from a plain + Markdown file (id begins with ``md.``) must be recorded in the + ignore registry on first encounter and skipped on every later + sync. This prevents the duplicate-creation loop the fix targets. 
+ FR: Une note distante au id synthétique (``md.``) doit être marquée + ignorée dès le premier sync, puis sautée sans bruit ensuite. + """ + + def test_legacy_md_imported_zero_times(self, tmp_path): + layout = { + "uncategorized": { + "recipe__md.UmVjaXBl": _legacy_meta( + "md.UmVjaXBl", "Recipe", "# My recipe\n\nRaw text.\n" + ), + } + } + engine, _ = _make_engine_with_stub( + tmp_path / "client_legacy", layout + ) + + report = engine.sync() + + assert report.success(), f"sync errors: {report.errors}" + # Legacy file must be ignored — never adopted. + assert _local_note_count(engine.db) == 0 + assert report.notes_pulled == 0 + assert report.notes_ignored_legacy == 1, report.summary() + + def test_three_syncs_do_not_grow_local_count(self, tmp_path): + layout = { + "uncategorized": { + "notes__md.Tm90ZXM": _legacy_meta( + "md.Tm90ZXM", "Notes", "Free-form Obsidian note.\n" + ), + } + } + engine, _ = _make_engine_with_stub( + tmp_path / "client_thrice", layout + ) + + counts: list[int] = [] + ignore_counts: list[int] = [] + for _ in range(3): + report = engine.sync() + assert report.success(), f"sync errors: {report.errors}" + counts.append(_local_note_count(engine.db)) + ignore_counts.append(report.notes_ignored_legacy) + + assert counts == [0, 0, 0], ( + f"legacy files must not accumulate; got {counts}" + ) + # Each pull still reports the legacy file (so users can see it's + # still on the server) but never grows the local note count. + assert all(n >= 1 for n in ignore_counts), ignore_counts + + def test_no_weird_md_title_artifacts(self, tmp_path): + """ + EN: The fix must skip the import path entirely for legacy ids, + so no slug-derived `Foo__Md.…` titles can leak into the local + store. A handful of would-be-bogus ids/titles are pulled and + we assert nothing landed locally. 
+ """ + layout = { + "uncategorized": { + "todolist__md.VG9kbw": _legacy_meta( + "md.VG9kbw", "TodoList", "- buy milk\n" + ), + "__md.X19fX19f": _legacy_meta( + "md.X19fX19f", ".md", "no title at all\n" + ), + } + } + engine, _ = _make_engine_with_stub( + tmp_path / "client_titles", layout + ) + + report = engine.sync() + assert report.success(), report.errors + assert report.notes_ignored_legacy == 2 + + # No legacy notes leaked into the local store under ANY title. + local_notes = engine.db.list_notes(include_archived=True) + assert local_notes == [], f"unexpected local notes: {local_notes}" + + def test_ignored_paths_persist_to_disk(self, tmp_path): + layout = { + "uncategorized": { + "old__md.T2xk": _legacy_meta("md.T2xk", "Old"), + } + } + client_dir = tmp_path / "client_persist" + engine, _ = _make_engine_with_stub(client_dir, layout) + engine.sync() + + # A second engine reading the same data dir sees the registry, so + # the next sync session knows to skip this remote_path immediately. + reloaded = SyncState.load(client_dir) + assert reloaded.count_ignored() >= 1 + ignored = reloaded.all_ignored_paths() + assert "uncategorized/old__md.T2xk" in ignored, ignored + + def test_no_id_means_ignored(self, tmp_path): + """A remote payload with no id at all is the worst-case legacy.""" + layout = { + "uncategorized": { + "noid__deadbeef": { + # No "id" key at all. 
+ "title": "ID-less", + "type": "typed", + "tags": [], + "is_pinned": False, + "created_at": "2026-01-01T00:00:00+00:00", + "updated_at": "2026-01-01T00:00:00+00:00", + "pages": [], + }, + } + } + engine, _ = _make_engine_with_stub(tmp_path / "client_noid", layout) + report = engine.sync() + + assert report.success(), report.errors + assert report.notes_ignored_legacy == 1 + assert _local_note_count(engine.db) == 0 + + +# --------------------------------------------------------------------------- +# Adoption flow stays intact +# --------------------------------------------------------------------------- + + +class TestPullAdoptsValidNexaNoteFrontmatter: + """ + EN: The fix must NOT break the normal pull path. A note with valid + NexaNote frontmatter (real UUID id) is still adopted on first sync + and matched by id on subsequent syncs. + """ + + def test_real_note_imports_once_and_only_once(self, tmp_path): + real_id = "abc12345-6789-4abc-9def-0123456789ab" + layout = { + "recettes__abc12345": { + f"soupe__{real_id[:8]}": _legacy_meta( + real_id, "Soupe à l'oignon", "Étape 1…" + ), + } + } + engine, _ = _make_engine_with_stub( + tmp_path / "client_real", layout + ) + + first = engine.sync() + assert first.success(), first.errors + assert first.notes_pulled == 1 + assert _local_note_count(engine.db) == 1 + assert first.notes_ignored_legacy == 0 + + # Second sync against the unchanged remote is a no-op for pull, + # adoption mapping is reused. + second = engine.sync() + assert second.success(), second.errors + assert second.notes_pulled == 0 + assert _local_note_count(engine.db) == 1 + + def test_real_note_alongside_legacy_md(self, tmp_path): + """ + EN: Mixed remote: a real frontmatter note adopts; a legacy plain-md + sibling is ignored. Re-syncing stays steady on both fronts. 
+ """ + real_id = "fedcba98-7654-4abc-9def-1234567890ab" + layout = { + "mixte__fedcba98": { + f"vraie-note__{real_id[:8]}": _legacy_meta( + real_id, "Vraie note", "real content" + ), + "manual__md.TWFudWFs": _legacy_meta( + "md.TWFudWFs", "Manual", "Hand-edited file\n" + ), + } + } + engine, _ = _make_engine_with_stub( + tmp_path / "client_mixed", layout + ) + + report = engine.sync() + assert report.success(), report.errors + + local = engine.db.get_note(real_id, load_pages=True) + assert local is not None + assert local.title == "Vraie note" + assert report.notes_ignored_legacy == 1 + + # Re-running stays steady on both fronts. + second = engine.sync() + assert second.success(), second.errors + assert second.notes_pulled == 0 + assert _local_note_count(engine.db) == 1 + assert second.notes_ignored_legacy == 1 + + +# --------------------------------------------------------------------------- +# remote_path adoption fallback +# --------------------------------------------------------------------------- + + +class TestRemotePathAdoptionFallback: + """ + EN: When a remote note's id changes between syncs but the remote path + stays the same, the engine should match via remote_path so the + local note isn't duplicated. (This is the "match by remote_path + after id miss" branch.) + """ + + def test_remote_path_match_avoids_duplicate(self, tmp_path): + # Pre-seed the registry as if we adopted this remote_path before + # under a different id. The remote returns a fresh-but-legacy id. + client_dir = tmp_path / "client_remote_path" + local_db = FileNoteStore(client_dir) + # Adopted note exists locally with a real UUID. 
+ local_id = "11111111-2222-4333-9444-555566667777" + seeded = Note( + id=local_id, + title="Adopted", + note_type=NoteType.TYPED, + sync_status=SyncStatus.SYNCED, + ) + seeded.add_page().typed_content = "old" + local_db.save_note(seeded) + + state = SyncState.load(client_dir) + state.mark_adopted( + "uncategorized/adopted__11111111", local_id + ) + state.save() + + # Server now returns a different (legacy-shaped) id at the same path. + layout = { + "uncategorized": { + "adopted__11111111": _legacy_meta( + "md.QWRvcHRlZA", "Adopted", "new from remote" + ), + } + } + engine, _ = _make_engine_with_stub(client_dir, layout) + report = engine.sync() + + assert report.success(), report.errors + # The local note still exists — wasn't duplicated. + assert _local_note_count(engine.db) == 1 + # And no new ignore marker was added (we have a local mapping). + assert engine.sync_state.count_ignored() == 0 + + +# --------------------------------------------------------------------------- +# Diagnostics surface +# --------------------------------------------------------------------------- + + +class TestReportSurfaces: + def test_summary_mentions_ignored_count(self): + report = SyncReport() + report.notes_ignored_legacy = 3 + report.finish() + # Summary should call out the ignored legacy count. + assert "3" in report.summary() + assert ( + "ignor" in report.summary().lower() + or "héritées" in report.summary() + ) + + def test_summary_omits_ignored_when_zero(self): + report = SyncReport() + report.finish() + assert "ignor" not in report.summary().lower() + assert "héritées" not in report.summary()