diff --git a/CHANGELOG.md b/CHANGELOG.md index 46d63f0..f1e1dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,41 @@ This project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). --- +## Unreleased — Sync reliability & diagnostics + +### New + +- **Sync planning.** Each sync session now builds a `SyncPlan` recording + what it will push, pull, ignore, and which notes are in conflict, plus + any warnings. The plan is the single source the dry-run mode and the + diagnostic log read from. + +- **Dry-run mode.** `NexaNoteSyncEngine(db, config, dry_run=True)` — or + `POST /sync/trigger?dry_run=true` — builds the plan without writing any + note files, touching the sync-state registry, uploading to the remote, + or writing a log. Use it to preview what a real sync would do. + +- **Sanitized sync log.** Every real sync writes + `<data_dir>/sync_logs/latest.json` (so `/data/sync_logs/latest.json` in + the Docker image), readable via `GET /sync/log`. It records the + timestamp, duration, counts, pushed/pulled note ids and titles, ignored + legacy remote paths, conflicts, and sanitized errors. It never contains + note body content, passwords, tokens, or server URLs — error strings are + scrubbed and the plan/report only ever hold metadata. + +### Improved + +- **Conflict safety.** When a note changed both locally and remotely, the + conflict is detected and surfaced in the plan/log instead of being + resolved silently. If the chosen strategy would otherwise drop the local + edits, a `(conflit …)` copy is kept so both versions survive on disk. + +- **Idempotent sync state.** A failed sync still persists the sync-state + registry atomically, so a crash mid-session can never leave a corrupt or + half-written `.nexanote_sync_state.json`. 
+ +--- + ## v1.0.0 — File-based storage ### New diff --git a/nexanote/api/routes.py b/nexanote/api/routes.py index 69637ca..a4f87bf 100644 --- a/nexanote/api/routes.py +++ b/nexanote/api/routes.py @@ -170,6 +170,12 @@ class SyncReportSchema(BaseModel): errors: list[str] duration_seconds: float summary: str + # Diagnostics — additive, default-valued so existing clients are unaffected. + notes_ignored_legacy: int = 0 + dry_run: bool = False + conflicts: list[dict] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + plan: dict = Field(default_factory=dict) class ExportRequestSchema(BaseModel): @@ -558,8 +564,15 @@ def configure_sync(config: SyncConfigSchema): return {"status": "configured", "server_url": config.server_url} @app.post("/sync/trigger", response_model=SyncReportSchema) - def trigger_sync(): - """Déclenche une synchronisation manuelle.""" + def trigger_sync(dry_run: bool = Query(False)): + """ + EN: Trigger a manual sync. With ``?dry_run=true`` the engine builds + the sync plan but writes no files, touches no sync state, and + performs no remote uploads — handy to preview what a real sync + would do. + FR: Déclenche une synchronisation manuelle. Avec ``?dry_run=true``, + le moteur construit le plan sans rien écrire ni envoyer. 
+ """ if not _sync_config.get("server_url"): raise HTTPException(400, "Sync non configurée — appeler POST /sync/configure d'abord") @@ -573,9 +586,10 @@ def trigger_sync(): ), ) - engine = NexaNoteSyncEngine(db, config) + engine = NexaNoteSyncEngine(db, config, dry_run=dry_run) report = engine.sync() + plan = report.plan result = SyncReportSchema( success=report.success(), notes_pulled=report.notes_pulled, @@ -584,14 +598,38 @@ def trigger_sync(): errors=report.errors, duration_seconds=report.duration_seconds(), summary=report.summary(), + notes_ignored_legacy=report.notes_ignored_legacy, + dry_run=report.dry_run, + conflicts=[c.to_dict() for c in plan.conflicts] if plan else [], + warnings=list(plan.warnings) if plan else [], + plan=plan.to_dict() if plan else {}, ) - _last_sync_report.update(result.model_dump()) + # A dry-run is a preview — it must not clobber the last *real* status. + if not dry_run: + _last_sync_report.clear() + _last_sync_report.update(result.model_dump()) return result @app.get("/sync/status") def sync_status(): return _last_sync_report or {"status": "never_synced"} + @app.get("/sync/log") + def sync_log(): + """ + EN: Return the latest sanitized sync log written to + ``<data_dir>/sync_logs/latest.json``. Contains note ids/titles, + counts, ignored remote paths, conflicts and sanitized errors — + never note body content or credentials. + FR: Renvoie le dernier journal de sync assaini. 
+ """ + from nexanote.sync.sync_log import read_sync_log + + payload = read_sync_log(db.data_dir) + if payload is None: + return {"status": "no_log"} + return payload + # ------------------------------------------------------------------ # Export Markdown (Obsidian-friendly) # ------------------------------------------------------------------ diff --git a/nexanote/sync/client.py b/nexanote/sync/client.py index fecde0b..b0ec7c5 100644 --- a/nexanote/sync/client.py +++ b/nexanote/sync/client.py @@ -12,9 +12,11 @@ from __future__ import annotations +import copy import json import logging import time +import uuid from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum @@ -28,6 +30,8 @@ from nexanote.models.note import InkStroke, Note, Notebook, NoteType, Page, Point, SyncStatus from nexanote.storage.file_store import FileNoteStore, PLAIN_MD_ID_PREFIX from nexanote.sync.conflict import ConflictResolver, ConflictStrategy +from nexanote.sync.plan import SyncPlan +from nexanote.sync.sync_log import write_sync_log from nexanote.sync.sync_state import SyncState logger = logging.getLogger("nexanote.sync.client") @@ -126,6 +130,16 @@ class SyncReport: notes_ignored_legacy: int = 0 errors: list[str] = field(default_factory=list) events: list[SyncEvent] = field(default_factory=list) + # EN: The plan backing this session — what was (or would be) pushed, + # pulled, ignored, or flagged as a conflict. Populated by the engine + # and read by the diagnostic sync log. May be None when a report is + # built standalone (e.g. a direct ``_push`` call in tests). + # FR: Le plan de cette session (poussé/tiré/ignoré/conflits). Rempli par + # le moteur, lu par le journal de diagnostic. Peut être None. + plan: Optional[SyncPlan] = None + # EN: True when this report describes a dry-run (no files/state written). + # FR: Vrai quand le rapport décrit un dry-run (aucune écriture). 
+ dry_run: bool = False def finish(self) -> None: self.finished_at = datetime.now(timezone.utc) @@ -590,13 +604,37 @@ def _serialize_ink_page(page: Page) -> dict: } +def _make_local_conflict_copy(local: Note) -> Note: + """ + EN: Build an independent "(conflit …)" copy of the local note so its + unsynced edits survive when the chosen resolution would otherwise + replace them with the remote version. Mirrors the copy the + ``KEEP_BOTH`` strategy makes, but is created at the engine level so + the safety net applies under *every* conflict strategy. + FR: Crée une copie indépendante "(conflit …)" de la note locale pour + préserver ses modifications non synchronisées quand la résolution + choisie les remplacerait par la version distante. + """ + conflict_copy = copy.deepcopy(local) + conflict_copy.id = str(uuid.uuid4()) + ts = local.updated_at.strftime("%Y-%m-%d_%H-%M") + conflict_copy.title = f"{local.title} (conflit {ts})" + conflict_copy.sync_status = SyncStatus.LOCAL_ONLY + return conflict_copy + + class NexaNoteSyncEngine: """ Moteur de synchronisation principal. Orchestre pull → diff → resolve → push. """ - def __init__(self, db: FileNoteStore, config: SyncConfig) -> None: + def __init__( + self, + db: FileNoteStore, + config: SyncConfig, + dry_run: bool = False, + ) -> None: self.db = db self.config = config self.client = WebDAVClient(config) @@ -607,6 +645,42 @@ def __init__(self, db: FileNoteStore, config: SyncConfig) -> None: # FR: Registre des chemins distants adoptés/ignorés. Chargé à l'init, # sauvegardé à la fin de chaque ``sync()``. self.sync_state = SyncState.load(Path(db.data_dir)) + # EN: Dry-run builds the plan but performs no writes (no note files, + # no sync-state file, no remote PUTs, no sync log). The plan is + # populated as decisions are made; in dry-run that happens with + # every mutating call short-circuited. + # FR: Le dry-run construit le plan sans aucune écriture. 
+ self.dry_run = dry_run + # The active plan for the current session — created in ``sync()`` and + # lazily in ``_pull``/``_push`` for standalone calls (e.g. tests). + self.plan: Optional[SyncPlan] = None + + # ------------------------------------------------------------------ + # Dry-run guards — the single choke point for every state mutation. + # In dry-run mode each of these is a no-op, which is what makes the + # "dry-run never writes" guarantee easy to audit and to test. + # ------------------------------------------------------------------ + + def _ensure_plan(self) -> SyncPlan: + if self.plan is None: + self.plan = SyncPlan() + return self.plan + + def _apply_save_note(self, note: Note, save_pages: bool = True) -> None: + if not self.dry_run: + self.db.save_note(note, save_pages=save_pages) + + def _apply_mark_adopted(self, remote_path: str, local_id: str) -> None: + if not self.dry_run: + self.sync_state.mark_adopted(remote_path, local_id) + + def _apply_mark_ignored(self, remote_path: str, reason: str) -> None: + if not self.dry_run: + self.sync_state.mark_ignored(remote_path, reason) + + def _apply_touch_ignored(self, remote_path: str) -> None: + if not self.dry_run: + self.sync_state.touch_ignored(remote_path) # ------------------------------------------------------------------ # Point d'entrée principal @@ -614,11 +688,22 @@ def __init__(self, db: FileNoteStore, config: SyncConfig) -> None: def sync(self) -> SyncReport: """ - Lance une session de synchronisation complète. - Retourne un rapport détaillé. + EN: Run a full sync session: ping → pull → push. The ``SyncPlan`` is + built as decisions are made; in dry-run no files, sync state, or + remote resources are touched and no log is written. Otherwise the + sync state is persisted (even on error, so partial "ignored" + decisions survive) and a sanitized log is written at the end. + FR: Lance une session complète : ping → pull → push. 
Le plan est + construit au fil des décisions ; en dry-run rien n'est écrit. """ report = SyncReport() - logger.info("Début de la synchronisation NexaNote") + report.dry_run = self.dry_run + self.plan = SyncPlan() + report.plan = self.plan + logger.info( + "Début de la synchronisation NexaNote%s", + " (dry-run)" if self.dry_run else "", + ) # Vérifier la connexion if not self.client.ping(): @@ -626,6 +711,7 @@ def sync(self) -> SyncReport: logger.error(msg) report.errors.append(msg) report.finish() + self._write_log(report) return report try: @@ -640,15 +726,33 @@ def sync(self) -> SyncReport: # Persist the registry even when sync errored — we still want # to remember any "ignored" decisions made during the partial # run so the next sync skips those remote paths immediately. - try: - self.sync_state.save() - except Exception: - logger.exception("could not persist sync state") + # Skipped entirely in dry-run so state on disk is never touched. + if not self.dry_run: + try: + self.sync_state.save() + except Exception: + logger.exception("could not persist sync state") report.finish() + self._write_log(report) logger.info(report.summary()) return report + def _write_log(self, report: SyncReport) -> None: + """ + EN: Write the sanitized diagnostic log, unless this was a dry-run + (which must not write any files). Never raises. + FR: Écrit le journal de diagnostic assaini, sauf en dry-run. 
+ """ + if self.dry_run: + return + try: + write_sync_log( + self.db.data_dir, report, self.plan, dry_run=self.dry_run + ) + except Exception: + logger.exception("could not write sync log") + # ------------------------------------------------------------------ # PULL — récupérer les changements du serveur # ------------------------------------------------------------------ @@ -661,6 +765,7 @@ def _pull(self, report: SyncReport) -> None: - Si connue et identique → skip - Si connue et différente → résoudre le conflit """ + self._ensure_plan() report.events.append(SyncEvent(SyncEventType.PULL_START, "Pull depuis le serveur")) logger.info("PULL — récupération des notes distantes") @@ -701,14 +806,20 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: l'id ressemble à un fichier .md hérité sont enregistrées dans le registre "ignoré" pour ne pas être réimportées. """ + plan = self._ensure_plan() remote_path = _remote_path(nb_slug, note_slug) # Step 1: short-circuit on previously ignored paths. Touch the entry # so its `last_seen` timestamp reflects the latest sync — useful for # "still seeing this file" diagnostics. if self.sync_state.is_ignored(remote_path): - self.sync_state.touch_ignored(remote_path) + self._apply_touch_ignored(remote_path) report.notes_ignored_legacy += 1 + plan.add_ignore( + remote_path, + self.sync_state.get_ignored_reason(remote_path) + or "previously ignored", + ) return meta = self.client.get_note_meta(nb_slug, note_slug) @@ -721,8 +832,9 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: # remember to skip it. Legacy/manual file with no NexaNote # metadata; importing would invent a fresh id every time. 
reason = "remote note.json carried no id" - self.sync_state.mark_ignored(remote_path, reason) + self._apply_mark_ignored(remote_path, reason) report.notes_ignored_legacy += 1 + plan.add_ignore(remote_path, reason) logger.info( " ⊘ Legacy/manual note ignored (%s): %s", reason, @@ -757,8 +869,9 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: and adopted_local_id != note_id ): reason = "previously adopted local note no longer present" - self.sync_state.mark_ignored(remote_path, reason) + self._apply_mark_ignored(remote_path, reason) report.notes_ignored_legacy += 1 + plan.add_ignore(remote_path, reason) logger.info( " ⊘ Legacy/manual note ignored (%s): %s [id=%s]", reason, @@ -774,8 +887,9 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: reason = ( "no NexaNote frontmatter id; legacy/manual Markdown file" ) - self.sync_state.mark_ignored(remote_path, reason) + self._apply_mark_ignored(remote_path, reason) report.notes_ignored_legacy += 1 + plan.add_ignore(remote_path, reason) logger.info( " ⊘ Legacy/manual note ignored (%s): %s [id=%s]", reason, @@ -789,7 +903,11 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: # resolver requires same-id notes), but refresh the mapping so the # registry stays current. The local copy is canonical here. if matched_via_remote_path and local_note is not None: - self.sync_state.mark_adopted(remote_path, local_note.id) + self._apply_mark_adopted(remote_path, local_note.id) + plan.add_warning( + f"remote path changed id on server; kept local note " + f"{local_note.id[:8]} for {remote_path}" + ) logger.info( " ↺ Remote_path match (id mismatch — keeping local %s): %s", local_note.id, @@ -812,9 +930,10 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: if local_note is None: # Fresh adoption. The id is non-legacy (filtered above) so it # is safe to use as-is. 
- self.db.save_note(remote_note) - self.sync_state.mark_adopted(remote_path, remote_note.id) + self._apply_save_note(remote_note) + self._apply_mark_adopted(remote_path, remote_note.id) report.notes_pulled += 1 + plan.add_pull(remote_note.id, remote_note.title) report.events.append(SyncEvent( SyncEventType.NOTE_PULLED, f"Nouvelle note importée : {remote_note.title}", @@ -823,32 +942,58 @@ def _pull_note(self, nb_slug: str, note_slug: str, report: SyncReport) -> None: logger.info(f" ← Importée : {remote_note.title}") elif local_note.sync_status == SyncStatus.MODIFIED: - # Conflit potentiel — résoudre + # Conflict path — local has unsynced edits and a remote copy + # exists. Snapshot the local version *before* resolving so its + # edits can be preserved even when the chosen strategy would + # overwrite them. + local_snapshot = copy.deepcopy(local_note) result = self.resolver.resolve(local_note, remote_note) - self.db.save_note(result.winner) - if result.conflict_copy: - self.db.save_note(result.conflict_copy) + # A genuine conflict = both sides changed. The resolver reports + # no conflict when the timestamps match (identical versions). + is_real_conflict = result.had_conflict() or ( + local_snapshot.updated_at != remote_note.updated_at + ) + local_won = local_snapshot.updated_at >= remote_note.updated_at - self.sync_state.mark_adopted(remote_path, result.winner.id) + # Conflict safety net: never silently drop local edits. If the + # remote version won and the strategy kept no copy of the local + # one, synthesise one so both versions survive on disk. 
+ conflict_copy = result.conflict_copy + if is_real_conflict and conflict_copy is None and not local_won: + conflict_copy = _make_local_conflict_copy(local_snapshot) + + self._apply_save_note(result.winner) + if conflict_copy is not None: + self._apply_save_note(conflict_copy) + + self._apply_mark_adopted(remote_path, result.winner.id) report.notes_pulled += 1 report.conflicts_resolved += 1 + if is_real_conflict: + plan.add_conflict( + note_id, + local_snapshot.title, + result.message, + preserved_both=conflict_copy is not None, + ) report.events.append(SyncEvent( SyncEventType.CONFLICT_RESOLVED, result.message, note_id=note_id, )) - logger.info(f" ⚡ Conflit résolu : {local_note.title} — {result.message}") + logger.info(f" ⚡ Conflit résolu : {local_snapshot.title} — {result.message}") else: # Pas de modification locale — appliquer la version distante si plus récente if remote_note.updated_at > local_note.updated_at: - self.db.save_note(remote_note) + self._apply_save_note(remote_note) report.notes_pulled += 1 + plan.add_pull(remote_note.id, remote_note.title) logger.info(f" ↓ Mise à jour : {remote_note.title}") # Always refresh the mapping so future pulls go fast even when # the content hasn't changed. - self.sync_state.mark_adopted(remote_path, local_note.id) + self._apply_mark_adopted(remote_path, local_note.id) # ------------------------------------------------------------------ # PUSH — envoyer les notes modifiées localement @@ -859,6 +1004,7 @@ def _push(self, report: SyncReport) -> None: Envoie toutes les notes marquées MODIFIED ou LOCAL_ONLY vers le serveur WebDAV. 
""" + plan = self._ensure_plan() report.events.append(SyncEvent(SyncEventType.PUSH_START, "Push vers le serveur")) logger.info("PUSH — envoi des notes locales modifiées") @@ -871,6 +1017,12 @@ def _push(self, report: SyncReport) -> None: logger.debug(f" {len(to_push)} notes à pousser") for note in to_push: + # Dry-run records the intent to push but performs no network PUT + # — it must never modify the remote server either. + if self.dry_run: + plan.add_push(note.id, note.title) + report.notes_pushed += 1 + continue try: full_note = self.db.get_note(note.id, load_pages=True) if full_note: @@ -958,8 +1110,9 @@ def _push_note(self, note: Note, report: SyncReport) -> None: if meta_ok and pages_ok: # Marquer comme SYNCED note.sync_status = SyncStatus.SYNCED - self.db.save_note(note, save_pages=False) + self._apply_save_note(note, save_pages=False) report.notes_pushed += 1 + self._ensure_plan().add_push(note.id, note.title) report.events.append(SyncEvent( SyncEventType.NOTE_PUSHED, f"Note envoyée : {note.title}", diff --git a/nexanote/sync/plan.py b/nexanote/sync/plan.py new file mode 100644 index 0000000..fb21ae2 --- /dev/null +++ b/nexanote/sync/plan.py @@ -0,0 +1,161 @@ +""" +NexaNote — Sync planning / Plan de synchronisation. + +EN: A ``SyncPlan`` is the *intent* of a sync session: what the engine + would push, pull, ignore, treat as a conflict, or warn about. It is + populated as the engine makes decisions and is the single source the + dry-run mode and the sanitized sync log read from. + + The plan deliberately carries **no note body content** — only stable, + non-sensitive metadata (note ids, titles, remote paths, short reasons). + That invariant is what lets us serialise the plan straight into a + diagnostic log without leaking note text or secrets. + +FR: Un ``SyncPlan`` représente l'intention d'une session de sync : ce qui + serait poussé, tiré, ignoré, considéré comme conflit ou signalé. 
Il est + rempli au fil des décisions du moteur et sert de source unique au mode + dry-run et au journal de sync. + + Le plan ne contient **aucun contenu de note** — uniquement des + métadonnées non sensibles (ids, titres, chemins distants, motifs). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass +class PlannedNote: + """A note slated to be pushed or pulled. Title is metadata, never body.""" + + note_id: str + title: str + + def to_dict(self) -> dict: + return {"id": self.note_id, "title": self.title} + + +@dataclass +class PlannedIgnore: + """A remote path the engine will skip, with a short reason.""" + + remote_path: str + reason: str + + def to_dict(self) -> dict: + return {"remote_path": self.remote_path, "reason": self.reason} + + +@dataclass +class PlannedConflict: + """ + EN: A note where the local copy has unsynced edits *and* the remote + copy differs — both sides changed. ``preserved_both`` records that + we kept a copy of the local version instead of overwriting it. + FR: Une note modifiée localement dont la version distante diffère — + les deux côtés ont changé. ``preserved_both`` indique qu'une copie + locale a été conservée plutôt qu'écrasée. + """ + + note_id: str + title: str + resolution: str + preserved_both: bool + + def to_dict(self) -> dict: + return { + "id": self.note_id, + "title": self.title, + "resolution": self.resolution, + "preserved_both": self.preserved_both, + } + + +@dataclass +class SyncPlan: + """ + EN: The intent of a single sync session. Built before (and as) changes + are applied; in dry-run mode it is built without any writes at all. + FR: L'intention d'une session de sync. Construit avant (et pendant) + l'application des changements ; en dry-run, sans aucune écriture. 
+ """ + + notes_to_push: list[PlannedNote] = field(default_factory=list) + notes_to_pull: list[PlannedNote] = field(default_factory=list) + notes_to_ignore: list[PlannedIgnore] = field(default_factory=list) + conflicts: list[PlannedConflict] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + # ------------------------------------------------------------------ + # Recording helpers + # ------------------------------------------------------------------ + + def add_push(self, note_id: str, title: str) -> None: + self.notes_to_push.append(PlannedNote(note_id=note_id, title=title)) + + def add_pull(self, note_id: str, title: str) -> None: + self.notes_to_pull.append(PlannedNote(note_id=note_id, title=title)) + + def add_ignore(self, remote_path: str, reason: str) -> None: + self.notes_to_ignore.append( + PlannedIgnore(remote_path=remote_path, reason=reason) + ) + + def add_conflict( + self, + note_id: str, + title: str, + resolution: str, + preserved_both: bool, + ) -> None: + self.conflicts.append( + PlannedConflict( + note_id=note_id, + title=title, + resolution=resolution, + preserved_both=preserved_both, + ) + ) + + def add_warning(self, message: str) -> None: + self.warnings.append(message) + + # ------------------------------------------------------------------ + # Views + # ------------------------------------------------------------------ + + def counts(self) -> dict: + return { + "to_push": len(self.notes_to_push), + "to_pull": len(self.notes_to_pull), + "to_ignore": len(self.notes_to_ignore), + "conflicts": len(self.conflicts), + "warnings": len(self.warnings), + } + + def is_empty(self) -> bool: + return not ( + self.notes_to_push + or self.notes_to_pull + or self.notes_to_ignore + or self.conflicts + or self.warnings + ) + + def to_dict(self) -> dict: + return { + "notes_to_push": [n.to_dict() for n in self.notes_to_push], + "notes_to_pull": [n.to_dict() for n in self.notes_to_pull], + "notes_to_ignore": [i.to_dict() for i 
in self.notes_to_ignore], + "conflicts": [c.to_dict() for c in self.conflicts], + "warnings": list(self.warnings), + } + + +__all__ = [ + "PlannedNote", + "PlannedIgnore", + "PlannedConflict", + "SyncPlan", +] diff --git a/nexanote/sync/sync_log.py b/nexanote/sync/sync_log.py new file mode 100644 index 0000000..5b1db18 --- /dev/null +++ b/nexanote/sync/sync_log.py @@ -0,0 +1,198 @@ +""" +NexaNote — Sanitized sync log / Journal de synchronisation assaini. + +EN: Writes a single ``latest.json`` diagnostic file after each sync session + so users can answer "what did the last sync actually do?" without + digging through server logs. The file lives at + ``<data_dir>/sync_logs/latest.json`` — in the Docker image where + ``data_dir`` is ``/data`` this resolves to ``/data/sync_logs/latest.json``. + The path is derived from the configured data dir, never hardcoded, so + it works the same in tests (tmp dir) and in any deployment. + + The log is *sanitized by construction*: + * it is built only from the ``SyncReport`` (counts, timing, errors) + and the ``SyncPlan`` (ids, titles, remote paths, reasons); + * neither of those ever carries note body text, so no body can leak; + * error strings are additionally run through :func:`sanitize_error` + to strip URLs (which embed the server host) and any + ``key=value`` pair that looks like a credential. + +FR: Écrit un unique fichier ``latest.json`` après chaque session de sync, + à ``<data_dir>/sync_logs/latest.json`` (donc ``/data/sync_logs/latest.json`` + dans l'image Docker). Le chemin dérive du data_dir configuré, jamais + codé en dur. Le journal est assaini par construction : aucun corps de + note, aucun secret ; les erreurs passent par :func:`sanitize_error`. 
+""" + +from __future__ import annotations + +import json +import logging +import os +import re +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +from nexanote.sync.plan import SyncPlan + +if TYPE_CHECKING: # pragma: no cover - typing only, avoids circular import + from nexanote.sync.client import SyncReport + +logger = logging.getLogger("nexanote.sync.log") + +SYNC_LOG_DIRNAME = "sync_logs" +SYNC_LOG_FILENAME = "latest.json" +SYNC_LOG_VERSION = 1 + +# EN: A URL can embed the server host (and occasionally basic-auth +# userinfo). We never want either in a diagnostic file, so any URL is +# collapsed to a placeholder. +# FR: Une URL peut contenir l'hôte (voire user:pass@). On la remplace. +_URL_RE = re.compile(r"https?://[^\s\"'<>]+", re.IGNORECASE) + +# EN: Defensive scrub for any ``secret = value`` / ``token: value`` pair that +# might slip into an error message. The engine already avoids leaking +# these, but a sync log is a durable artifact — belt and braces. +# FR: Filet de sécurité pour toute paire ``secret=valeur`` dans un message. +_KV_SECRET_RE = re.compile( + r"(?i)\b(password|passwd|pwd|token|secret|api[-_]?key|authorization|auth|credentials?)\s*[=:]\s*\S+" +) + + +def sanitize_error(message: object) -> str: + """ + EN: Render an error into a short, log-safe string: strip URLs and any + credential-looking ``key=value`` pairs. HTTP status reasons such as + "401 Unauthorized" are preserved — they carry no secret. + FR: Convertit une erreur en chaîne sûre : retire URLs et paires + ressemblant à des identifiants. Conserve les motifs HTTP ("401…"). 
+ """ + if message is None: + return "" + text = str(message) + text = _URL_RE.sub("", text) + text = _KV_SECRET_RE.sub(lambda m: f"{m.group(1)}=", text) + return text + + +def sync_log_path(data_dir: Path | str) -> Path: + """Return ``<data_dir>/sync_logs/latest.json`` for the given data dir.""" + return Path(data_dir) / SYNC_LOG_DIRNAME / SYNC_LOG_FILENAME + + +def build_sync_log( + report: "SyncReport", + plan: Optional[SyncPlan], + *, + dry_run: bool = False, +) -> dict: + """ + EN: Assemble the sanitized log payload from a finished ``SyncReport`` + and its ``SyncPlan``. Contains ids/titles/paths/reasons/counts only. + FR: Construit la charge utile assainie à partir du rapport terminé et + de son plan. Contient seulement ids/titres/chemins/motifs/compteurs. + """ + plan = plan or SyncPlan() + + started = getattr(report, "started_at", None) + finished = getattr(report, "finished_at", None) + timestamp = finished or started + + errors = [sanitize_error(e) for e in getattr(report, "errors", [])] + + payload = { + "version": SYNC_LOG_VERSION, + "timestamp": timestamp.isoformat() if timestamp else None, + "started_at": started.isoformat() if started else None, + "finished_at": finished.isoformat() if finished else None, + "duration_seconds": round(report.duration_seconds(), 3), + "dry_run": bool(dry_run), + "success": report.success(), + "counts": { + "pulled": getattr(report, "notes_pulled", 0), + "pushed": getattr(report, "notes_pushed", 0), + "conflicts": getattr(report, "conflicts_resolved", 0), + "ignored_legacy": getattr(report, "notes_ignored_legacy", 0), + "errors": len(errors), + }, + # Note metadata only — ids and titles, never body content. 
+ "pushed": [n.to_dict() for n in plan.notes_to_push], + "pulled": [n.to_dict() for n in plan.notes_to_pull], + "ignored": [i.to_dict() for i in plan.notes_to_ignore], + "conflicts": [c.to_dict() for c in plan.conflicts], + "warnings": list(plan.warnings), + "errors": errors, + } + return payload + + +def write_sync_log( + data_dir: Path | str, + report: "SyncReport", + plan: Optional[SyncPlan], + *, + dry_run: bool = False, +) -> Optional[Path]: + """ + EN: Write the sanitized log to ``<data_dir>/sync_logs/latest.json`` + atomically (tmp file + ``os.replace``). Never raises — a failed log + write must not break sync. Returns the path on success, else None. + FR: Écrit le journal assaini de manière atomique. Ne lève jamais — un + échec d'écriture ne doit pas casser la sync. Retourne le chemin. + """ + path = sync_log_path(data_dir) + try: + payload = build_sync_log(report, plan, dry_run=dry_run) + data = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8") + except Exception: + logger.exception("could not build sync log payload") + return None + + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_fd, tmp_path = tempfile.mkstemp( + prefix=f".{path.name}.", + suffix=".tmp", + dir=str(path.parent), + ) + try: + with os.fdopen(tmp_fd, "wb") as f: + f.write(data) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, path) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except OSError as exc: + logger.warning("sync log write failed (%s): %s", path, exc) + return None + return path + + +def read_sync_log(data_dir: Path | str) -> Optional[dict]: + """Return the parsed latest sync log, or None when absent/unreadable.""" + path = sync_log_path(data_dir) + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("sync log unreadable (%s): %s", path, exc) + return None + + +__all__ = [ + "SYNC_LOG_DIRNAME", + 
class StubWebDAVClient:
    """
    In-memory stand-in for ``WebDAVClient``.

    It serves a canned remote layout and records every PUT / MKCOL-style
    call, so a test can assert that a dry-run never touched the server.
    """

    def __init__(self, layout: Optional[dict] = None) -> None:
        # Layout shape: {notebook_slug: {note_slug: meta_dict}}.
        # A falsy layout (None or empty) is replaced by a fresh dict.
        self.layout = layout if layout else {}
        self.put_calls: list[tuple] = []
        self.mkcol_calls: list[str] = []
        self.online = True
        self.put_result: tuple = (True, None)

    # -- internal helpers ---------------------------------------------------

    def _record_put(self, kind: str, args: tuple, kwargs: dict):
        """Log one PUT call and hand back the configured canned result."""
        self.put_calls.append((kind, args, kwargs))
        return self.put_result

    def _notes_in(self, notebook_slug: str) -> dict:
        """Return the note mapping of a notebook, or an empty dict."""
        return self.layout.get(notebook_slug, {})

    # -- read side ----------------------------------------------------------

    def ping(self) -> bool:
        return self.online

    def list_notebooks(self) -> list[dict]:
        entries = []
        for slug in self.layout:
            entries.append(
                {"name": slug, "is_collection": True, "href": f"/{slug}/"}
            )
        return entries

    def list_notes(self, notebook_slug: str) -> list[dict]:
        entries = []
        for slug in self._notes_in(notebook_slug):
            entries.append(
                {
                    "name": slug,
                    "is_collection": True,
                    "href": f"/{notebook_slug}/{slug}/",
                }
            )
        return entries

    def get_note_meta(self, notebook_slug: str, note_slug: str) -> Optional[dict]:
        return self._notes_in(notebook_slug).get(note_slug)

    def get_ink_page(self, notebook_slug: str, note_slug: str, page_num: int):
        # The stub holds no ink data.
        return None

    # -- write side (recorded, never performed) -----------------------------

    def create_notebook_dir(self, notebook_slug: str) -> bool:
        self.mkcol_calls.append(notebook_slug)
        return True

    def create_note_dir(self, notebook_slug: str, note_slug: str) -> bool:
        self.mkcol_calls.append(f"{notebook_slug}/{note_slug}")
        return True

    def put_note_meta(self, *args, **kwargs):
        return self._record_put("meta", args, kwargs)

    def put_ink_page(self, *args, **kwargs):
        return self._record_put("ink", args, kwargs)
def _make_engine(data_dir: Path, layout: dict, *, dry_run: bool = False):
    """
    Build a sync engine over a file store at *data_dir*, wired to a stub.

    The engine's ``client`` attribute is replaced with a
    ``StubWebDAVClient`` serving *layout*. Returns ``(engine, stub)``.
    """
    store = FileNoteStore(data_dir)
    config = SyncConfig(
        server_url="http://stub.invalid/",
        username="u",
        password="hunter2-secret",
        timeout_seconds=1,
    )
    engine = NexaNoteSyncEngine(store, config, dry_run=dry_run)

    # Swap in the in-memory client so the tests can inspect recorded calls.
    fake_client = StubWebDAVClient(layout)
    engine.client = fake_client
    return engine, fake_client


def _note_count(db: FileNoteStore) -> int:
    """Number of notes in *db*, archived ones included."""
    every_note = db.list_notes(include_archived=True)
    return len(every_note)
+ assert report.dry_run is True + plan = report.plan + assert plan is not None + assert [n.note_id for n in plan.notes_to_push] == [local.id] + assert REAL_ID in [n.note_id for n in plan.notes_to_pull] + assert any( + "legacy" in i.remote_path for i in plan.notes_to_ignore + ), plan.to_dict() + + # Nothing was applied: no remote note adopted, push note untouched. + assert _note_count(engine.db) == before + assert engine.db.get_note(REAL_ID) is None + assert engine.db.get_note(local.id).sync_status == SyncStatus.LOCAL_ONLY + + # No server writes, no sync-state file, no sync log file. + assert stub.put_calls == [] + assert not (data_dir / SYNC_STATE_FILENAME).exists() + assert not sync_log_path(data_dir).exists() + + def test_dry_run_leaves_existing_state_untouched(self, tmp_path): + data_dir = tmp_path / "client_state" + # Pre-seed a sync-state file and capture its bytes. + seed = SyncState.load(data_dir) + seed.mark_adopted("carnet__01234567/old__deadbeef", "deadbeef-uuid") + seed.save() + state_path = data_dir / SYNC_STATE_FILENAME + original = state_path.read_bytes() + + engine, _ = _make_engine( + data_dir, self._layout_with_pull_and_ignore(), dry_run=True + ) + engine.sync() + + # Byte-for-byte identical — the dry-run never rewrote the registry. + assert state_path.read_bytes() == original + + +# --------------------------------------------------------------------------- +# 2. 
class TestSyncLogWritten:
    """A real sync leaves a diagnostic log behind; a dry-run leaves none."""

    def test_log_written_with_expected_shape(self, tmp_path):
        """Pulling one remote note yields a log with the expected fields."""
        root = tmp_path / "client"
        remote_note = _meta(
            REAL_ID, "From Server", "remote body", "2026-02-01T00:00:00+00:00"
        )
        remote_layout = {
            "carnet__01234567": {f"remote__{REAL_ID[:8]}": remote_note},
        }
        engine, _stub = _make_engine(root, remote_layout)

        outcome = engine.sync()
        assert outcome.success(), outcome.errors

        written = sync_log_path(root)
        assert written.exists()
        # The log sits in a ``sync_logs`` directory under the agreed filename.
        assert written.parent.name == "sync_logs"
        assert written.name == SYNC_LOG_FILENAME

        entry = json.loads(written.read_text("utf-8"))
        assert entry["timestamp"]
        assert entry["duration_seconds"] >= 0
        assert entry["dry_run"] is False
        assert entry["success"] is True
        assert entry["counts"]["pulled"] == 1

        # The pulled note is identified by id and title.
        first_pulled = entry["pulled"][0]
        assert first_pulled["id"] == REAL_ID
        assert first_pulled["title"] == "From Server"

    def test_dry_run_writes_no_log(self, tmp_path):
        """A dry-run against an empty remote creates no log file."""
        root = tmp_path / "client_dry"
        engine, _stub = _make_engine(root, {}, dry_run=True)
        engine.sync()
        assert not sync_log_path(root).exists()
+ local = Note(title="Secret Note", note_type=NoteType.TYPED) + local.add_page().typed_content = "TOP-SECRET-BODY-CONTENT-12345" + engine.db.save_note(local) + + report = engine.sync() + assert report.success(), report.errors + + raw = sync_log_path(data_dir).read_text("utf-8") + # The push is recorded by id + title only. + assert "Secret Note" in raw + assert local.id in raw + # Never the body, never the configured password. + assert "TOP-SECRET-BODY-CONTENT-12345" not in raw + assert "hunter2-secret" not in raw + + def test_log_sanitizes_error_urls(self, tmp_path): + data_dir = tmp_path / "client_err" + engine, stub = _make_engine(data_dir, {}) + # Force the push to fail with a reason that embeds a server URL. + stub.put_result = ( + False, + "WebDAV upload failed at https://nas.example.com:8765/dav/notes: 500", + ) + local = Note(title="Will Fail", note_type=NoteType.TYPED) + local.add_page().typed_content = "body" + engine.db.save_note(local) + + report = engine.sync() + assert not report.success() + + payload = read_sync_log(data_dir) + assert payload is not None + joined = json.dumps(payload) + # The host/URL is scrubbed; the useful HTTP status survives. + assert "nas.example.com" not in joined + assert "" in joined + assert "500" in joined + + def test_sanitize_error_unit(self): + assert sanitize_error("connect to https://host.tld:8765/dav failed") == ( + "connect to failed" + ) + assert "" in sanitize_error("auth password=hunter2 rejected") + assert "" in sanitize_error("Authorization: Bearer abc.def.ghi") + # HTTP status reasons carry no secret and must be preserved. 
class TestIgnoredDiagnostics:
    """Ignored legacy remote notes show up in the report, plan, and log."""

    def test_ignored_legacy_in_report_plan_and_log(self, tmp_path):
        root = tmp_path / "client"
        legacy_remote_path = "carnet__01234567/legacy__md.TGVnYWN5"
        legacy_note = _meta(
            LEGACY_ID, "Legacy", "hand-edited", "2026-02-01T00:00:00+00:00"
        )
        remote_layout = {"carnet__01234567": {"legacy__md.TGVnYWN5": legacy_note}}
        engine, _stub = _make_engine(root, remote_layout)

        outcome = engine.sync()
        assert outcome.success(), outcome.errors

        # 1) The report counts the ignored note.
        assert outcome.notes_ignored_legacy == 1

        # 2) The plan names the ignored remote path and gives a reason.
        skipped = outcome.plan.notes_to_ignore
        assert len(skipped) == 1
        only_skipped = skipped[0]
        assert only_skipped.remote_path == legacy_remote_path
        assert only_skipped.reason

        # 3) The sanitized log carries the same remote path.
        logged = read_sync_log(root)
        assert logged["counts"]["ignored_legacy"] == 1
        assert logged["ignored"][0]["remote_path"] == legacy_remote_path

        # 4) Nothing was adopted into the local store.
        assert _note_count(engine.db) == 0
class TestConflictSafety:
    """Notes changed on both sides are flagged, never silently overwritten."""

    @staticmethod
    def _first_page_bodies(db) -> list:
        """Collect the first-page text of every stored note ('' if no pages)."""
        texts = []
        for summary in db.list_notes(include_archived=True):
            loaded = db.get_note(summary.id, load_pages=True)
            if loaded.pages:
                texts.append(loaded.pages[0].typed_content)
            else:
                texts.append("")
        return texts

    def test_both_changed_preserves_local_copy(self, tmp_path):
        root = tmp_path / "client"
        # The remote copy carries the newer timestamp.
        remote_note = _meta(
            REAL_ID,
            "Doc",
            "REMOTE-EDIT",
            "2026-06-01T00:00:00+00:00",
        )
        remote_layout = {
            "carnet__01234567": {f"remote__{REAL_ID[:8]}": remote_note},
        }
        engine, _stub = _make_engine(root, remote_layout)

        # Local copy: same id, unsynced edits, older timestamp.
        mine = Note(id=REAL_ID, title="Doc", note_type=NoteType.TYPED)
        mine.add_page().typed_content = "LOCAL-EDIT"
        mine.updated_at = datetime(2026, 1, 1, tzinfo=timezone.utc)
        mine.sync_status = SyncStatus.MODIFIED
        engine.db.save_note(mine)

        # Neutralise the push phase so only the pull conflict path runs.
        def _skip_push(report):
            return None

        engine._push = _skip_push
        outcome = engine.sync()
        assert outcome.success(), outcome.errors

        # The conflict is recorded in both the report and the plan.
        assert outcome.conflicts_resolved == 1
        assert len(outcome.plan.conflicts) == 1
        recorded = outcome.plan.conflicts[0]
        assert recorded.note_id == REAL_ID
        assert recorded.preserved_both is True

        # Both texts are still present in the store afterwards.
        bodies = self._first_page_bodies(engine.db)
        assert any("LOCAL-EDIT" in b for b in bodies), bodies
        assert any("REMOTE-EDIT" in b for b in bodies), bodies

        # The diagnostic log mirrors the conflict entry.
        logged = read_sync_log(root)
        assert logged["counts"]["conflicts"] == 1
        logged_conflict = logged["conflicts"][0]
        assert logged_conflict["id"] == REAL_ID
        assert logged_conflict["preserved_both"] is True

    def test_local_newer_does_not_spawn_redundant_copy(self, tmp_path):
        """When local already wins, no redundant conflict copy is created."""
        root = tmp_path / "client_localnew"
        remote_note = _meta(
            REAL_ID, "Doc", "REMOTE-OLD", "2026-01-01T00:00:00+00:00"
        )
        remote_layout = {
            "carnet__01234567": {f"remote__{REAL_ID[:8]}": remote_note},
        }
        engine, _stub = _make_engine(root, remote_layout)

        # This time the local copy carries the newer timestamp.
        mine = Note(id=REAL_ID, title="Doc", note_type=NoteType.TYPED)
        mine.add_page().typed_content = "LOCAL-NEW"
        mine.updated_at = datetime(2026, 6, 1, tzinfo=timezone.utc)
        mine.sync_status = SyncStatus.MODIFIED
        engine.db.save_note(mine)

        def _skip_push(report):
            return None

        engine._push = _skip_push
        outcome = engine.sync()
        assert outcome.success(), outcome.errors
        assert outcome.conflicts_resolved == 1

        # Still exactly one note in the store, and the plan says no copy was kept.
        assert _note_count(engine.db) == 1
        assert outcome.plan.conflicts[0].preserved_both is False
+ def _boom(report): + raise RuntimeError("simulated push crash") + + engine._push = _boom + report = engine.sync() + + # The crash was captured, not swallowed silently. + assert not report.success() + assert any("simulated push crash" in e for e in report.errors) + + # The state file is still valid JSON and fully reloadable. + state_path = data_dir / SYNC_STATE_FILENAME + assert state_path.exists() + json.loads(state_path.read_text("utf-8")) # must not raise + + reloaded = SyncState.load(data_dir) + # Pre-existing adoption preserved … + assert ( + reloaded.get_adopted_local_id("carnet__01234567/kept__deadbeef") + == "deadbeef-uuid" + ) + # … and the partial-run ignore decision was persisted by the finally + # block, so the next sync skips it immediately. + assert reloaded.is_ignored("carnet__01234567/legacy__md.TGVnYWN5") + + def test_unreachable_server_writes_failure_log_without_corruption(self, tmp_path): + data_dir = tmp_path / "client_offline" + engine, stub = _make_engine(data_dir, {}) + stub.online = False # ping fails + + report = engine.sync() + assert not report.success() + + # Even a failed connection produces a diagnostic log … + payload = read_sync_log(data_dir) + assert payload is not None + assert payload["success"] is False + assert payload["counts"]["errors"] >= 1 + # … with the server host scrubbed out of the error (it embeds a URL). + text = json.dumps(payload) + assert "stub.invalid" not in text + assert "" in text + # … and no sync-state file is corrupted (none needed to be written). + state_path = data_dir / SYNC_STATE_FILENAME + if state_path.exists(): + json.loads(state_path.read_text("utf-8"))