diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index d14ee12..6350448 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -755,6 +755,17 @@ def init(ctx: click.Context) -> None: " " + click.style("Indexing project", fg="cyan", bold=True) + "..." ) asyncio.run(_run_index(config, str(project_dir), full=True)) + + # Capture a baseline work_profile so the very first SessionStart + # already has something to inject. Best-effort: a partially-migrated + # db or missing v4 table must not break `cce init`. + try: + _refresh_work_profile(config, project_dir) + except Exception: # pragma: no cover - defensive + # `cce init` runs before there's a meaningful work history, so a + # failure here is almost never user-visible. Swallow silently. + pass + click.echo("") click.echo( click.style(" Done!", fg="green", bold=True) + @@ -763,6 +774,73 @@ def init(ctx: click.Context) -> None: click.echo("") +def _refresh_work_profile(config, project_dir: Path) -> dict | None: + """Rebuild the work_profile row for `project_dir` if missing or stale. + + Returns the persisted profile dict, or None if memory.db is + unavailable. Safe to call from `cce init` and `cce profile`. + """ + from context_engine.memory import db as memory_db + from context_engine.memory.work_profile import refresh_work_profile + + project_name = project_dir.name + storage_base = Path(config.storage_path) / project_name + db_path = memory_db.memory_db_path(storage_base) + if not db_path.exists() and not db_path.parent.exists(): + return None + conn = memory_db.connect(db_path) + try: + return refresh_work_profile(conn, project_name) + finally: + conn.close() + + +@main.command() +@click.option( + "--force", is_flag=True, + help="Regenerate even if the cached profile is fresh.", +) +@click.pass_context +def profile(ctx: click.Context, force: bool) -> None: + """Show (and refresh) the user work-profile injected at SessionStart. 
+ + Extractive — pulled from session cadence, code_areas, rollup + summaries, and decisions — so there's no LLM or network call. + The block is regenerated automatically every 3 days, on + `cce init`, and any time you pass --force. + """ + from context_engine.memory import db as memory_db + from context_engine.memory.work_profile import ( + format_profile_block, refresh_work_profile, + ) + config = ctx.obj["config"] + project_dir = _safe_cwd() + project_name = project_dir.name + storage_base = Path(config.storage_path) / project_name + db_path = memory_db.memory_db_path(storage_base) + if not db_path.exists(): + _warn("No memory.db yet — run `cce serve` for a session first.") + return + conn = memory_db.connect(db_path) + try: + profile_data = refresh_work_profile( + conn, project_name, force=force, + ) + finally: + conn.close() + block = format_profile_block(profile_data) + if not block: + _warn( + "Not enough session history yet — let Claude Code run for a " + "few sessions, then retry." + ) + return + click.echo("") + click.echo(block) + click.echo("") + _ok(f"Work profile stored for {project_name}") + + @main.command() @click.option("--full", is_flag=True, help="Force full re-index of every file") @click.option("--path", type=str, default=None, help="Index only this file or directory") diff --git a/src/context_engine/memory/db.py b/src/context_engine/memory/db.py index 7425797..ee86d10 100644 --- a/src/context_engine/memory/db.py +++ b/src/context_engine/memory/db.py @@ -23,7 +23,7 @@ log = logging.getLogger(__name__) -CURRENT_VERSION = 3 +CURRENT_VERSION = 4 # bge-small-en-v1.5 — the default embedder used everywhere else in cce. # If the project's embedder swaps to a different model, vec tables are @@ -229,6 +229,27 @@ ] +# v4: work_profile — extractive summary of how the user works on this +# project, derived from their accumulated session history (cadence, +# top-touched files, recurring rollup keywords, decision volume). 
One +# row per project, refreshed by `cce profile` and on `cce init`. Used +# alongside any per-session resume so each new Claude/Codex session +# opens with both "what is this project" and "how does this user +# typically work on it" context. +_SCHEMA_V4 = [ + """ + CREATE TABLE IF NOT EXISTS work_profile ( + project TEXT PRIMARY KEY, + cadence TEXT NOT NULL DEFAULT '', + top_files TEXT NOT NULL DEFAULT '', + recurring_themes TEXT NOT NULL DEFAULT '', + open_decisions INTEGER NOT NULL DEFAULT 0, + generated_at_epoch INTEGER NOT NULL + ) + """, +] + + def _vec_table_stmts(dim: int) -> list[str]: """vec0 virtual tables for the two surfaces session_recall actually reads. @@ -324,6 +345,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: cur.execute(stmt) for stmt in _SCHEMA_V3: cur.execute(stmt) + for stmt in _SCHEMA_V4: + cur.execute(stmt) cur.execute( "INSERT INTO schema_versions (version, applied_at_epoch) " "VALUES (?, strftime('%s','now'))", @@ -338,7 +361,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: # Existing db — apply additive upgrades up to CURRENT_VERSION. # v1 → v2: add vec tables + cleanup triggers (needs sqlite-vec). # v2 → v3: add savings_log (no extension dependency). - # If sqlite-vec is unavailable we can still apply v3, but we don't + # v3 → v4: add work_profile (no extension dependency). + # If sqlite-vec is unavailable we can still apply v3/v4, but we don't # stamp the version row so a future connection with vec loaded will # complete the v1 → v2 step. current = schema_version(conn) @@ -354,6 +378,9 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: if current < 3: for stmt in _SCHEMA_V3: cur.execute(stmt) + if current < 4: + for stmt in _SCHEMA_V4: + cur.execute(stmt) if current < 2 and not has_vec: # No version bump — vec step still pending. 
conn.commit() diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py index de384ad..f2d8c59 100644 --- a/src/context_engine/memory/hooks.py +++ b/src/context_engine/memory/hooks.py @@ -102,6 +102,18 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: """ parts: list[str] = [] + # Extractive user work-profile (v4): cadence, top files, recurring + # rollup keywords, decision volume. Tolerates a missing v4 table on + # partially-migrated dbs so the rest of the resume still renders. + from context_engine.memory.work_profile import ( + format_profile_block, load_work_profile, + ) + try: + profile = load_work_profile(conn, project) + except sqlite3.Error: + profile = None + work_profile_block = format_profile_block(profile) if profile else "" + last_rollup = conn.execute( "SELECT id, rollup_summary, ended_at " "FROM sessions " @@ -118,7 +130,7 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: savings_line = _build_savings_line(conn) - if not last_rollup and not decisions and not savings_line: + if not (last_rollup or decisions or savings_line or work_profile_block): return "" parts.append(f"## CCE memory · resuming {project}") @@ -126,6 +138,10 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: # before display so the resume reads as natural prose. from context_engine.memory.grammar import expand as _grammar_expand + if work_profile_block: + parts.append("") + parts.append(work_profile_block) + if savings_line: parts.append("") parts.append(f"**{savings_line}**") diff --git a/src/context_engine/memory/work_profile.py b/src/context_engine/memory/work_profile.py new file mode 100644 index 0000000..b22fe17 --- /dev/null +++ b/src/context_engine/memory/work_profile.py @@ -0,0 +1,321 @@ +"""User work-profile, persisted in memory.db. 
+ +Companion to project_summary: where project_summary describes *what the +project is*, this module describes *how the user works on it* — derived +purely from their accumulated session history so each new Claude / +Codex session opens with a "you typically work in X area, ship Y +sessions a week, and these are your recurring themes" preamble. + +Sources mined (all already populated by the lifecycle hooks): + + * ``sessions`` — cadence, prompt counts, last active + * ``code_areas`` — most-touched files via record_code_area() + * ``sessions.rollup_summary`` + — recurring keywords after stop-word stripping + * ``decisions`` — total decision count (proxy for "open + choices the agent should respect") + +Entirely extractive, no LLM dependency, no embeddings — runs in +milliseconds on a typical project's memory.db. Persisted in the v4 +``work_profile`` table and re-rendered by SessionStart's resume builder. +Regenerated lazily; callers refresh when the row is older than +``WORK_PROFILE_TTL_SECONDS`` (3 days by default — shorter than +project_summary's 7 because user patterns shift faster than codebase +architecture). +""" +from __future__ import annotations + +import logging +import re +import sqlite3 +import time +from collections import Counter + +from context_engine.memory.grammar import _FILLERS_ULTRA + +log = logging.getLogger(__name__) + + +WORK_PROFILE_TTL_SECONDS = 3 * 24 * 60 * 60 + +_TOP_FILES_N = 5 +_TOP_THEMES_N = 6 +# Minimum repetitions a token needs to count as a "recurring" theme. +# At 1 every word in the most-recent rollup would qualify, which adds +# noise rather than signal. +_MIN_THEME_REPEATS = 2 +# Length filter for theme tokens — drops single letters and lone digits +# that aren't useful even after stop-word stripping. +_MIN_THEME_LENGTH = 3 + +# Token regex: words, dotted module names (auth.routes), and +# snake_case / kebab-case identifiers. Keeps domain terms together +# rather than splitting "vector_store" into "vector" + "store". 
+_TOKEN_RE = re.compile(r"[a-z][a-z0-9]*(?:[._-][a-z0-9]+)*") + +# Stop-word set tuned for the contents of rollup summaries — combines +# grammar.py's _FILLERS_ULTRA (articles, modals, pronouns, weak verbs) +# with a small extra set of conversational-prose connectives that show +# up frequently in rollups but tell us nothing about *what was worked +# on*. Kept conservative: domain terms (cache, auth, retry, parser, +# index, …) are NOT in this list. +_THEME_STOPWORDS = _FILLERS_ULTRA | frozenset({ + "session", "sessions", "previous", "next", "again", + "fix", "fixed", "fixes", "fixing", + "add", "added", "adds", "adding", + "update", "updated", "updates", "updating", + "use", "used", "uses", "using", + "change", "changed", "changes", "changing", + "make", "made", "makes", "making", + "get", "got", "gets", "getting", + "set", "sets", "setting", "settings", + "new", "old", "good", "bad", + "one", "two", "three", "four", "five", + "first", "second", "third", "next", "last", "final", + "today", "yesterday", "tomorrow", + "ok", "yes", "no", "maybe", + "work", "works", "worked", "working", + "etc", "eg", "ie", + # Common boilerplate from auto-generated rollups + "ran", "running", "run", "runs", + "added", "removed", "changed", +}) + + +# ── Cadence ──────────────────────────────────────────────────────────── + + +def _compute_cadence(conn: sqlite3.Connection) -> tuple[str, int]: + """Return ("N sessions over D days · …", session_count). + + Empty string when fewer than 2 sessions exist (one data point can't + establish a cadence). The second tuple element is the raw session count + so the caller can decide whether the whole block is worth emitting. 
+ """ + row = conn.execute( + "SELECT COUNT(*) AS n, " + "MIN(started_at_epoch) AS first_epoch, " + "MAX(COALESCE(ended_at_epoch, started_at_epoch)) AS last_epoch, " + "AVG(NULLIF(prompt_count, 0)) AS avg_prompts " + "FROM sessions" + ).fetchone() + if not row or not row["n"]: + return ("", 0) + n = int(row["n"]) + if n < 2: + return ("", n) + + span_seconds = max(0, int(row["last_epoch"] or 0) - int(row["first_epoch"] or 0)) + span_days = max(1, span_seconds // 86_400) + parts = [f"{n} sessions over {span_days} day{'s' if span_days != 1 else ''}"] + + avg_prompts = row["avg_prompts"] + if avg_prompts is not None and avg_prompts >= 0.5: + parts.append(f"~{round(float(avg_prompts))} prompts/session") + + last_active_days = max( + 0, (int(time.time()) - int(row["last_epoch"] or 0)) // 86_400 + ) + if last_active_days == 0: + parts.append("active today") + elif last_active_days == 1: + parts.append("last active yesterday") + else: + parts.append(f"last active {last_active_days} days ago") + + return (" · ".join(parts), n) + + +# ── Top files ────────────────────────────────────────────────────────── + + +def _top_touched_files(conn: sqlite3.Connection) -> str: + """Top N file_paths by code_area count, formatted as a single line. + + `code_areas` is the explicit "I worked on this and want future-me + to find it fast" surface — much higher signal than mtime scans + because the user is curating it. 
+ """ + try: + rows = conn.execute( + "SELECT file_path, COUNT(*) AS n " + "FROM code_areas " + "GROUP BY file_path " + "ORDER BY n DESC, MAX(created_at_epoch) DESC " + "LIMIT ?", + (_TOP_FILES_N,), + ).fetchall() + except sqlite3.Error as exc: + log.debug("top_touched_files query failed: %s", exc) + return "" + if not rows: + return "" + return ", ".join(f"{r['file_path']} (×{r['n']})" for r in rows) + + +# ── Recurring themes ────────────────────────────────────────────────── + + +def _extract_themes(conn: sqlite3.Connection) -> str: + """Tokenize every rollup_summary, drop stop-words, return top tokens. + + A "theme" is a token that: + - matches _TOKEN_RE (word, dotted, or snake/kebab identifier) + - is at least _MIN_THEME_LENGTH chars long + - is not in _THEME_STOPWORDS + - appears at least _MIN_THEME_REPEATS times across all rollups + Result is a comma-separated list of the top _TOP_THEMES_N tokens. + """ + try: + rows = conn.execute( + "SELECT rollup_summary FROM sessions " + "WHERE rollup_summary IS NOT NULL AND rollup_summary != ''" + ).fetchall() + except sqlite3.Error as exc: + log.debug("rollup theme scan failed: %s", exc) + return "" + if not rows: + return "" + + counts: Counter[str] = Counter() + for r in rows: + text = (r["rollup_summary"] or "").lower() + for tok in _TOKEN_RE.findall(text): + if len(tok) < _MIN_THEME_LENGTH: + continue + if tok in _THEME_STOPWORDS: + continue + counts[tok] += 1 + + themes = [ + tok for tok, n in counts.most_common(_TOP_THEMES_N * 3) + if n >= _MIN_THEME_REPEATS + ] + return ", ".join(themes[:_TOP_THEMES_N]) + + +# ── Decision volume ─────────────────────────────────────────────────── + + +def _open_decisions_count(conn: sqlite3.Connection) -> int: + """Total decisions recorded — proxy for "how many durable choices + the agent should respect across sessions".""" + try: + row = conn.execute("SELECT COUNT(*) AS n FROM decisions").fetchone() + return int(row["n"]) if row else 0 + except sqlite3.Error: + return 0 + + +# 
── Public API ──────────────────────────────────────────────────────── + + +def build_work_profile(conn: sqlite3.Connection) -> dict: + """Compute (but do not persist) a fresh work profile dict.""" + cadence, session_count = _compute_cadence(conn) + return { + "cadence": cadence, + "top_files": _top_touched_files(conn), + "recurring_themes": _extract_themes(conn), + "open_decisions": _open_decisions_count(conn), + "session_count": session_count, + "generated_at_epoch": int(time.time()), + } + + +def upsert_work_profile( + conn: sqlite3.Connection, project: str, profile: dict +) -> None: + """Persist `profile` for `project`, replacing any prior row.""" + conn.execute( + """ + INSERT INTO work_profile + (project, cadence, top_files, recurring_themes, + open_decisions, generated_at_epoch) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(project) DO UPDATE SET + cadence = excluded.cadence, + top_files = excluded.top_files, + recurring_themes = excluded.recurring_themes, + open_decisions = excluded.open_decisions, + generated_at_epoch = excluded.generated_at_epoch + """, + ( + project, + profile.get("cadence", ""), + profile.get("top_files", ""), + profile.get("recurring_themes", ""), + int(profile.get("open_decisions", 0)), + int(profile.get("generated_at_epoch", time.time())), + ), + ) + conn.commit() + + +def load_work_profile( + conn: sqlite3.Connection, project: str +) -> dict | None: + """Return the persisted profile dict for `project`, or None.""" + row = conn.execute( + "SELECT cadence, top_files, recurring_themes, open_decisions, " + "generated_at_epoch FROM work_profile WHERE project = ?", + (project,), + ).fetchone() + if row is None: + return None + return dict(row) + + +def is_stale( + profile: dict, ttl_seconds: int = WORK_PROFILE_TTL_SECONDS +) -> bool: + """True when `profile` was generated more than `ttl_seconds` ago.""" + age = int(time.time()) - int(profile.get("generated_at_epoch", 0)) + return age > ttl_seconds + + +def format_profile_block(profile: dict) 
-> str: + """Render `profile` as a Markdown block for the resume hook. + + Empty string when there's nothing to say (e.g. one-session + project with no recorded code_areas or decisions) so the caller + can suppress the section on a fresh project. + """ + cadence = (profile.get("cadence") or "").strip() + files = (profile.get("top_files") or "").strip() + themes = (profile.get("recurring_themes") or "").strip() + decisions = int(profile.get("open_decisions", 0) or 0) + if not (cadence or files or themes or decisions): + return "" + + lines = ["**Your work profile** (extracted from prior sessions)"] + if cadence: + lines.append(f" {cadence}") + if files: + lines.append(f" _Most-touched files:_ {files}") + if themes: + lines.append(f" _Recurring themes:_ {themes}") + if decisions: + lines.append( + f" _{decisions} decision{'s' if decisions != 1 else ''} on " + "file — call `session_recall(\"\")` to read them._" + ) + return "\n".join(lines) + + +def refresh_work_profile( + conn: sqlite3.Connection, project: str, *, force: bool = False +) -> dict: + """Rebuild and persist if missing, stale, or force=True. + + Idempotent; returns the (possibly-just-regenerated) profile dict. + Callers should use this rather than calling build/upsert directly + so the TTL stays in one place. + """ + if not force: + existing = load_work_profile(conn, project) + if existing and not is_stale(existing): + return existing + profile = build_work_profile(conn) + upsert_work_profile(conn, project, profile) + return profile diff --git a/tests/memory/test_work_profile.py b/tests/memory/test_work_profile.py new file mode 100644 index 0000000..677ee05 --- /dev/null +++ b/tests/memory/test_work_profile.py @@ -0,0 +1,399 @@ +"""Tests for the work_profile table and extractive builder. + +work_profile is the "how the user works on this project" companion to +project_summary. 
Covered here: + + - schema v4 migration adds the table + - cadence calculation (count, span, avg prompts, last-active) + - top file aggregation from code_areas + - recurring-theme extraction with stop-word stripping + - upsert / load / is_stale round-trip + - format_profile_block omits empty sections + - refresh_work_profile honours the TTL but rebuilds on force + - build_session_resume includes the work-profile block +""" +from __future__ import annotations + +import time + +from context_engine.memory import db as memory_db +from context_engine.memory.hooks import build_session_resume +from context_engine.memory.work_profile import ( + WORK_PROFILE_TTL_SECONDS, + build_work_profile, + format_profile_block, + is_stale, + load_work_profile, + refresh_work_profile, + upsert_work_profile, +) + + +# ── Schema ───────────────────────────────────────────────────────────── + + +def test_work_profile_table_exists_after_connect(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + row = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' " + "AND name='work_profile'" + ).fetchone() + assert row is not None, "work_profile table missing" + cols = {r[1] for r in conn.execute("PRAGMA table_info(work_profile)")} + for expected in ( + "project", "cadence", "top_files", "recurring_themes", + "open_decisions", "generated_at_epoch", + ): + assert expected in cols, f"missing column {expected}" + finally: + conn.close() + + +# ── Upsert / load / is_stale ─────────────────────────────────────────── + + +def test_upsert_round_trip(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + payload = { + "cadence": "4 sessions over 6 days · ~3 prompts/session", + "top_files": "src/a.py (×5), src/b.py (×2)", + "recurring_themes": "retry, cache, auth", + "open_decisions": 7, + "generated_at_epoch": 1700000000, + } + upsert_work_profile(conn, "demo", payload) + loaded = load_work_profile(conn, "demo") + assert loaded is not None + assert 
loaded["cadence"].startswith("4 sessions") + assert "src/a.py (×5)" in loaded["top_files"] + assert loaded["recurring_themes"] == "retry, cache, auth" + assert loaded["open_decisions"] == 7 + assert loaded["generated_at_epoch"] == 1700000000 + finally: + conn.close() + + +def test_load_returns_none_when_absent(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + assert load_work_profile(conn, "nope") is None + finally: + conn.close() + + +def test_is_stale_true_when_old(): + profile = { + "generated_at_epoch": int(time.time()) - WORK_PROFILE_TTL_SECONDS - 10, + } + assert is_stale(profile) is True + + +def test_is_stale_false_when_fresh(): + assert is_stale({"generated_at_epoch": int(time.time())}) is False + + +# ── Cadence ──────────────────────────────────────────────────────────── + + +def _seed_session(conn, sid, started_epoch, ended_epoch, prompt_count, rollup=None): + conn.execute( + "INSERT INTO sessions (id, project, started_at_epoch, started_at, " + "ended_at_epoch, ended_at, status, prompt_count, " + "rollup_summary, rollup_summary_at_epoch) VALUES " + "(?, 'demo', ?, ?, ?, ?, 'completed', ?, ?, ?)", + ( + sid, started_epoch, f"start-{sid}", + ended_epoch, f"end-{sid}", + prompt_count, + rollup, ended_epoch if rollup else None, + ), + ) + + +def test_cadence_skipped_with_only_one_session(tmp_path): + """One data point isn't a cadence — leave the line blank.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + _seed_session(conn, "only", 1700000000, 1700001000, 5) + conn.commit() + profile = build_work_profile(conn) + assert profile["cadence"] == "" + assert profile["session_count"] == 1 + finally: + conn.close() + + +def test_cadence_reports_span_and_average(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + # 3 sessions, 2 days apart, average 4 prompts/session. 
+ for i, prompts in enumerate([3, 5, 4]): + _seed_session( + conn, f"s{i}", + started_epoch=1700000000 + i * 86400 * 2, + ended_epoch=1700001000 + i * 86400 * 2, + prompt_count=prompts, + ) + conn.commit() + profile = build_work_profile(conn) + assert "3 sessions" in profile["cadence"] + assert "days" in profile["cadence"] + # avg of 3, 5, 4 = 4 → "~4 prompts/session" + assert "~4 prompts/session" in profile["cadence"] + + + finally: + conn.close() + + +# ── Top files ────────────────────────────────────────────────────────── + + +def test_top_files_ranks_by_frequency(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + for path, n in [ + ("src/auth.py", 5), + ("src/db.py", 3), + ("src/cli.py", 1), + ]: + for i in range(n): + conn.execute( + "INSERT INTO code_areas (file_path, description, source, " + "created_at_epoch) VALUES (?, ?, 'manual', ?)", + (path, f"work {i}", 1700000000 + i), + ) + conn.commit() + profile = build_work_profile(conn) + top = profile["top_files"] + # auth.py first (5), then db.py (3), then cli.py (1) + assert top.index("auth.py") < top.index("db.py") < top.index("cli.py") + assert "(×5)" in top + assert "(×3)" in top + assert "(×1)" in top + finally: + conn.close() + + +def test_top_files_empty_when_no_code_areas(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + assert build_work_profile(conn)["top_files"] == "" + finally: + conn.close() + + +# ── Recurring themes ────────────────────────────────────────────────── + + +def test_themes_extract_repeating_tokens_and_drop_stopwords(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + rollups = [ + "Built retry logic for the auth client. 
Used jittered backoff.", + "Fixed cache invalidation in the auth path; retry budgets confirmed.", + "Worked on cache eviction and retry semantics; we keep refining auth.", + "Investigated retry storms when cache misses spike.", + ] + for i, r in enumerate(rollups): + _seed_session( + conn, f"s{i}", + started_epoch=1700000000 + i * 86400, + ended_epoch=1700001000 + i * 86400, + prompt_count=3, + rollup=r, + ) + conn.commit() + themes = build_work_profile(conn)["recurring_themes"] + # "retry" (4 rollups), "cache" and "auth" (3 each) must be top themes. + assert "retry" in themes + assert "cache" in themes + assert "auth" in themes + # Stopwords / common verbs must NOT leak in. + for stop in ("the", "for", "and", "fixed", "used", "worked"): + assert stop not in themes.split(", "), ( + f"stopword {stop!r} leaked into themes: {themes!r}" + ) + finally: + conn.close() + + +def test_themes_require_min_repeats(tmp_path): + """Single-occurrence tokens don't count as recurring.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + # 'globally' and 'whimsical' each appear once → must NOT make it. + # 'cache' appears twice → should appear. 
+ _seed_session( + conn, "s1", 1700000000, 1700001000, 3, + rollup="Did one whimsical thing with the cache today.", + ) + _seed_session( + conn, "s2", 1700100000, 1700101000, 3, + rollup="Refined cache invalidation again globally.", + ) + conn.commit() + themes = build_work_profile(conn)["recurring_themes"] + assert "cache" in themes + assert "whimsical" not in themes + assert "globally" not in themes + finally: + conn.close() + + +def test_themes_empty_with_no_rollups(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + # session exists but no rollup_summary → no theme source + _seed_session(conn, "s1", 1700000000, 1700001000, 3) + conn.commit() + assert build_work_profile(conn)["recurring_themes"] == "" + finally: + conn.close() + + +# ── Decision count ──────────────────────────────────────────────────── + + +def test_open_decisions_count(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + for i in range(7): + conn.execute( + "INSERT INTO decisions (decision, reason, source, " + "created_at_epoch, created_at) VALUES " + "(?, ?, 'manual', ?, ?)", + (f"Decision {i}", f"because {i}", 1700000000 + i, f"t{i}"), + ) + conn.commit() + assert build_work_profile(conn)["open_decisions"] == 7 + finally: + conn.close() + + +# ── format_profile_block ─────────────────────────────────────────────── + + +def test_format_block_empty_returns_empty(): + assert format_profile_block({ + "cadence": "", "top_files": "", + "recurring_themes": "", "open_decisions": 0, + }) == "" + + +def test_format_block_omits_missing_sections(): + block = format_profile_block({ + "cadence": "4 sessions over 6 days", + "top_files": "", + "recurring_themes": "", + "open_decisions": 0, + }) + assert "Your work profile" in block + assert "4 sessions" in block + assert "Most-touched files" not in block + assert "Recurring themes" not in block + + +def test_format_block_pluralises_decisions(): + one = format_profile_block({ + "cadence": "x", "top_files": "", 
"recurring_themes": "", + "open_decisions": 1, + }) + assert "1 decision on file" in one + assert "1 decisions" not in one + many = format_profile_block({ + "cadence": "x", "top_files": "", "recurring_themes": "", + "open_decisions": 4, + }) + assert "4 decisions on file" in many + + +# ── refresh_work_profile (TTL + force) ───────────────────────────────── + + +def test_refresh_reuses_fresh_profile(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + _seed_session(conn, "s1", 1700000000, 1700001000, 3) + _seed_session(conn, "s2", 1700100000, 1700101000, 4) + conn.commit() + first = refresh_work_profile(conn, "demo") + # Insert a third session AFTER the first refresh — without --force + # the cached row should be served back. + _seed_session(conn, "s3", 1700200000, 1700201000, 5) + conn.commit() + second = refresh_work_profile(conn, "demo") + assert second["generated_at_epoch"] == first["generated_at_epoch"] + + + finally: + conn.close() + + +def test_refresh_force_rebuilds(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + _seed_session(conn, "s1", 1700000000, 1700001000, 3) + _seed_session(conn, "s2", 1700100000, 1700101000, 4) + conn.commit() + first = refresh_work_profile(conn, "demo") + time.sleep(1.05) # ensure the generated_at_epoch advances + second = refresh_work_profile(conn, "demo", force=True) + assert second["generated_at_epoch"] > first["generated_at_epoch"] + + + finally: + conn.close() + + +# ── build_session_resume integration ─────────────────────────────────── + + +def test_resume_includes_work_profile_block(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + upsert_work_profile(conn, "demo", { + "cadence": "5 sessions over 8 days · ~3 prompts/session", + "top_files": "src/auth.py (×4)", + "recurring_themes": "retry, cache, auth", + "open_decisions": 2, + "generated_at_epoch": int(time.time()), + }) + text = build_session_resume(conn, "demo") + assert "Your work profile" in text + assert "5 
sessions over 8 days" in text + assert "src/auth.py" in text + assert "retry" in text + assert "2 decisions on file" in text + finally: + conn.close() + + +def test_resume_tolerates_missing_work_profile_table(tmp_path): + """An old db without the v4 table must not crash the resume — it + should skip the block and render the rest.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + conn.execute("DROP TABLE work_profile") + conn.execute( + "INSERT INTO decisions (decision, reason, source, " + "created_at_epoch, created_at) VALUES " + "('Use SQLite', 'simple', 'manual', 1700000000, 't')" + ) + conn.commit() + text = build_session_resume(conn, "demo") + assert "Use SQLite" in text + assert "Your work profile" not in text + finally: + conn.close() + + +def test_resume_empty_when_no_state(tmp_path): + """Virgin db with no profile, no rollups, no decisions, no savings.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + assert build_session_resume(conn, "demo") == "" + finally: + conn.close()