From 0fed2ff5aed091ee09ed9a2a32c452931002f1c5 Mon Sep 17 00:00:00 2001 From: Fazle Elahee Date: Tue, 12 May 2026 16:30:00 +0100 Subject: [PATCH] feat(memory): project summary + last-3-sessions in SessionStart resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a persistent, per-project summary that SessionStart injects at the top of every new Claude Code conversation, so each fresh session opens with a clear picture of what the project IS — not just what was decided last week. Pairs with a bump from 1 → 3 prior-session rollups so the "last 2-3 sessions of work" context the user asked for actually surfaces. What's added - memory.db v4: new `project_summary` table (one row per project) with pitch, tech_stack, recent_focus, source_file_count, generated_at_epoch. Migration is purely additive — old dbs upgrade in place on the next `connect()`. The migration path doesn't need sqlite-vec, so users without it still get v4. - `context_engine.memory.project_summary` — extractive builder with no LLM dependency: * pitch ← first substantive line of README.md (HTML/badge stripped), fallback to pyproject.toml's [project].description * tech_stack ← extension tally across the indexed chunks ("Python (124), TypeScript (38), Markdown (12)") * recent_focus ← top 5 file_paths from the code_areas table, most-recent first, with each description capped at 100 chars Plus upsert / load / is_stale / format_summary_block helpers. - `cce summarize` — manual refresh, `--force` to bypass the 7-day staleness check. - `cce init` now calls `_refresh_project_summary` after the initial index so the summary lands before the first SessionStart fires. 
- `build_session_resume` rewritten: * prepends the project summary block when present * shows the last 3 sessions' rollups (was 1) — header switches to "Previous N sessions" when plural, "Previous session" when one * tolerates an old db that's missing the v4 table (skips the block instead of raising) - Tests (new file): * schema migration creates table on fresh and upgrades existing * upsert round-trips, replaces existing, returns None when absent * pitch extraction from README, fallback to pyproject, empty when neither present * tech_stack tally * recent_focus ordering * format_summary_block omits empty sections * build_session_resume: - includes the new summary block - shows the last 3 of 4 prior sessions and drops the oldest - uses singular "Previous session" with exactly 1 - returns empty string on a virgin project - degrades gracefully when project_summary table is missing Not in this PR (follow-up) - Mirroring the same summary block into the MCP `context-engine-init` bootstrap prompt so Codex CLI (which doesn't have hooks) gets the same content via its system-prompt path. The data and the formatter are now ready for that wiring. Suite: 894 passed, 1 skipped, 0 failed. Ruff clean. --- src/context_engine/cli.py | 89 +++++ src/context_engine/memory/db.py | 29 +- src/context_engine/memory/hooks.py | 75 +++- src/context_engine/memory/project_summary.py | 326 ++++++++++++++++ tests/memory/test_project_summary.py | 389 +++++++++++++++++++ 5 files changed, 894 insertions(+), 14 deletions(-) create mode 100644 src/context_engine/memory/project_summary.py create mode 100644 tests/memory/test_project_summary.py diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index d14ee12..4585151 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -755,6 +755,17 @@ def init(ctx: click.Context) -> None: " " + click.style("Indexing project", fg="cyan", bold=True) + "..." ) asyncio.run(_run_index(config, str(project_dir), full=True)) + + # 7. 
Project summary — extractive, no LLM dep. Runs after indexing + # so the tech-stack scan sees the freshly-populated vector store, + # and is the data SessionStart will inject on every Claude/Codex + # boot from here on. + try: + _refresh_project_summary(config, project_dir) + _ok("Project summary captured " + _dim("(injected on every Claude Code session)")) + except Exception as exc: # pragma: no cover — best effort + _warn(f"Project summary skipped: {exc}") + click.echo("") click.echo( click.style(" Done!", fg="green", bold=True) + @@ -763,6 +774,84 @@ def init(ctx: click.Context) -> None: click.echo("") +def _refresh_project_summary(config, project_dir: Path) -> dict: + """Rebuild the project_summary row for `project_dir` if missing or stale. + + Returns the (possibly-just-regenerated) summary dict. Safe to call on + every `cce init` and from the `cce summarize` command. + """ + from context_engine.memory import db as memory_db + from context_engine.memory.project_summary import ( + build_project_summary, is_stale, load_project_summary, + upsert_project_summary, + ) + from context_engine.storage.local_backend import LocalBackend + + project_name = project_dir.name + storage_base = Path(config.storage_path) / project_name + backend = LocalBackend(base_path=str(storage_base)) + conn = memory_db.connect(memory_db.memory_db_path(storage_base)) + try: + existing = load_project_summary(conn, project_name) + if existing and not is_stale(existing): + return existing + summary = build_project_summary( + project_dir=project_dir, + memory_conn=conn, + vector_store=backend._vector_store, + ) + upsert_project_summary(conn, project_name, summary) + return summary + finally: + conn.close() + + +@main.command() +@click.option( + "--force", is_flag=True, + help="Regenerate even if the cached summary is fresh.", +) +@click.pass_context +def summarize(ctx: click.Context, force: bool) -> None: + """Refresh the project summary that SessionStart injects. 
+ + Pulled extractively from README/pyproject + indexed chunks + + recent code_areas — no LLM needed. Called automatically by + `cce init` and refreshed every 7 days otherwise; run this + manually after a major architectural change so the next Claude + Code session sees the new shape. + """ + from context_engine.memory.project_summary import format_summary_block + config = ctx.obj["config"] + project_dir = _safe_cwd() + project_name = project_dir.name + + if force: + # Drop the existing row so _refresh_project_summary always + # rebuilds rather than honouring the freshness check. + from context_engine.memory import db as memory_db + storage_base = Path(config.storage_path) / project_name + conn = memory_db.connect(memory_db.memory_db_path(storage_base)) + try: + conn.execute( + "DELETE FROM project_summary WHERE project = ?", + (project_name,), + ) + conn.commit() + finally: + conn.close() + + summary = _refresh_project_summary(config, project_dir) + block = format_summary_block(summary) + if not block: + _warn("No summary content available yet — try `cce index` first.") + return + click.echo("") + click.echo(block) + click.echo("") + _ok(f"Project summary stored for {project_name}") + + @main.command() @click.option("--full", is_flag=True, help="Force full re-index of every file") @click.option("--path", type=str, default=None, help="Index only this file or directory") diff --git a/src/context_engine/memory/db.py b/src/context_engine/memory/db.py index 7425797..ff87968 100644 --- a/src/context_engine/memory/db.py +++ b/src/context_engine/memory/db.py @@ -23,7 +23,7 @@ log = logging.getLogger(__name__) -CURRENT_VERSION = 3 +CURRENT_VERSION = 4 # bge-small-en-v1.5 — the default embedder used everywhere else in cce. # If the project's embedder swaps to a different model, vec tables are @@ -229,6 +229,25 @@ ] +# v4: project_summary. 
One row per project so SessionStart and the MCP +# bootstrap path can prepend a stable "this is what the project does" +# block to every resumed session. The build path is extractive (no LLM +# dependency) so the row can be populated on `cce init` without +# requiring Ollama. Regenerated when older than `SUMMARY_TTL_SECONDS`. +_SCHEMA_V4 = [ + """ + CREATE TABLE IF NOT EXISTS project_summary ( + project TEXT PRIMARY KEY, + pitch TEXT NOT NULL DEFAULT '', + tech_stack TEXT NOT NULL DEFAULT '', + recent_focus TEXT NOT NULL DEFAULT '', + source_file_count INTEGER NOT NULL DEFAULT 0, + generated_at_epoch INTEGER NOT NULL + ) + """, +] + + def _vec_table_stmts(dim: int) -> list[str]: """vec0 virtual tables for the two surfaces session_recall actually reads. @@ -324,6 +343,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: cur.execute(stmt) for stmt in _SCHEMA_V3: cur.execute(stmt) + for stmt in _SCHEMA_V4: + cur.execute(stmt) cur.execute( "INSERT INTO schema_versions (version, applied_at_epoch) " "VALUES (?, strftime('%s','now'))", @@ -338,7 +359,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: # Existing db — apply additive upgrades up to CURRENT_VERSION. # v1 → v2: add vec tables + cleanup triggers (needs sqlite-vec). # v2 → v3: add savings_log (no extension dependency). - # If sqlite-vec is unavailable we can still apply v3, but we don't + # v3 → v4: add project_summary (no extension dependency). + # If sqlite-vec is unavailable we can still apply v3/v4, but we don't # stamp the version row so a future connection with vec loaded will # complete the v1 → v2 step. current = schema_version(conn) @@ -354,6 +376,9 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None: if current < 3: for stmt in _SCHEMA_V3: cur.execute(stmt) + if current < 4: + for stmt in _SCHEMA_V4: + cur.execute(stmt) if current < 2 and not has_vec: # No version bump — vec step still pending.
conn.commit() diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py index de384ad..9b4bbe3 100644 --- a/src/context_engine/memory/hooks.py +++ b/src/context_engine/memory/hooks.py @@ -45,6 +45,10 @@ def _conn(request: web.Request) -> sqlite3.Connection: _RESUME_RECENT_DECISIONS = 5 _RESUME_DECISION_REASON_CHARS = 200 +# How many prior-session rollups to surface. Originally 1, but a single +# rollup loses the trajectory of multi-day work — three is enough to +# show "what we did Mon, Tue, Wed" without bloating the resume block. +_RESUME_RECENT_SESSIONS = 3 def _build_savings_line(conn: sqlite3.Connection) -> str: @@ -99,15 +103,49 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: conversation start — so this is the mechanism that prevents "decisions you made last week have to be re-explained today." Empty string for a brand-new project so there's no awkward header on the first session. + + Layout (each section omitted when empty): + + ## CCE memory · resuming {project} + + **Project summary** ← from project_summary table + {pitch} + _Stack:_ {tech_stack} + _Recent focus:_ + - {file_path — description} + + **Savings** ← from savings_log + + **Previous sessions** ← last 3 sessions w/ rollup + (session sid · ended_at) + {rollup lines} + + **Recent decisions** ← last 5 decisions + + Footer with session_recall / session_timeline hints. """ parts: list[str] = [] - last_rollup = conn.execute( + # ── Project summary (v4) ─────────────────────────────────────────── + from context_engine.memory.project_summary import ( + format_summary_block, load_project_summary, + ) + try: + summary = load_project_summary(conn, project) + except sqlite3.Error: + # project_summary table may not exist yet on a partially-migrated + # db. Treat as absent and continue — the rest of the resume is + # independent.
+ summary = None + summary_block = format_summary_block(summary) if summary else "" + + recent_sessions = list(conn.execute( "SELECT id, rollup_summary, ended_at " "FROM sessions " "WHERE rollup_summary IS NOT NULL AND rollup_summary != '' " - "ORDER BY started_at_epoch DESC LIMIT 1" - ).fetchone() + "ORDER BY started_at_epoch DESC LIMIT ?", + (_RESUME_RECENT_SESSIONS,), + )) decisions = list(conn.execute( "SELECT decision, reason, source, session_id, created_at " @@ -118,7 +156,7 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: savings_line = _build_savings_line(conn) - if not last_rollup and not decisions and not savings_line: + if not (recent_sessions or decisions or savings_line or summary_block): return "" parts.append(f"## CCE memory · resuming {project}") @@ -126,19 +164,32 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str: # before display so the resume reads as natural prose. from context_engine.memory.grammar import expand as _grammar_expand + if summary_block: + parts.append("") + parts.append(summary_block) + if savings_line: parts.append("") parts.append(f"**{savings_line}**") - if last_rollup: - when = last_rollup["ended_at"] or "in progress" + if recent_sessions: parts.append("") - parts.append(f"**Previous session** ({when}):") - rollup = _grammar_expand((last_rollup["rollup_summary"] or "").strip()) - for line in rollup.split("\n"): - line = line.strip() - if line: - parts.append(f" {line}") + if len(recent_sessions) == 1: + parts.append("**Previous session**:") + else: + parts.append( + f"**Previous {len(recent_sessions)} sessions** " + f"(most-recent first):" + ) + for s in recent_sessions: + when = s["ended_at"] or "in progress" + sid = s["id"] + parts.append(f" - _session `{sid}` · {when}_") + rollup = _grammar_expand((s["rollup_summary"] or "").strip()) + for line in rollup.split("\n"): + line = line.strip() + if line: + parts.append(f" {line}") if decisions: parts.append("") 
parts.append("**Recent decisions** (most-recent first):") diff --git a/src/context_engine/memory/project_summary.py b/src/context_engine/memory/project_summary.py new file mode 100644 index 0000000..2e36b3b --- /dev/null +++ b/src/context_engine/memory/project_summary.py @@ -0,0 +1,326 @@ +"""Project-level summary, persisted in memory.db. + +The SessionStart hook injects a *resume* block at the start of every new +Claude Code conversation. Before this module, that block only carried +prior-session rollups and decisions — nothing told the model what the +project actually IS at a high level, so each fresh conversation re-derived +the basics from scratch. + +`build_project_summary()` produces a small dict with three text sections: + + * **pitch** — first substantive line of README.md/.rst/.txt, + falling back to the pyproject description + * **tech_stack** — file-extension distribution from the indexed + chunks, top languages first + * **recent_focus** — most recently recorded file paths from the + `code_areas` table (the canonical "where work + has been happening lately" signal) + +Entirely extractive — no LLM dependency — so it can run on `cce init` +without requiring Ollama or fastembed-the-model. Persisted in the v4 +`project_summary` table and read back by `build_session_resume()`. + +Regenerated on demand; callers should refresh when the row is older than +``SUMMARY_TTL_SECONDS`` (7 days by default) or after a large index +operation finishes. +""" +from __future__ import annotations + +import logging +import re +import sqlite3 +import time +from collections import Counter +from pathlib import Path + +log = logging.getLogger(__name__) + + +# Regenerate the project summary if the cached row is older than this. A +# week balances "fresh enough to reflect new architectural decisions" with +# "not paying the rescan cost on every `cce init`".
+SUMMARY_TTL_SECONDS = 7 * 24 * 60 * 60 + +# Caps for the three sections — kept tight because the resume block goes +# into every session's context window. +_PITCH_MAX_CHARS = 280 +_TECH_STACK_TOP_N = 6 +_RECENT_FOCUS_TOP_N = 5 + +# File-extension → display name. Anything not listed falls back to the +# upper-cased bare extension (e.g. ".lua" → "LUA"). Matches the language map in +# indexer/pipeline.py but is intentionally a small subset — we don't need +# every recognised language, only the common ones. +_EXT_LABELS = { + ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", + ".jsx": "JSX", ".tsx": "TSX", ".go": "Go", ".rs": "Rust", + ".java": "Java", ".rb": "Ruby", ".php": "PHP", ".cs": "C#", + ".c": "C", ".cpp": "C++", ".swift": "Swift", ".kt": "Kotlin", + ".scala": "Scala", ".sh": "Shell", ".md": "Markdown", + ".html": "HTML", ".css": "CSS", ".sql": "SQL", ".yaml": "YAML", + ".yml": "YAML", ".toml": "TOML", ".json": "JSON", +} + + +# ── Pitch extraction ──────────────────────────────────────────────────── + + +def _strip_html(line: str) -> str: + """Trim HTML/Markdown noise that shows up at the top of READMEs.""" + line = re.sub(r"<[^>]+>", " ", line) + line = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", line) # images + line = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", line) # markdown links + line = re.sub(r"[#*_`]", "", line) + return line.strip() + + +def _extract_pitch_from_readme(project_dir: Path) -> str: + """Return the first substantive sentence from README-like files. + + Walks a small candidate list in priority order. A "substantive" + sentence is the first non-empty, non-heading, non-badge line whose + plain-text form is at least 30 characters — short enough that a + one-line tagline counts, long enough that "Welcome!" doesn't.
+ """ + candidates = [ + project_dir / "README.md", + project_dir / "README.rst", + project_dir / "README.txt", + project_dir / "README", + ] + for path in candidates: + if not path.is_file(): + continue + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + for raw in text.splitlines()[:80]: + line = _strip_html(raw).strip() + if not line or line.startswith("#"): + continue + if len(line) < 30: + continue + if len(line) > _PITCH_MAX_CHARS: + line = line[:_PITCH_MAX_CHARS].rsplit(" ", 1)[0] + "…" + return line + return "" + + +def _extract_pitch_from_pyproject(project_dir: Path) -> str: + """Fallback pitch: the `description` field from pyproject.toml. + + Skipped if the project doesn't have one (e.g. a JS-only repo). The + parser is intentionally regex-based — a full TOML parse for one field + is heavier than needed, and stdlib `tomllib` only exists on 3.11+. + """ + pyproject = project_dir / "pyproject.toml" + if not pyproject.is_file(): + return "" + try: + text = pyproject.read_text(encoding="utf-8", errors="replace") + except OSError: + return "" + # Match the first line-anchored `description = "..."` (not scoped to [project]). + m = re.search(r'^\s*description\s*=\s*"([^"]+)"', text, re.MULTILINE) + if not m: + return "" + pitch = m.group(1).strip() + if len(pitch) > _PITCH_MAX_CHARS: + pitch = pitch[:_PITCH_MAX_CHARS].rsplit(" ", 1)[0] + "…" + return pitch + + +def _extract_pitch(project_dir: Path) -> str: + return ( + _extract_pitch_from_readme(project_dir) + or _extract_pitch_from_pyproject(project_dir) + or "" + ) + + +# ── Tech stack distribution ──────────────────────────────────────────── + + +def _extract_tech_stack(vector_store) -> tuple[str, int]: + """Read distinct file paths from the vector store and tally extensions. + + Returns (label_string, file_count). `label_string` is a comma-joined + list of the top languages by file count, e.g. "Python (124), + TypeScript (38), Markdown (12)".
Returns ("", 0) if the store hasn't + been indexed yet (count == 0) or the path-fetching API isn't + available — the caller is expected to handle the empty case + gracefully. + """ + try: + # VectorStore exposes `count()` but not (yet) a bulk distinct-path + # call, so we go through the underlying connection. Keeping this + # behind a try/except so an internals refactor doesn't break the + # summary builder. + conn = vector_store._conn # noqa: SLF001 + rows = conn.execute( + "SELECT DISTINCT file_path FROM chunks" + ).fetchall() + except (AttributeError, sqlite3.Error) as exc: + log.debug("tech_stack scan unavailable: %s", exc) + return ("", 0) + + paths = [r[0] for r in rows if r and r[0]] + if not paths: + return ("", 0) + + counts: Counter[str] = Counter() + for p in paths: + suffix = Path(p).suffix.lower() + if not suffix: + continue + label = _EXT_LABELS.get(suffix, suffix.lstrip(".").upper()) + counts[label] += 1 + if not counts: + return ("", len(paths)) + + top = counts.most_common(_TECH_STACK_TOP_N) + return ( + ", ".join(f"{label} ({count})" for label, count in top), + len(paths), + ) + + +# ── Recent focus (from code_areas) ───────────────────────────────────── + + +def _extract_recent_focus(conn: sqlite3.Connection) -> str: + """Return the top N file_paths from code_areas, most-recent first. + + `code_areas` is populated by record_code_area() — the human-curated + "I worked on this and want future-me to find it fast" signal — and + is the cleanest proxy for "what's the current focus" without + requiring git or file-mtime scans. 
+ """ + try: + rows = conn.execute( + "SELECT file_path, description, MAX(created_at_epoch) AS last_seen " + "FROM code_areas " + "GROUP BY file_path " + "ORDER BY last_seen DESC " + "LIMIT ?", + (_RECENT_FOCUS_TOP_N,), + ).fetchall() + except sqlite3.Error as exc: + log.debug("recent_focus query failed: %s", exc) + return "" + if not rows: + return "" + parts = [] + for r in rows: + file_path = r["file_path"] + desc = (r["description"] or "").strip() + if desc: + # Truncate per-line so one verbose record_code_area call + # doesn't blow out the resume block. + if len(desc) > 100: + desc = desc[:100].rsplit(" ", 1)[0] + "…" + parts.append(f"{file_path} — {desc}") + else: + parts.append(file_path) + return "\n".join(parts) + + +# ── Public API ───────────────────────────────────────────────────────── + + +def build_project_summary( + project_dir: Path, + memory_conn: sqlite3.Connection, + vector_store, +) -> dict: + """Build (but do not persist) a fresh summary dict. + + Composed from three independent sources so a failure in one section + doesn't poison the others. The caller persists via + :func:`upsert_project_summary`. + """ + pitch = _extract_pitch(project_dir) + tech_stack, file_count = _extract_tech_stack(vector_store) + recent_focus = _extract_recent_focus(memory_conn) + return { + "pitch": pitch, + "tech_stack": tech_stack, + "recent_focus": recent_focus, + "source_file_count": file_count, + "generated_at_epoch": int(time.time()), + } + + +def upsert_project_summary( + conn: sqlite3.Connection, project: str, summary: dict +) -> None: + """Persist `summary` for `project`, replacing any prior row.""" + conn.execute( + """ + INSERT INTO project_summary + (project, pitch, tech_stack, recent_focus, + source_file_count, generated_at_epoch) + VALUES (?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project) DO UPDATE SET + pitch = excluded.pitch, + tech_stack = excluded.tech_stack, + recent_focus = excluded.recent_focus, + source_file_count = excluded.source_file_count, + generated_at_epoch = excluded.generated_at_epoch + """, + ( + project, + summary.get("pitch", ""), + summary.get("tech_stack", ""), + summary.get("recent_focus", ""), + int(summary.get("source_file_count", 0)), + int(summary.get("generated_at_epoch", time.time())), + ), + ) + conn.commit() + + +def load_project_summary( + conn: sqlite3.Connection, project: str +) -> dict | None: + """Return the persisted summary dict for `project`, or None.""" + row = conn.execute( + "SELECT pitch, tech_stack, recent_focus, source_file_count, " + "generated_at_epoch FROM project_summary WHERE project = ?", + (project,), + ).fetchone() + if row is None: + return None + return dict(row) + + +def is_stale(summary: dict, ttl_seconds: int = SUMMARY_TTL_SECONDS) -> bool: + """True when `summary` was generated more than `ttl_seconds` ago.""" + age = int(time.time()) - int(summary.get("generated_at_epoch", 0)) + return age > ttl_seconds + + +def format_summary_block(summary: dict) -> str: + """Render `summary` as a Markdown block for the resume hook. + + Returns "" when all three sections are empty so the caller can suppress + the block on a brand-new project. 
+ """ + pitch = (summary.get("pitch") or "").strip() + stack = (summary.get("tech_stack") or "").strip() + focus = (summary.get("recent_focus") or "").strip() + if not (pitch or stack or focus): + return "" + lines = ["**Project summary**"] + if pitch: + lines.append(f" {pitch}") + if stack: + lines.append(f" _Stack:_ {stack}") + if focus: + lines.append(" _Recent focus:_") + for line in focus.split("\n"): + line = line.strip() + if line: + lines.append(f" - {line}") + return "\n".join(lines) diff --git a/tests/memory/test_project_summary.py b/tests/memory/test_project_summary.py new file mode 100644 index 0000000..12d5119 --- /dev/null +++ b/tests/memory/test_project_summary.py @@ -0,0 +1,389 @@ +"""Tests for the project_summary table and extractive builder. + +Covers: + - schema v4 migration (table exists after connect on a fresh db) + - pitch extraction from README.md / pyproject.toml fallback + - tech_stack tally from indexed chunks + - recent_focus from code_areas + - upsert + load round-trip + - is_stale + TTL + - format_summary_block omits empty sections + - build_session_resume includes the new summary block AND last 3 sessions +""" +from __future__ import annotations + +import sqlite3 +import time +from unittest.mock import MagicMock + + +from context_engine.memory import db as memory_db +from context_engine.memory.hooks import build_session_resume +from context_engine.memory.project_summary import ( + SUMMARY_TTL_SECONDS, + build_project_summary, + format_summary_block, + is_stale, + load_project_summary, + upsert_project_summary, +) + + +# ── Schema migration ──────────────────────────────────────────────────── + + +def test_project_summary_table_exists_after_connect(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + row = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' " + "AND name='project_summary'" + ).fetchone() + assert row is not None, "project_summary table missing" + cols = { + r[1] for r in 
conn.execute("PRAGMA table_info(project_summary)") + } + for expected in ( + "project", "pitch", "tech_stack", "recent_focus", + "source_file_count", "generated_at_epoch", + ): + assert expected in cols, f"missing column {expected}" + finally: + conn.close() + + +# ── Upsert / load round-trip ─────────────────────────────────────────── + + +def test_upsert_and_load_round_trip(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + payload = { + "pitch": "A tool for X", + "tech_stack": "Python (10), JavaScript (3)", + "recent_focus": "src/a.py — hot loop", + "source_file_count": 13, + "generated_at_epoch": 1700000000, + } + upsert_project_summary(conn, "demo", payload) + loaded = load_project_summary(conn, "demo") + assert loaded is not None + assert loaded["pitch"] == "A tool for X" + assert loaded["tech_stack"] == "Python (10), JavaScript (3)" + assert loaded["recent_focus"] == "src/a.py — hot loop" + assert loaded["source_file_count"] == 13 + assert loaded["generated_at_epoch"] == 1700000000 + finally: + conn.close() + + +def test_upsert_replaces_existing(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + upsert_project_summary(conn, "demo", { + "pitch": "old", "tech_stack": "", "recent_focus": "", + "source_file_count": 0, "generated_at_epoch": 1700000000, + }) + upsert_project_summary(conn, "demo", { + "pitch": "new", "tech_stack": "", "recent_focus": "", + "source_file_count": 0, "generated_at_epoch": 1700001000, + }) + loaded = load_project_summary(conn, "demo") + assert loaded["pitch"] == "new" + # And no duplicate rows. 
+ rows = list(conn.execute("SELECT project FROM project_summary")) + assert len(rows) == 1 + finally: + conn.close() + + +def test_load_returns_none_when_absent(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + assert load_project_summary(conn, "nope") is None + finally: + conn.close() + + +# ── is_stale ─────────────────────────────────────────────────────────── + + +def test_is_stale_true_when_old(): + summary = {"generated_at_epoch": int(time.time()) - (SUMMARY_TTL_SECONDS + 10)} + assert is_stale(summary) is True + + +def test_is_stale_false_when_fresh(): + summary = {"generated_at_epoch": int(time.time())} + assert is_stale(summary) is False + + +# ── Pitch extraction ─────────────────────────────────────────────────── + + +def _make_vector_store(paths: list[str]): + """Build a vector_store stub that exposes a _conn with `chunks` rows.""" + conn = sqlite3.connect(":memory:") + conn.execute( + "CREATE TABLE chunks (id TEXT, content TEXT, file_path TEXT)" + ) + for i, p in enumerate(paths): + conn.execute( + "INSERT INTO chunks VALUES (?, ?, ?)", + (str(i), "x", p), + ) + conn.commit() + store = MagicMock() + store._conn = conn + return store + + +def test_extract_pitch_from_readme(tmp_path): + (tmp_path / "README.md").write_text( + "# Demo\n\n" + "![badge](https://example.com/x.png) " + "[link](https://example.com)\n\n" + "Demo is a small library for parsing TOML files and emitting\n" + "warnings about non-canonical whitespace.\n" + ) + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store([]), + ) + assert "Demo is a small library" in summary["pitch"], summary["pitch"] + # Badges/links must be stripped. 
+ assert "badge" not in summary["pitch"] + assert "https://" not in summary["pitch"] + finally: + memory_db_conn.close() + + +def test_extract_pitch_falls_back_to_pyproject(tmp_path): + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "demo"\n' + 'description = "A tool for indexing things efficiently"\n' + 'version = "0.1.0"\n' + ) + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store([]), + ) + assert summary["pitch"] == "A tool for indexing things efficiently" + finally: + memory_db_conn.close() + + +def test_pitch_empty_when_no_readme_or_pyproject(tmp_path): + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store([]), + ) + assert summary["pitch"] == "" + finally: + memory_db_conn.close() + + +# ── Tech stack ───────────────────────────────────────────────────────── + + +def test_tech_stack_tallies_extensions(tmp_path): + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + paths = ( + ["src/a.py", "src/b.py", "src/c.py", "src/d.py"] + + ["app/x.ts", "app/y.ts"] + + ["README.md"] + ) + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store(paths), + ) + assert "Python" in summary["tech_stack"] + assert "(4)" in summary["tech_stack"] + assert "TypeScript" in summary["tech_stack"] + assert summary["source_file_count"] == 7 + finally: + memory_db_conn.close() + + +def test_tech_stack_handles_empty_index(tmp_path): + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store([]), + ) + assert summary["tech_stack"] == "" + assert summary["source_file_count"] == 0 + finally: + 
memory_db_conn.close() + + +# ── Recent focus from code_areas ─────────────────────────────────────── + + +def test_recent_focus_from_code_areas(tmp_path): + memory_db_conn = memory_db.connect(tmp_path / "memory.db") + try: + # Insert a few code_areas, varying recency. + for i, (path, desc, t) in enumerate([ + ("src/oldest.py", "old work", 1700000000), + ("src/middle.py", "middle work", 1700001000), + ("src/newest.py", "newest work", 1700002000), + ]): + memory_db_conn.execute( + "INSERT INTO code_areas (file_path, description, source, " + "created_at_epoch) VALUES (?, ?, 'manual', ?)", + (path, desc, t), + ) + memory_db_conn.commit() + summary = build_project_summary( + project_dir=tmp_path, + memory_conn=memory_db_conn, + vector_store=_make_vector_store([]), + ) + focus = summary["recent_focus"] + # Most-recent first. + assert focus.index("newest.py") < focus.index("oldest.py") + assert "newest work" in focus + finally: + memory_db_conn.close() + + +# ── format_summary_block ─────────────────────────────────────────────── + + +def test_format_summary_block_omits_empty_sections(): + block = format_summary_block({ + "pitch": "A demo tool", + "tech_stack": "", + "recent_focus": "", + }) + assert "**Project summary**" in block + assert "A demo tool" in block + assert "_Stack:_" not in block + assert "_Recent focus:_" not in block + + +def test_format_summary_block_empty_returns_empty(): + assert format_summary_block({ + "pitch": "", "tech_stack": "", "recent_focus": "", + }) == "" + + +# ── build_session_resume integration ─────────────────────────────────── + + +def test_resume_includes_project_summary(tmp_path): + """The new feature: SessionStart resume must prepend the project + summary so each Claude/Codex session sees what the project is.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + upsert_project_summary(conn, "demo", { + "pitch": "Local context engine for AI coding assistants", + "tech_stack": "Python (200), Markdown (15)", + "recent_focus": 
"src/context_engine/cli.py — main entry", + "source_file_count": 215, + "generated_at_epoch": int(time.time()), + }) + text = build_session_resume(conn, "demo") + assert "Project summary" in text + assert "Local context engine" in text + assert "Python (200)" in text + assert "cli.py" in text + finally: + conn.close() + + +def test_resume_shows_last_three_sessions(tmp_path): + """Previously only 1 prior session was shown — now last 3.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + for i, sid in enumerate(["s1", "s2", "s3", "s4"]): + conn.execute( + "INSERT INTO sessions (id, project, started_at_epoch, " + "started_at, ended_at_epoch, ended_at, status, " + "rollup_summary, rollup_summary_at_epoch) VALUES " + "(?, 'demo', ?, ?, ?, ?, 'completed', ?, ?)", + ( + sid, + 1700000000 + i * 1000, + f"start-{i}", + 1700001000 + i * 1000, + f"end-{i}", + f"Session {sid} did thing {i}.", + 1700001000 + i * 1000, + ), + ) + conn.commit() + + text = build_session_resume(conn, "demo") + # Three most-recent should appear (s4, s3, s2), s1 omitted. + assert "did thing 3" in text # s4 + assert "did thing 2" in text # s3 + assert "did thing 1" in text # s2 + assert "did thing 0" not in text # s1 dropped + # Header reflects plurality. 
+ assert "Previous 3 sessions" in text + finally: + conn.close() + + +def test_resume_uses_singular_header_when_only_one_session(tmp_path): + conn = memory_db.connect(tmp_path / "memory.db") + try: + conn.execute( + "INSERT INTO sessions (id, project, started_at_epoch, " + "started_at, ended_at_epoch, ended_at, status, " + "rollup_summary, rollup_summary_at_epoch) VALUES " + "('only', 'demo', 1700000000, 's', 1700001000, 'e', " + "'completed', 'Only session work', 1700001000)" + ) + conn.commit() + text = build_session_resume(conn, "demo") + assert "Previous session" in text + assert "Previous 1 sessions" not in text + finally: + conn.close() + + +def test_resume_empty_when_no_state(tmp_path): + """Brand-new project, no summary, no rollups, no decisions → blank.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + assert build_session_resume(conn, "demo") == "" + finally: + conn.close() + + +def test_resume_tolerates_missing_project_summary_table(tmp_path): + """An old db without the v4 table must not crash the resume — it + should just skip the summary block.""" + conn = memory_db.connect(tmp_path / "memory.db") + try: + conn.execute("DROP TABLE project_summary") + # Still need at least one piece of state so the function gets past + # its early-return. + conn.execute( + "INSERT INTO decisions (decision, reason, source, " + "created_at_epoch, created_at) VALUES " + "('Use SQLite', 'simple', 'manual', 1700000000, 't')" + ) + conn.commit() + text = build_session_resume(conn, "demo") + assert "Use SQLite" in text + # No project summary block because the table is gone. + assert "Project summary" not in text + finally: + conn.close()