From 0fed2ff5aed091ee09ed9a2a32c452931002f1c5 Mon Sep 17 00:00:00 2001
From: Fazle Elahee <felahee@gmail.com>
Date: Tue, 12 May 2026 16:30:00 +0100
Subject: [PATCH] feat(memory): project summary + last-3-sessions in
 SessionStart resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a persistent, per-project summary that SessionStart injects at the
top of every new Claude Code conversation, so each fresh session opens
with a clear picture of what the project IS — not just what was decided
last week. Pairs with a bump from 1 → 3 prior-session rollups so the
"last 2-3 sessions of work" context the user asked for actually
surfaces.

What's added

- memory.db v4: new `project_summary` table (one row per project) with
  pitch, tech_stack, recent_focus, source_file_count, generated_at_epoch.
  Migration is purely additive — old dbs upgrade in place on the next
  `connect()`. The migration path doesn't need sqlite-vec, so users
  without it still get v4.

- `context_engine.memory.project_summary` — extractive builder with no
  LLM dependency:
    * pitch     ← first substantive line of README.md (HTML/badge
                  stripped), fallback to pyproject.toml's
                  [project].description
    * tech_stack ← extension tally across the indexed chunks
                   ("Python (124), TypeScript (38), Markdown (12)")
    * recent_focus ← top 5 file_paths from the code_areas table,
                     most-recent first, with each description capped
                     at 100 chars
  Plus upsert / load / is_stale / format_summary_block helpers.

- `cce summarize` — manual refresh, `--force` to bypass the 7-day
  staleness check.

- `cce init` now calls `_refresh_project_summary` after the initial
  index so the summary lands before the first SessionStart fires.

- `build_session_resume` rewritten:
    * prepends the project summary block when present
    * shows the last 3 sessions' rollups (was 1) — header switches to
      "Previous N sessions" when plural, "Previous session" when one
    * tolerates an old db that's missing the v4 table (skips the block
      instead of raising)

- Tests (new file):
    * schema migration creates table on fresh and upgrades existing
    * upsert round-trips, replaces existing, returns None when absent
    * pitch extraction from README, fallback to pyproject, empty when
      neither present
    * tech_stack tally
    * recent_focus ordering
    * format_summary_block omits empty sections
    * build_session_resume:
        - includes the new summary block
        - shows the last 3 of 4 prior sessions and drops the oldest
        - uses singular "Previous session" with exactly 1
        - returns empty string on a virgin project
        - degrades gracefully when project_summary table is missing

Not in this PR (follow-up)

- Mirroring the same summary block into the MCP `context-engine-init`
  bootstrap prompt so Codex CLI (which doesn't have hooks) gets the
  same content via its system-prompt path. The data and the formatter
  are now ready for that wiring.

Suite: 894 passed, 1 skipped, 0 failed. Ruff clean.
---
 src/context_engine/cli.py                    |  89 +++++
 src/context_engine/memory/db.py              |  29 +-
 src/context_engine/memory/hooks.py           |  75 +++-
 src/context_engine/memory/project_summary.py | 326 ++++++++++++++++
 tests/memory/test_project_summary.py         | 389 +++++++++++++++++++
 5 files changed, 894 insertions(+), 14 deletions(-)
 create mode 100644 src/context_engine/memory/project_summary.py
 create mode 100644 tests/memory/test_project_summary.py

diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py
index d14ee12..4585151 100644
--- a/src/context_engine/cli.py
+++ b/src/context_engine/cli.py
@@ -755,6 +755,17 @@ def init(ctx: click.Context) -> None:
         "  " + click.style("Indexing project", fg="cyan", bold=True) + "..."
     )
     asyncio.run(_run_index(config, str(project_dir), full=True))
+
+    # 7. Project summary — extractive, no LLM dep. Runs after indexing
+    # so the tech-stack scan sees the freshly-populated vector store.
+    # This is the data SessionStart will inject on every Claude Code
+    # boot from here on.
+    try:
+        _refresh_project_summary(config, project_dir)
+        _ok("Project summary captured  " + _dim("(injected on every Claude Code session)"))
+    except Exception as exc:  # pragma: no cover — best effort
+        _warn(f"Project summary skipped: {exc}")
+
     click.echo("")
     click.echo(
         click.style("  Done!", fg="green", bold=True) +
@@ -763,6 +774,84 @@ def init(ctx: click.Context) -> None:
     click.echo("")
 
 
+def _refresh_project_summary(config, project_dir: Path) -> dict:
+    """Return the project summary, rebuilding it if missing, stale or empty.
+
+    An empty cached row (e.g. stored before the first index) is never
+    reused, so `cce summarize` picks up content right after `cce index`.
+    """
+    from context_engine.memory import db as memory_db
+    from context_engine.memory.project_summary import (
+        build_project_summary, format_summary_block, is_stale,
+        load_project_summary, upsert_project_summary,
+    )
+    from context_engine.storage.local_backend import LocalBackend
+
+    project_name = project_dir.name
+    storage_base = Path(config.storage_path) / project_name
+    backend = LocalBackend(base_path=str(storage_base))
+    conn = memory_db.connect(memory_db.memory_db_path(storage_base))
+    try:
+        existing = load_project_summary(conn, project_name)
+        if existing and not is_stale(existing) and format_summary_block(existing):
+            return existing
+        summary = build_project_summary(
+            project_dir=project_dir,
+            memory_conn=conn,
+            vector_store=backend._vector_store,
+        )
+        upsert_project_summary(conn, project_name, summary)
+        return summary
+    finally:
+        conn.close()
+
+
+@main.command()
+@click.option(
+    "--force", is_flag=True,
+    help="Regenerate even if the cached summary is fresh.",
+)
+@click.pass_context
+def summarize(ctx: click.Context, force: bool) -> None:
+    """Refresh the project summary that SessionStart injects.
+
+    Pulled extractively from README/pyproject + indexed chunks +
+    recent code_areas — no LLM needed. Called automatically by
+    `cce init`; a cached summary is normally reused for 7 days, so
+    pass `--force` after a major architectural change to make the
+    next Claude Code session see the new shape.
+    """
+    from context_engine.memory.project_summary import format_summary_block
+    config = ctx.obj["config"]
+    project_dir = _safe_cwd()
+    project_name = project_dir.name
+
+    if force:
+        # Drop the existing row so _refresh_project_summary always
+        # rebuilds rather than honouring the freshness check.
+        from context_engine.memory import db as memory_db
+        storage_base = Path(config.storage_path) / project_name
+        conn = memory_db.connect(memory_db.memory_db_path(storage_base))
+        try:
+            conn.execute(
+                "DELETE FROM project_summary WHERE project = ?",
+                (project_name,),
+            )
+            conn.commit()
+        finally:
+            conn.close()
+
+    summary = _refresh_project_summary(config, project_dir)
+    block = format_summary_block(summary)
+    if not block:
+        _warn("No summary content available yet — try `cce index` first.")
+        return
+    click.echo("")
+    click.echo(block)
+    click.echo("")
+    _ok(f"Project summary stored for {project_name}")
+
+
 @main.command()
 @click.option("--full", is_flag=True, help="Force full re-index of every file")
 @click.option("--path", type=str, default=None, help="Index only this file or directory")
diff --git a/src/context_engine/memory/db.py b/src/context_engine/memory/db.py
index 7425797..ff87968 100644
--- a/src/context_engine/memory/db.py
+++ b/src/context_engine/memory/db.py
@@ -23,7 +23,7 @@
 
 log = logging.getLogger(__name__)
 
-CURRENT_VERSION = 3
+CURRENT_VERSION = 4
 
 # bge-small-en-v1.5 — the default embedder used everywhere else in cce.
 # If the project's embedder swaps to a different model, vec tables are
@@ -229,6 +229,25 @@
 ]
 
 
+# v4: project_summary. One row per project so SessionStart and the MCP
+# bootstrap path can prepend a stable "this is what the project does"
+# block to every resumed session. The build path is extractive (no LLM
+# dependency) so the row can be populated on `cce init` without
+# requiring Ollama. Regenerated when older than `SUMMARY_TTL_SECONDS`.
+_SCHEMA_V4 = [
+    """
+    CREATE TABLE IF NOT EXISTS project_summary (
+      project TEXT PRIMARY KEY,
+      pitch TEXT NOT NULL DEFAULT '',
+      tech_stack TEXT NOT NULL DEFAULT '',
+      recent_focus TEXT NOT NULL DEFAULT '',
+      source_file_count INTEGER NOT NULL DEFAULT 0,
+      generated_at_epoch INTEGER NOT NULL
+    )
+    """,
+]
+
+
 def _vec_table_stmts(dim: int) -> list[str]:
     """vec0 virtual tables for the two surfaces session_recall actually reads.
 
@@ -324,6 +343,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None:
                     cur.execute(stmt)
             for stmt in _SCHEMA_V3:
                 cur.execute(stmt)
+            for stmt in _SCHEMA_V4:
+                cur.execute(stmt)
             cur.execute(
                 "INSERT INTO schema_versions (version, applied_at_epoch) "
                 "VALUES (?, strftime('%s','now'))",
@@ -338,7 +359,8 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None:
     # Existing db — apply additive upgrades up to CURRENT_VERSION.
     # v1 → v2: add vec tables + cleanup triggers (needs sqlite-vec).
     # v2 → v3: add savings_log (no extension dependency).
-    # If sqlite-vec is unavailable we can still apply v3, but we don't
+    # v3 → v4: add project_summary (no extension dependency).
+    # If sqlite-vec is unavailable we can still apply v3/v4, but we don't
     # stamp the version row so a future connection with vec loaded will
     # complete the v1 → v2 step.
     current = schema_version(conn)
@@ -354,6 +376,9 @@ def _ensure_schema(conn: sqlite3.Connection, *, has_vec: bool) -> None:
         if current < 3:
             for stmt in _SCHEMA_V3:
                 cur.execute(stmt)
+        if current < 4:
+            for stmt in _SCHEMA_V4:
+                cur.execute(stmt)
         if current < 2 and not has_vec:
             # No version bump — vec step still pending.
             conn.commit()
diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py
index de384ad..9b4bbe3 100644
--- a/src/context_engine/memory/hooks.py
+++ b/src/context_engine/memory/hooks.py
@@ -45,6 +45,10 @@ def _conn(request: web.Request) -> sqlite3.Connection:
 
 _RESUME_RECENT_DECISIONS = 5
 _RESUME_DECISION_REASON_CHARS = 200
+# How many prior-session rollups to surface. Originally 1, but a single
+# rollup loses the trajectory of multi-day work — three is enough to
+# show "what we did Mon, Tue, Wed" without bloating the resume block.
+_RESUME_RECENT_SESSIONS = 3
 
 
 def _build_savings_line(conn: sqlite3.Connection) -> str:
@@ -99,15 +103,49 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str:
     conversation start — so this is the mechanism that prevents "decisions
     you made last week have to be re-explained today." Empty string for
     a brand-new project so there's no awkward header on the first session.
+
+    Layout (each section omitted when empty):
+
+        ## CCE memory · resuming <project>
+
+        **Project summary**          ← from project_summary table
+          <pitch>
+          _Stack:_ <tech_stack>
+          _Recent focus:_
+            - <file> — <description>
+
+        **Savings**                  ← from savings_log
+
+        **Previous sessions**        ← last 3 sessions w/ rollup
+          (session sid · ended_at)
+            <rollup>
+
+        **Recent decisions**         ← last 5 decisions
+
+        Footer with session_recall / session_timeline hints.
     """
     parts: list[str] = []
 
-    last_rollup = conn.execute(
+    # ── Project summary (v4) ───────────────────────────────────────────
+    from context_engine.memory.project_summary import (
+        format_summary_block, load_project_summary,
+    )
+    try:
+        summary = load_project_summary(conn, project)
+    except sqlite3.Error:
+        # project_summary table may not exist yet on a partially-migrated
+        # db. Treat as absent and continue — the rest of the resume is
+        # independent.
+        summary = None
+    summary_block = format_summary_block(summary) if summary else ""
+
+    recent_sessions = list(conn.execute(
         "SELECT id, rollup_summary, ended_at "
         "FROM sessions "
         "WHERE rollup_summary IS NOT NULL AND rollup_summary != '' "
-        "ORDER BY started_at_epoch DESC LIMIT 1"
-    ).fetchone()
+        "ORDER BY started_at_epoch DESC LIMIT ?",
+        (_RESUME_RECENT_SESSIONS,),
+    ))
 
     decisions = list(conn.execute(
         "SELECT decision, reason, source, session_id, created_at "
@@ -118,7 +156,7 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str:
 
     savings_line = _build_savings_line(conn)
 
-    if not last_rollup and not decisions and not savings_line:
+    if not (recent_sessions or decisions or savings_line or summary_block):
         return ""
 
     parts.append(f"## CCE memory · resuming {project}")
@@ -126,19 +164,32 @@ def build_session_resume(conn: sqlite3.Connection, project: str) -> str:
     # before display so the resume reads as natural prose.
     from context_engine.memory.grammar import expand as _grammar_expand
 
+    if summary_block:
+        parts.append("")
+        parts.append(summary_block)
+
     if savings_line:
         parts.append("")
         parts.append(f"**{savings_line}**")
 
-    if last_rollup:
-        when = last_rollup["ended_at"] or "in progress"
+    if recent_sessions:
         parts.append("")
-        parts.append(f"**Previous session** ({when}):")
-        rollup = _grammar_expand((last_rollup["rollup_summary"] or "").strip())
-        for line in rollup.split("\n"):
-            line = line.strip()
-            if line:
-                parts.append(f"  {line}")
+        if len(recent_sessions) == 1:
+            parts.append("**Previous session**:")
+        else:
+            parts.append(
+                f"**Previous {len(recent_sessions)} sessions** "
+                f"(most-recent first):"
+            )
+        for s in recent_sessions:
+            when = s["ended_at"] or "in progress"
+            sid = s["id"]
+            parts.append(f"  - _session `{sid}` · {when}_")
+            rollup = _grammar_expand((s["rollup_summary"] or "").strip())
+            for line in rollup.split("\n"):
+                line = line.strip()
+                if line:
+                    parts.append(f"      {line}")
     if decisions:
         parts.append("")
         parts.append("**Recent decisions** (most-recent first):")
diff --git a/src/context_engine/memory/project_summary.py b/src/context_engine/memory/project_summary.py
new file mode 100644
index 0000000..2e36b3b
--- /dev/null
+++ b/src/context_engine/memory/project_summary.py
@@ -0,0 +1,326 @@
+"""Project-level summary, persisted in memory.db.
+
+The SessionStart hook injects a *resume* block at the start of every new
+Claude Code conversation. Before this module, that block only carried
+prior-session rollups and decisions — nothing told the model what the
+project actually IS at a high level, so each fresh conversation re-derived
+the basics from scratch.
+
+`build_project_summary()` produces a small, three-section text block:
+
+  * **pitch**         — first substantive line of a README (.md, .rst,
+                        .txt or bare), else the pyproject description
+  * **tech_stack**    — file-extension distribution from the indexed
+                        chunks, top languages first
+  * **recent_focus**  — most-touched file paths from the `code_areas`
+                        table (the canonical "where work has been
+                        happening lately" signal)
+
+Entirely extractive — no LLM dependency — so it can run on `cce init`
+without requiring Ollama or fastembed-the-model. Persisted in the v4
+`project_summary` table and read back by `build_session_resume()`.
+
+Regenerated on demand; callers should refresh when the row is older than
+``SUMMARY_TTL_SECONDS`` (7 days by default) or after a large index
+operation finishes.
+"""
+from __future__ import annotations
+
+import logging
+import re
+import sqlite3
+import time
+from collections import Counter
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+
+# Regenerate the project summary if the cached row is older than this. A
+# week balances "fresh enough to reflect new architectural decisions" with
+# "not paying the rescan cost on every `cce init`".
+SUMMARY_TTL_SECONDS = 7 * 24 * 60 * 60
+
+# Caps for the three sections — kept tight because the resume block goes
+# into every session's context window.
+_PITCH_MAX_CHARS = 280
+_TECH_STACK_TOP_N = 6
+_RECENT_FOCUS_TOP_N = 5
+
+# File-extension → display name. Anything not listed falls back to the
+# upper-cased bare extension (e.g. ".lua" → "LUA"). Matches the language
+# map in indexer/pipeline.py but is intentionally a small subset — we
+# don't need every recognised language, only the common ones.
+_EXT_LABELS = {
+    ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
+    ".jsx": "JSX", ".tsx": "TSX", ".go": "Go", ".rs": "Rust",
+    ".java": "Java", ".rb": "Ruby", ".php": "PHP", ".cs": "C#",
+    ".c": "C", ".cpp": "C++", ".swift": "Swift", ".kt": "Kotlin",
+    ".scala": "Scala", ".sh": "Shell", ".md": "Markdown",
+    ".html": "HTML", ".css": "CSS", ".sql": "SQL", ".yaml": "YAML",
+    ".yml": "YAML", ".toml": "TOML", ".json": "JSON",
+}
+
+
+# ── Pitch extraction ────────────────────────────────────────────────────
+
+
+def _strip_html(line: str) -> str:
+    """Trim HTML/Markdown noise that shows up at the top of READMEs."""
+    line = re.sub(r"<[^>]+>", " ", line)
+    line = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", line)  # images
+    line = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", line)  # markdown links
+    line = re.sub(r"[#*_`]", "", line)
+    return line.strip()
+
+
+def _extract_pitch_from_readme(project_dir: Path) -> str:
+    """Return the first substantive line from README-like files, or "".
+
+    Tries README.md, .rst, .txt, then bare README; scans the first 80
+    lines of each. A line qualifies when it is not a Markdown heading
+    (checked on the RAW line, since `_strip_html` removes `#`) and its
+    stripped form is >= 30 chars; it is capped at `_PITCH_MAX_CHARS`.
+    """
+    candidates = [
+        project_dir / "README.md",
+        project_dir / "README.rst",
+        project_dir / "README.txt",
+        project_dir / "README",
+    ]
+    for path in candidates:
+        if not path.is_file():
+            continue
+        try:
+            text = path.read_text(encoding="utf-8", errors="replace")
+        except OSError:
+            continue
+        for raw in text.splitlines()[:80]:
+            line = _strip_html(raw).strip()
+            if not line or raw.lstrip().startswith("#"):
+                continue
+            if len(line) < 30:
+                continue
+            if len(line) > _PITCH_MAX_CHARS:
+                line = line[:_PITCH_MAX_CHARS].rsplit(" ", 1)[0] + "…"
+            return line
+    return ""
+
+
+def _extract_pitch_from_pyproject(project_dir: Path) -> str:
+    """Fallback pitch: the `description` field from pyproject.toml.
+
+    Skipped if the project doesn't have one (e.g. a JS-only repo). The
+    parser is intentionally regex-based — a full `tomllib` parse is
+    heavier than needed for one field, and it is stdlib only on 3.11+.
+    """
+    pyproject = project_dir / "pyproject.toml"
+    if not pyproject.is_file():
+        return ""
+    try:
+        text = pyproject.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return ""
+    # First `description = "..."` line anywhere in the file (not section-aware).
+    m = re.search(r'^\s*description\s*=\s*"([^"]+)"', text, re.MULTILINE)
+    if not m:
+        return ""
+    pitch = m.group(1).strip()
+    if len(pitch) > _PITCH_MAX_CHARS:
+        pitch = pitch[:_PITCH_MAX_CHARS].rsplit(" ", 1)[0] + "…"
+    return pitch
+
+
+def _extract_pitch(project_dir: Path) -> str:
+    return (
+        _extract_pitch_from_readme(project_dir)
+        or _extract_pitch_from_pyproject(project_dir)
+        or ""
+    )
+
+
+# ── Tech stack distribution ────────────────────────────────────────────
+
+
+def _extract_tech_stack(vector_store) -> tuple[str, int]:
+    """Read distinct file paths from the vector store and tally extensions.
+
+    Returns (label_string, file_count). `label_string` is a comma-joined
+    list of the top languages by file count, e.g. "Python (124),
+    TypeScript (38), Markdown (12)". Returns ("", 0) if the store hasn't
+    been indexed yet (count == 0) or the path-fetching API isn't
+    available — the caller is expected to handle the empty case
+    gracefully.
+    """
+    try:
+        # VectorStore exposes `count()` but not (yet) a bulk distinct-path
+        # call, so we go through the underlying connection. Keeping this
+        # behind a try/except so an internals refactor doesn't break the
+        # summary builder.
+        conn = vector_store._conn  # noqa: SLF001
+        rows = conn.execute(
+            "SELECT DISTINCT file_path FROM chunks"
+        ).fetchall()
+    except (AttributeError, sqlite3.Error) as exc:
+        log.debug("tech_stack scan unavailable: %s", exc)
+        return ("", 0)
+
+    paths = [r[0] for r in rows if r and r[0]]
+    if not paths:
+        return ("", 0)
+
+    counts: Counter[str] = Counter()
+    for p in paths:
+        suffix = Path(p).suffix.lower()
+        if not suffix:
+            continue
+        label = _EXT_LABELS.get(suffix, suffix.lstrip(".").upper())
+        counts[label] += 1
+    if not counts:
+        return ("", len(paths))
+
+    top = counts.most_common(_TECH_STACK_TOP_N)
+    return (
+        ", ".join(f"{label} ({count})" for label, count in top),
+        len(paths),
+    )
+
+
+# ── Recent focus (from code_areas) ─────────────────────────────────────
+
+
+def _extract_recent_focus(conn: sqlite3.Connection) -> str:
+    """Return the top N file_paths from code_areas, most-recent first.
+
+    `code_areas` is populated by record_code_area() — the human-curated
+    "I worked on this and want future-me to find it fast" signal — and
+    is the cleanest proxy for "what's the current focus" without
+    requiring git or file-mtime scans.
+    """
+    try:
+        rows = conn.execute(
+            "SELECT file_path, description, MAX(created_at_epoch) AS last_seen "
+            "FROM code_areas "
+            "GROUP BY file_path "
+            "ORDER BY last_seen DESC "
+            "LIMIT ?",
+            (_RECENT_FOCUS_TOP_N,),
+        ).fetchall()
+    except sqlite3.Error as exc:
+        log.debug("recent_focus query failed: %s", exc)
+        return ""
+    if not rows:
+        return ""
+    parts = []
+    for r in rows:
+        file_path = r["file_path"]
+        desc = (r["description"] or "").strip()
+        if desc:
+            # Truncate per-line so one verbose record_code_area call
+            # doesn't blow out the resume block.
+            if len(desc) > 100:
+                desc = desc[:100].rsplit(" ", 1)[0] + "…"
+            parts.append(f"{file_path} — {desc}")
+        else:
+            parts.append(file_path)
+    return "\n".join(parts)
+
+
+# ── Public API ─────────────────────────────────────────────────────────
+
+
+def build_project_summary(
+    project_dir: Path,
+    memory_conn: sqlite3.Connection,
+    vector_store,
+) -> dict:
+    """Build (but do not persist) a fresh summary dict.
+
+    Composed from three independent sources so a failure in one section
+    doesn't poison the others. The caller persists via
+    :func:`upsert_project_summary`.
+    """
+    pitch = _extract_pitch(project_dir)
+    tech_stack, file_count = _extract_tech_stack(vector_store)
+    recent_focus = _extract_recent_focus(memory_conn)
+    return {
+        "pitch": pitch,
+        "tech_stack": tech_stack,
+        "recent_focus": recent_focus,
+        "source_file_count": file_count,
+        "generated_at_epoch": int(time.time()),
+    }
+
+
+def upsert_project_summary(
+    conn: sqlite3.Connection, project: str, summary: dict
+) -> None:
+    """Persist `summary` for `project`, replacing any prior row."""
+    conn.execute(
+        """
+        INSERT INTO project_summary
+          (project, pitch, tech_stack, recent_focus,
+           source_file_count, generated_at_epoch)
+        VALUES (?, ?, ?, ?, ?, ?)
+        ON CONFLICT(project) DO UPDATE SET
+          pitch = excluded.pitch,
+          tech_stack = excluded.tech_stack,
+          recent_focus = excluded.recent_focus,
+          source_file_count = excluded.source_file_count,
+          generated_at_epoch = excluded.generated_at_epoch
+        """,
+        (
+            project,
+            summary.get("pitch", ""),
+            summary.get("tech_stack", ""),
+            summary.get("recent_focus", ""),
+            int(summary.get("source_file_count", 0)),
+            int(summary.get("generated_at_epoch", time.time())),
+        ),
+    )
+    conn.commit()
+
+
+def load_project_summary(
+    conn: sqlite3.Connection, project: str
+) -> dict | None:
+    """Return the persisted summary dict for `project`, or None."""
+    row = conn.execute(
+        "SELECT pitch, tech_stack, recent_focus, source_file_count, "
+        "generated_at_epoch FROM project_summary WHERE project = ?",
+        (project,),
+    ).fetchone()
+    if row is None:
+        return None
+    return dict(row)
+
+
+def is_stale(summary: dict, ttl_seconds: int = SUMMARY_TTL_SECONDS) -> bool:
+    """True when `summary` was generated more than `ttl_seconds` ago."""
+    age = int(time.time()) - int(summary.get("generated_at_epoch", 0))
+    return age > ttl_seconds
+
+
+def format_summary_block(summary: dict) -> str:
+    """Render `summary` as a Markdown block for the resume hook.
+
+    Returns "" when all three sections are empty so the caller can suppress
+    the block on a brand-new project.
+    """
+    pitch = (summary.get("pitch") or "").strip()
+    stack = (summary.get("tech_stack") or "").strip()
+    focus = (summary.get("recent_focus") or "").strip()
+    if not (pitch or stack or focus):
+        return ""
+    lines = ["**Project summary**"]
+    if pitch:
+        lines.append(f"  {pitch}")
+    if stack:
+        lines.append(f"  _Stack:_ {stack}")
+    if focus:
+        lines.append("  _Recent focus:_")
+        for line in focus.split("\n"):
+            line = line.strip()
+            if line:
+                lines.append(f"    - {line}")
+    return "\n".join(lines)
diff --git a/tests/memory/test_project_summary.py b/tests/memory/test_project_summary.py
new file mode 100644
index 0000000..12d5119
--- /dev/null
+++ b/tests/memory/test_project_summary.py
@@ -0,0 +1,389 @@
+"""Tests for the project_summary table and extractive builder.
+
+Covers:
+  - schema v4 migration (table exists after connect on a fresh db)
+  - pitch extraction from README.md / pyproject.toml fallback
+  - tech_stack tally from indexed chunks
+  - recent_focus from code_areas
+  - upsert + load round-trip
+  - is_stale + TTL
+  - format_summary_block omits empty sections
+  - build_session_resume includes the new summary block AND last 3 sessions
+"""
+from __future__ import annotations
+
+import sqlite3
+import time
+from unittest.mock import MagicMock
+
+
+from context_engine.memory import db as memory_db
+from context_engine.memory.hooks import build_session_resume
+from context_engine.memory.project_summary import (
+    SUMMARY_TTL_SECONDS,
+    build_project_summary,
+    format_summary_block,
+    is_stale,
+    load_project_summary,
+    upsert_project_summary,
+)
+
+
+# ── Schema migration ────────────────────────────────────────────────────
+
+
+def test_project_summary_table_exists_after_connect(tmp_path):
+    """A freshly connected db has project_summary with all expected columns."""
+    wanted = ("project", "pitch", "tech_stack", "recent_focus",
+              "source_file_count", "generated_at_epoch")
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        table = conn.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' "
+            "AND name='project_summary'"
+        ).fetchone()
+        assert table is not None, "project_summary table missing"
+        # Each PRAGMA table_info row carries the column name at index 1.
+        info = conn.execute("PRAGMA table_info(project_summary)")
+        present = {col[1] for col in info}
+        for expected in wanted:
+            assert expected in present, f"missing column {expected}"
+    finally:
+        conn.close()
+
+
+# ── Upsert / load round-trip ───────────────────────────────────────────
+
+
+def test_upsert_and_load_round_trip(tmp_path):
+    """Round-trip: every field passed to upsert_project_summary must be
+    returned unchanged by load_project_summary."""
+    payload = {
+        "pitch": "A tool for X",
+        "tech_stack": "Python (10), JavaScript (3)",
+        "recent_focus": "src/a.py — hot loop",
+        "source_file_count": 13,
+        "generated_at_epoch": 1700000000,
+    }
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        upsert_project_summary(conn, "demo", payload)
+        loaded = load_project_summary(conn, "demo")
+        assert loaded is not None
+        # Compare field by field against the payload that was written.
+        for field, value in payload.items():
+            assert loaded[field] == value
+    finally:
+        conn.close()
+
+
+def test_upsert_replaces_existing(tmp_path):
+    """A second upsert for the same project overwrites the stored row
+    instead of adding another one."""
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        versions = (("old", 1700000000), ("new", 1700001000))
+        for pitch, epoch in versions:
+            upsert_project_summary(conn, "demo", {
+                "pitch": pitch, "tech_stack": "", "recent_focus": "",
+                "source_file_count": 0, "generated_at_epoch": epoch,
+            })
+        loaded = load_project_summary(conn, "demo")
+        assert loaded["pitch"] == "new"
+        # And no duplicate rows.
+        rows = conn.execute("SELECT project FROM project_summary").fetchall()
+        assert len(rows) == 1
+    finally:
+        conn.close()
+
+
+def test_load_returns_none_when_absent(tmp_path):
+    db = memory_db.connect(tmp_path / "memory.db")
+    try:  # nothing has been upserted, so the lookup must come back empty
+        assert load_project_summary(db, "nope") is None
+    finally:
+        db.close()
+
+
+# ── is_stale ───────────────────────────────────────────────────────────
+
+
+def test_is_stale_true_when_old():
+    generated = int(time.time()) - SUMMARY_TTL_SECONDS - 10  # 10s past the TTL
+    assert is_stale({"generated_at_epoch": generated}) is True
+
+
+def test_is_stale_false_when_fresh():
+    just_now = int(time.time())  # generated this very second
+    assert is_stale({"generated_at_epoch": just_now}) is False
+
+
+# ── Pitch extraction ───────────────────────────────────────────────────
+
+
+def _make_vector_store(paths: list[str]):
+    """Return a MagicMock store whose `_conn` is an in-memory db holding one
+    `chunks` row (id, content "x", file_path) per entry in `paths`."""
+    db = sqlite3.connect(":memory:")
+    db.execute(
+        "CREATE TABLE chunks (id TEXT, content TEXT, file_path TEXT)"
+    )
+    db.executemany(
+        "INSERT INTO chunks VALUES (?, ?, ?)",
+        [(str(idx), "x", path) for idx, path in enumerate(paths)],
+    )
+    db.commit()
+    stub = MagicMock()
+    stub._conn = db
+    return stub
+
+
+def test_extract_pitch_from_readme(tmp_path):
+    """The pitch is taken from the README prose, minus badges and links."""
+    readme = (
+        "# Demo\n\n"
+        "![badge](https://example.com/x.png) "
+        "[link](https://example.com)\n\n"
+        "Demo is a small library for parsing TOML files and emitting\n"
+        "warnings about non-canonical whitespace.\n"
+    )
+    (tmp_path / "README.md").write_text(readme)
+    mem = memory_db.connect(tmp_path / "memory.db")
+    try:
+        pitch = build_project_summary(
+            project_dir=tmp_path,
+            memory_conn=mem,
+            vector_store=_make_vector_store([]),
+        )["pitch"]
+        assert "Demo is a small library" in pitch, pitch
+        assert "badge" not in pitch and "https://" not in pitch  # stripped
+    finally:
+        mem.close()
+
+
+def test_extract_pitch_falls_back_to_pyproject(tmp_path):
+    """Without a README the pitch comes from pyproject's description."""
+    (tmp_path / "pyproject.toml").write_text(
+        '[project]\nname = "demo"\n'
+        'description = "A tool for indexing things efficiently"\n'
+        'version = "0.1.0"\n'
+    )
+    mem = memory_db.connect(tmp_path / "memory.db")
+    try:
+        store = _make_vector_store([])
+        summary = build_project_summary(
+            project_dir=tmp_path, memory_conn=mem, vector_store=store,
+        )
+        assert summary["pitch"] == "A tool for indexing things efficiently"
+    finally:
+        mem.close()
+
+
+def test_pitch_empty_when_no_readme_or_pyproject(tmp_path):
+    """With neither source file present the pitch is the empty string."""
+    mem = memory_db.connect(tmp_path / "memory.db")
+    try:
+        store = _make_vector_store([])
+        summary = build_project_summary(
+            project_dir=tmp_path, memory_conn=mem, vector_store=store,
+        )
+        assert summary["pitch"] == ""
+    finally:
+        mem.close()
+
+
+# ── Tech stack ─────────────────────────────────────────────────────────
+
+
+def test_tech_stack_tallies_extensions(tmp_path):
+    """Indexed paths are tallied per language and counted in total."""
+    # 4 .py files + 2 .ts files + README.md = 7 indexed paths.
+    python_files = [f"src/{stem}.py" for stem in "abcd"]
+    indexed = python_files + ["app/x.ts", "app/y.ts", "README.md"]
+    mem = memory_db.connect(tmp_path / "memory.db")
+    try:
+        summary = build_project_summary(
+            project_dir=tmp_path,
+            memory_conn=mem,
+            vector_store=_make_vector_store(indexed),
+        )
+        stack = summary["tech_stack"]
+        assert "Python" in stack
+        assert "(4)" in stack
+        assert "TypeScript" in stack
+        assert summary["source_file_count"] == 7
+    finally:
+        mem.close()
+
+
+def test_tech_stack_handles_empty_index(tmp_path):
+    """An index with no chunks gives an empty stack and a zero file count."""
+    mem = memory_db.connect(tmp_path / "memory.db")
+    try:
+        store = _make_vector_store([])
+        summary = build_project_summary(
+            project_dir=tmp_path, memory_conn=mem, vector_store=store,
+        )
+        assert summary["tech_stack"] == ""
+        assert summary["source_file_count"] == 0
+    finally:
+        mem.close()
+
+
+# ── Recent focus from code_areas ───────────────────────────────────────
+
+
+def test_recent_focus_from_code_areas(tmp_path):
+    """recent_focus lists code_areas newest-first with their descriptions."""
+    memory_db_conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        memory_db_conn.executemany(
+            "INSERT INTO code_areas (file_path, description, source, "
+            "created_at_epoch) VALUES (?, ?, 'manual', ?)",
+            [
+                ("src/oldest.py", "old work", 1700000000),
+                ("src/middle.py", "middle work", 1700001000),
+                ("src/newest.py", "newest work", 1700002000),
+            ],
+        )
+        memory_db_conn.commit()
+        summary = build_project_summary(
+            project_dir=tmp_path,
+            memory_conn=memory_db_conn,
+            vector_store=_make_vector_store([]),
+        )
+        focus = summary["recent_focus"]
+        # Most-recent first, checked across all three rows, not just the extremes.
+        order = [focus.index(name) for name in ("newest.py", "middle.py", "oldest.py")]
+        assert order == sorted(order)
+        assert "newest work" in focus
+    finally:
+        memory_db_conn.close()
+
+
+# ── format_summary_block ───────────────────────────────────────────────
+
+
+def test_format_summary_block_omits_empty_sections():
+    """Only sections with content are rendered under the header."""
+    only_pitch = {
+        "pitch": "A demo tool", "tech_stack": "", "recent_focus": "",
+    }
+    block = format_summary_block(only_pitch)
+    for present in ("**Project summary**", "A demo tool"):
+        assert present in block
+    for absent in ("_Stack:_", "_Recent focus:_"):
+        assert absent not in block
+
+
+def test_format_summary_block_empty_returns_empty():
+    """All-blank sections collapse to an empty string."""
+    blank = dict.fromkeys(("pitch", "tech_stack", "recent_focus"), "")
+    assert format_summary_block(blank) == ""
+
+
+# ── build_session_resume integration ───────────────────────────────────
+
+
+def test_resume_includes_project_summary(tmp_path):
+    """A stored summary must surface in the SessionStart resume text:
+    header, pitch, stack and recent-focus entries all appear."""
+    stored = {
+        "pitch": "Local context engine for AI coding assistants",
+        "tech_stack": "Python (200), Markdown (15)",
+        "recent_focus": "src/context_engine/cli.py — main entry",
+        "source_file_count": 215,
+        "generated_at_epoch": int(time.time()),
+    }
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        upsert_project_summary(conn, "demo", stored)
+        text = build_session_resume(conn, "demo")
+        for piece in ("Project summary", "Local context engine",
+                      "Python (200)", "cli.py"):
+            assert piece in text
+    finally:
+        conn.close()
+
+
+def test_resume_shows_last_three_sessions(tmp_path):
+    """With four completed sessions the resume shows only the newest three."""
+    insert_sql = (
+        "INSERT INTO sessions (id, project, started_at_epoch, "
+        "started_at, ended_at_epoch, ended_at, status, "
+        "rollup_summary, rollup_summary_at_epoch) VALUES "
+        "(?, 'demo', ?, ?, ?, ?, 'completed', ?, ?)"
+    )
+    rows = []
+    for i, sid in enumerate(["s1", "s2", "s3", "s4"]):
+        began, ended = 1700000000 + i * 1000, 1700001000 + i * 1000
+        rows.append((
+            sid,
+            began, f"start-{i}",
+            ended, f"end-{i}",
+            f"Session {sid} did thing {i}.",
+            ended,
+        ))
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        conn.executemany(insert_sql, rows)
+        conn.commit()
+
+        text = build_session_resume(conn, "demo")
+        # s4, s3 and s2 (things 3, 2, 1) are kept; s1 (thing 0) is dropped.
+        for kept in (3, 2, 1):
+            assert f"did thing {kept}" in text
+        assert "did thing 0" not in text
+        # Header reflects plurality.
+        assert "Previous 3 sessions" in text
+    finally:
+        conn.close()
+
+
+def test_resume_uses_singular_header_when_only_one_session(tmp_path):
+    """A lone prior session is introduced with the singular header."""
+    db = memory_db.connect(tmp_path / "memory.db")
+    try:
+        db.execute(
+            "INSERT INTO sessions (id, project, started_at_epoch, "
+            "started_at, ended_at_epoch, ended_at, status, "
+            "rollup_summary, rollup_summary_at_epoch) VALUES "
+            "('only', 'demo', 1700000000, 's', 1700001000, 'e', "
+            "'completed', 'Only session work', 1700001000)"
+        )
+        db.commit()
+        resume = build_session_resume(db, "demo")
+        assert "Previous session" in resume and "Previous 1 sessions" not in resume
+    finally:
+        db.close()
+
+
+def test_resume_empty_when_no_state(tmp_path):
+    """With no summary, rollups or decisions stored, the resume is ""."""
+    db = memory_db.connect(tmp_path / "memory.db")
+    try:
+        assert build_session_resume(db, "demo") == ""
+    finally:
+        db.close()
+
+
+def test_resume_tolerates_missing_project_summary_table(tmp_path):
+    """Resume must not crash on a db lacking the project_summary table: the
+    summary block is skipped while other state is still rendered."""
+    seed_decision = (
+        "INSERT INTO decisions (decision, reason, source, "
+        "created_at_epoch, created_at) VALUES "
+        "('Use SQLite', 'simple', 'manual', 1700000000, 't')"
+    )
+    conn = memory_db.connect(tmp_path / "memory.db")
+    try:
+        conn.execute("DROP TABLE project_summary")
+        # Seed one decision so the resume gets past its early-return.
+        conn.execute(seed_decision)
+        conn.commit()
+        text = build_session_resume(conn, "demo")
+        assert "Use SQLite" in text
+        # No project summary block because the table is gone.
+        assert "Project summary" not in text
+    finally:
+        conn.close()