Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,21 @@ jobs:
- name: Install package
run: pip install -e ".[dev]"

# Pin fastembed's cache for BOTH the warm step and the test step so
# they share state. Embedder defaults to ~/.cache/fastembed under
# #67, and fastembed itself honours FASTEMBED_CACHE_PATH — setting
# this env var keeps the warm step and the tests pointed at the
# same directory without needing explicit kwargs.
- name: Warm fastembed model cache
env:
FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache
run: |
python -c "from fastembed import TextEmbedding; TextEmbedding('BAAI/bge-small-en-v1.5')"
python -c "from fastembed import TextEmbedding; TextEmbedding('sentence-transformers/all-MiniLM-L6-v2')"

- name: Run tests
env:
FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache
run: pytest -n ${{ runner.os == 'Windows' && '0' || '4' }} --cov=context_engine --cov-report=xml --cov-report=term
# Windows GitHub Actions runners send spurious SIGINT during pytest
# teardown, causing exit code 1 even when all tests pass (779/779).
Expand Down
75 changes: 74 additions & 1 deletion src/context_engine/indexer/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import sys
from functools import lru_cache
from pathlib import Path

from fastembed import TextEmbedding

Expand All @@ -20,6 +21,66 @@

_DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"


def _resolve_cache_dir() -> Path:
"""Pick a persistent fastembed cache location.

Precedence (highest first):
1. ``CCE_FASTEMBED_CACHE_PATH`` (CCE-specific override; takes
priority over fastembed's own var so users running multiple
tools that share fastembed can isolate CCE's cache)
2. ``FASTEMBED_CACHE_PATH`` (the env var fastembed itself recognises)
3. ``$XDG_CACHE_HOME/fastembed`` if XDG_CACHE_HOME is set
4. ``~/.cache/fastembed``

Previously CCE accepted fastembed's default of
``$TMPDIR/fastembed_cache`` which on WSL/Ubuntu's systemd-tmpfiles
layout (``/tmp`` cleared on every boot) meant the model got
re-downloaded on each restart — and re-hit any flaky-network failure
along with it (issue #67).
"""
cce_override = os.environ.get("CCE_FASTEMBED_CACHE_PATH", "").strip()
if cce_override:
return Path(cce_override).expanduser()
fast_override = os.environ.get("FASTEMBED_CACHE_PATH", "").strip()
if fast_override:
return Path(fast_override).expanduser()
xdg = os.environ.get("XDG_CACHE_HOME", "").strip()
if xdg:
return Path(xdg).expanduser() / "fastembed"
return Path.home() / ".cache" / "fastembed"


def _sweep_incomplete_downloads(cache_dir: Path) -> int:
"""Delete any ``*.incomplete`` files from a previous stalled download.

Without this sweep, a stalled ``huggingface_hub`` download leaves a
zero-byte ``model_optimized.onnx.incomplete`` file alongside the
expected ``model_optimized.onnx`` — and fastembed will then fail at
load time with a confusing ``NO_SUCHFILE`` error on every subsequent
run until the user manually nukes the cache (#67).

Returns the number of files removed.
"""
if not cache_dir.exists():
return 0
removed = 0
try:
for path in cache_dir.rglob("*.incomplete"):
try:
path.unlink()
removed += 1
except OSError as exc:
log.warning("Could not remove stale fastembed file %s: %s", path, exc)
except OSError as exc:
log.warning("Failed to scan fastembed cache at %s: %s", cache_dir, exc)
if removed:
log.info(
"Cleared %d stale `*.incomplete` file(s) from fastembed cache at %s",
removed, cache_dir,
)
return removed

# Passed straight to fastembed's `parallel` argument:
# None → no data-parallel mp; use onnxruntime's own threading
# N>0 → spawn N forkserver workers around onnxruntime
Expand Down Expand Up @@ -78,12 +139,24 @@ def __init__(
resolved = f"sentence-transformers/{model_name}"
else:
resolved = model_name
# Use a persistent cache dir that survives reboot, and clear out any
# partial downloads from a previously interrupted run so we never
# try to load a zero-byte ONNX file (#67).
cache_dir = _resolve_cache_dir()
try:
cache_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
log.warning("Could not create fastembed cache dir %s: %s", cache_dir, exc)
_sweep_incomplete_downloads(cache_dir)
try:
self._model = TextEmbedding(resolved)
self._model = TextEmbedding(resolved, cache_dir=str(cache_dir))
except Exception as exc:
raise RuntimeError(
f"Failed to load embedding model '{model_name}'. "
f"Ensure fastembed is installed and the model name is valid. "
f"Cache dir: {cache_dir}. "
f"If a previous download was interrupted, deleting the cache "
f"directory and retrying may help. "
f"Supported models: TextEmbedding.list_supported_models(). "
f"Original error: {exc}"
) from exc
Expand Down
18 changes: 16 additions & 2 deletions src/context_engine/indexer/git_hooks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Git hook installer and handler for triggering re-indexing."""
import shlex
import shutil
import stat
import sys
Expand Down Expand Up @@ -31,9 +32,22 @@ def _resolve_cce_binary() -> str:


def _hook_script() -> str:
    # Plain `cce index` already performs incremental indexing via the
    # on-disk manifest's content-hash check, so no flag is needed. The
    # removed `--changed-only` flag previously left stale hooks erroring
    # on every commit with "No such option: --changed-only" (issue #67).
    #
    # The binary path is shell-quoted because resolved paths commonly
    # contain spaces (e.g. C:\Users\Alice Smith\... on Windows, or
    # /Users/Firstname Lastname/.venv/bin/cce on macOS). git invokes the
    # hook file via POSIX sh on every platform — git-for-windows ships a
    # bundled sh — so shlex.quote yields a token that is correct for the
    # shell that actually runs the hook (Copilot review).
    quoted_bin = shlex.quote(_resolve_cce_binary())
    return f"{HOOK_MARKER}\n{quoted_bin} index >/dev/null 2>&1 &\n"


Expand Down
77 changes: 62 additions & 15 deletions src/context_engine/integration/mcp_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""MCP server exposing context engine tools to Claude Code."""
import asyncio
import json
import logging
import re
Expand Down Expand Up @@ -883,24 +884,57 @@ async def call_tool(name: str, arguments: dict):

# ── tool handlers ───────────────────────────────────────────────────────

async def _ensure_indexed(self) -> None:
"""Lazy indexing: if the index is empty, trigger indexing on first query."""
if self._lazy_indexed:
return
self._lazy_indexed = True
async def _ensure_indexed(self) -> bool:
"""Lazy indexing on empty index.

Returns True iff the index already has content. When the index is
empty, indexing is kicked off as a background task and the method
returns False so the caller can surface an informative message
instead of blocking the MCP request for the duration of the
first-time index. The old behaviour silently blocked context_search
for many seconds (or many minutes on a large repo); from the
client's side it looked like the call had hung (#67).
"""
try:
count = self._backend._vector_store.count()
if count > 0:
return
return True
except Exception:
pass
# Index is empty — trigger on-the-fly indexing
log.info("Index empty — triggering lazy indexing for %s", self._project_name)
# If we can't tell, assume it's populated rather than kick off
# an erroneous reindex.
return True

if self._lazy_indexed:
# Already kicked off — index is presumably building in the
# background. Keep telling callers to wait until it shows up.
return False
self._lazy_indexed = True

log.info(
"Index empty — kicking off background indexing for %s",
self._project_name,
)

async def _bg_index():
try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
log.info("Background indexing complete for %s", self._project_name)
except Exception as exc:
log.warning("Background indexing failed: %s", exc)

try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
except Exception as exc:
log.warning("Lazy indexing failed: %s", exc)
asyncio.create_task(_bg_index())
except RuntimeError:
# No running loop (synchronous test harness, etc.) — fall back
# to blocking. Better than swallowing the request.
try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
return True
except Exception as exc:
log.warning("Lazy indexing failed: %s", exc)
return False

async def _handle_context_search(self, args):
query = (args.get("query") or "").strip()
Expand All @@ -914,8 +948,21 @@ async def _handle_context_search(self, args):
)
]

# Lazy index if this is the first query and index is empty
await self._ensure_indexed()
# Lazy index if this is the first query and index is empty.
# When the index is empty we return immediately with a status line
# rather than blocking the call for the full indexing run — the
# MCP client otherwise sees a multi-second/minute hang with no
# progress indicator (#67).
indexed = await self._ensure_indexed()
if not indexed:
return [TextContent(
type="text",
text=(
f"Index for {self._project_name} is empty — indexing has "
"been started in the background. Retry this search in a "
"few seconds, or run `cce index` for a synchronous index."
),
)]

top_k = _clamp_top_k(args.get("top_k", 10))
max_tokens = args.get("max_tokens", 8000)
Expand Down
35 changes: 35 additions & 0 deletions src/context_engine/memory/hook_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,25 @@ def _is_windows() -> bool:
[ -r "${PORT_FILE}" ] || exit 0
PORT="$(cat "${PORT_FILE}" 2>/dev/null)"
[ -n "${PORT}" ] || exit 0
# PORT is interpolated into a `bash -c` command and a curl URL below.
# A corrupted (or hostile) port file containing $(...) / backticks /
# newlines would otherwise be evaluated by the shell. Refuse anything
# that isn't a plain integer in the valid TCP range (Copilot review).
case "${PORT}" in
''|*[!0-9]*) exit 0 ;;
esac
[ "${PORT}" -ge 1 ] 2>/dev/null && [ "${PORT}" -le 65535 ] 2>/dev/null || exit 0

# Quick TCP liveness probe via bash's /dev/tcp — if nothing is listening on
# the port (i.e. cce serve died but left its serve.port behind), bail out
# immediately instead of letting curl burn its full 1-2s timeout per hook
# call. On long Claude Code sessions with hundreds of PostToolUse events
# that adds up to many seconds of wasted wait (#67). If bash isn't
# available (rare on POSIX), fall through to the timed curl below — slower
# but still correct.
if command -v bash >/dev/null 2>&1; then
bash -c "exec 3<>/dev/tcp/127.0.0.1/${PORT}" 2>/dev/null || exit 0
fi

if [ "${HOOK_NAME}" = "SessionStart" ]; then
RESPONSE="$(curl -sf -m 2 -X POST -H "Content-Type: application/json" \\
Expand Down Expand Up @@ -114,6 +133,22 @@ def _is_windows() -> bool:

set /p PORT=<"%PORT_FILE%"
if "%PORT%"=="" exit /b 0
REM PORT is interpolated into the PowerShell -Command string and the
REM curl URL below. A corrupted (or hostile) port file containing
REM ');something(' would otherwise terminate the intended expression
REM and inject. Refuse anything that isn't a positive integer in the
REM valid TCP range (Copilot review).
echo %PORT%|findstr /R "^[1-9][0-9]*$" >nul 2>&1
if errorlevel 1 exit /b 0
if %PORT% LSS 1 exit /b 0
if %PORT% GTR 65535 exit /b 0

REM Liveness probe before spending curl's full timeout. PowerShell's
REM TcpClient is universally available on supported Windows targets and
REM exits in ~50ms when nothing's listening, vs. curl's 1-2s.
REM See the POSIX comment above for the motivation (#67).
powershell -NoProfile -Command "$ErrorActionPreference='Stop'; try { (New-Object Net.Sockets.TcpClient).Connect('127.0.0.1',%PORT%); exit 0 } catch { exit 1 }" >nul 2>&1
Comment thread
rajkumarsakthivel marked this conversation as resolved.
if errorlevel 1 exit /b 0

if /i "%HOOK_NAME%"=="SessionStart" (
set "TMP_RESP=%TEMP%\\cce_hook_resp_%RANDOM%.txt"
Expand Down
65 changes: 65 additions & 0 deletions tests/indexer/test_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def test_resolve_parallel_invalid_env_falls_through(monkeypatch):
assert _resolve_parallel() == 2



# ─── Issue #66 regression coverage ──────────────────────────────────────


Expand Down Expand Up @@ -117,3 +118,67 @@ def test_resolve_parallel_is_lazy(monkeypatch):
monkeypatch.setenv("CCE_EMBED_PARALLEL", "0")
second = _resolve_parallel()
assert second is None


# ─── Issue #67 regression coverage ──────────────────────────────────────


def test_resolve_cache_dir_default(monkeypatch, tmp_path):
    """No env var set → ~/.cache/fastembed, NOT /tmp."""
    from context_engine.indexer.embedder import _resolve_cache_dir
    for var in ("CCE_FASTEMBED_CACHE_PATH", "FASTEMBED_CACHE_PATH", "XDG_CACHE_HOME"):
        monkeypatch.delenv(var, raising=False)
    fake_home = tmp_path / "home"
    monkeypatch.setattr("pathlib.Path.home", classmethod(lambda cls: fake_home))
    assert _resolve_cache_dir() == fake_home / ".cache" / "fastembed"


def test_resolve_cache_dir_respects_fastembed_env(monkeypatch, tmp_path):
    from context_engine.indexer.embedder import _resolve_cache_dir
    target = tmp_path / "custom"
    monkeypatch.delenv("CCE_FASTEMBED_CACHE_PATH", raising=False)
    monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(target))
    got = _resolve_cache_dir()
    assert got == target


def test_resolve_cache_dir_cce_override_wins(monkeypatch, tmp_path):
    """CCE_FASTEMBED_CACHE_PATH overrides fastembed's own env var so users
    with multiple tools sharing the fastembed default can isolate CCE's
    cache."""
    from context_engine.indexer.embedder import _resolve_cache_dir
    cce_dir = tmp_path / "cce_path"
    fast_dir = tmp_path / "fast_path"
    monkeypatch.setenv("CCE_FASTEMBED_CACHE_PATH", str(cce_dir))
    monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(fast_dir))
    assert _resolve_cache_dir() == cce_dir


def test_resolve_cache_dir_xdg(monkeypatch, tmp_path):
    from context_engine.indexer.embedder import _resolve_cache_dir
    for var in ("CCE_FASTEMBED_CACHE_PATH", "FASTEMBED_CACHE_PATH"):
        monkeypatch.delenv(var, raising=False)
    xdg_root = tmp_path / "xdg"
    monkeypatch.setenv("XDG_CACHE_HOME", str(xdg_root))
    assert _resolve_cache_dir() == xdg_root / "fastembed"


def test_sweep_incomplete_removes_stale_partial(tmp_path):
    """Issue #67: a stalled huggingface_hub download leaves a 0-byte
    `model_optimized.onnx.incomplete` file that crashes every subsequent
    load. We must remove these on startup."""
    from context_engine.indexer.embedder import _sweep_incomplete_downloads
    snapshot = tmp_path / "models--qdrant--bge" / "snapshots" / "abc"
    snapshot.mkdir(parents=True)
    stale = snapshot / "model_optimized.onnx.incomplete"
    stale.write_bytes(b"")
    keeper = snapshot / "tokenizer.json"
    keeper.write_text("{}")

    assert _sweep_incomplete_downloads(tmp_path) == 1
    assert not stale.exists()
    assert keeper.exists()  # other cache files must survive


def test_sweep_incomplete_missing_dir_is_noop(tmp_path):
    from context_engine.indexer.embedder import _sweep_incomplete_downloads
    absent = tmp_path / "does_not_exist_yet"
    assert _sweep_incomplete_downloads(absent) == 0
Loading
Loading