Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,21 @@ jobs:
- name: Install package
run: pip install -e ".[dev]"

# Pin fastembed's cache for BOTH the warm step and the test step so
# they share state. Embedder defaults to ~/.cache/fastembed under
# #67, and fastembed itself honours FASTEMBED_CACHE_PATH — setting
# this env var keeps the warm step and the tests pointed at the
# same directory without needing explicit kwargs.
- name: Warm fastembed model cache
env:
FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache
run: |
python -c "from fastembed import TextEmbedding; TextEmbedding('BAAI/bge-small-en-v1.5')"
python -c "from fastembed import TextEmbedding; TextEmbedding('sentence-transformers/all-MiniLM-L6-v2')"

- name: Run tests
env:
FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache
run: pytest -n ${{ runner.os == 'Windows' && '0' || '4' }} --cov=context_engine --cov-report=xml --cov-report=term
# Windows GitHub Actions runners send spurious SIGINT during pytest
# teardown, causing exit code 1 even when all tests pass (779/779).
Expand Down
75 changes: 74 additions & 1 deletion src/context_engine/indexer/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import sys
from functools import lru_cache
from pathlib import Path

from fastembed import TextEmbedding

Expand All @@ -20,6 +21,66 @@

_DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"


def _resolve_cache_dir() -> Path:
"""Pick a persistent fastembed cache location.

Precedence (highest first):
1. ``CCE_FASTEMBED_CACHE_PATH`` (CCE-specific override; takes
priority over fastembed's own var so users running multiple
tools that share fastembed can isolate CCE's cache)
2. ``FASTEMBED_CACHE_PATH`` (the env var fastembed itself recognises)
3. ``$XDG_CACHE_HOME/fastembed`` if XDG_CACHE_HOME is set
4. ``~/.cache/fastembed``

Previously CCE accepted fastembed's default of
``$TMPDIR/fastembed_cache`` which on WSL/Ubuntu's systemd-tmpfiles
layout (``/tmp`` cleared on every boot) meant the model got
re-downloaded on each restart — and re-hit any flaky-network failure
along with it (issue #67).
"""
cce_override = os.environ.get("CCE_FASTEMBED_CACHE_PATH", "").strip()
if cce_override:
return Path(cce_override).expanduser()
fast_override = os.environ.get("FASTEMBED_CACHE_PATH", "").strip()
if fast_override:
return Path(fast_override).expanduser()
xdg = os.environ.get("XDG_CACHE_HOME", "").strip()
if xdg:
return Path(xdg).expanduser() / "fastembed"
return Path.home() / ".cache" / "fastembed"


def _sweep_incomplete_downloads(cache_dir: Path) -> int:
"""Delete any ``*.incomplete`` files from a previous stalled download.

Without this sweep, a stalled ``huggingface_hub`` download leaves a
zero-byte ``model_optimized.onnx.incomplete`` file alongside the
expected ``model_optimized.onnx`` — and fastembed will then fail at
load time with a confusing ``NO_SUCHFILE`` error on every subsequent
run until the user manually nukes the cache (#67).

Returns the number of files removed.
"""
if not cache_dir.exists():
return 0
removed = 0
try:
for path in cache_dir.rglob("*.incomplete"):
try:
path.unlink()
removed += 1
except OSError as exc:
log.warning("Could not remove stale fastembed file %s: %s", path, exc)
except OSError as exc:
log.warning("Failed to scan fastembed cache at %s: %s", cache_dir, exc)
if removed:
log.info(
"Cleared %d stale `*.incomplete` file(s) from fastembed cache at %s",
removed, cache_dir,
)
return removed

# Passed straight to fastembed's `parallel` argument:
# None → no data-parallel mp; use onnxruntime's own threading
# N>0 → spawn N forkserver workers around onnxruntime
Expand Down Expand Up @@ -78,12 +139,24 @@ def __init__(
resolved = f"sentence-transformers/{model_name}"
else:
resolved = model_name
# Use a persistent cache dir that survives reboot, and clear out any
# partial downloads from a previously interrupted run so we never
# try to load a zero-byte ONNX file (#67).
cache_dir = _resolve_cache_dir()
try:
cache_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
log.warning("Could not create fastembed cache dir %s: %s", cache_dir, exc)
_sweep_incomplete_downloads(cache_dir)
try:
self._model = TextEmbedding(resolved)
self._model = TextEmbedding(resolved, cache_dir=str(cache_dir))
except Exception as exc:
raise RuntimeError(
f"Failed to load embedding model '{model_name}'. "
f"Ensure fastembed is installed and the model name is valid. "
f"Cache dir: {cache_dir}. "
f"If a previous download was interrupted, deleting the cache "
f"directory and retrying may help. "
f"Supported models: TextEmbedding.list_supported_models(). "
f"Original error: {exc}"
) from exc
Expand Down
18 changes: 16 additions & 2 deletions src/context_engine/indexer/git_hooks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Git hook installer and handler for triggering re-indexing."""
import shlex
import shutil
import stat
import sys
Expand Down Expand Up @@ -31,9 +32,22 @@ def _resolve_cce_binary() -> str:


def _hook_script() -> str:
    # Plain `cce index` already performs incremental indexing via the
    # on-disk manifest's content-hash check, so no flag is needed. The
    # removed `--changed-only` flag previously left stale hooks erroring
    # on every commit with "No such option: --changed-only" (issue #67).
    #
    # The binary path is shell-quoted because resolved paths commonly
    # contain spaces (e.g. C:\Users\Alice Smith\... on Windows, or
    # /Users/Firstname Lastname/.venv/bin/cce on macOS). git invokes the
    # hook file via POSIX sh on every platform — git-for-windows ships a
    # bundled sh — so shlex.quote yields a token that is correct for the
    # shell that actually runs the hook (Copilot review).
    quoted_bin = shlex.quote(_resolve_cce_binary())
    return f"{HOOK_MARKER}\n{quoted_bin} index >/dev/null 2>&1 &\n"


Expand Down
77 changes: 62 additions & 15 deletions src/context_engine/integration/mcp_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""MCP server exposing context engine tools to Claude Code."""
import asyncio
import json
import logging
import re
Expand Down Expand Up @@ -883,24 +884,57 @@ async def call_tool(name: str, arguments: dict):

# ── tool handlers ───────────────────────────────────────────────────────

async def _ensure_indexed(self) -> None:
"""Lazy indexing: if the index is empty, trigger indexing on first query."""
if self._lazy_indexed:
return
self._lazy_indexed = True
async def _ensure_indexed(self) -> bool:
"""Lazy indexing on empty index.

Returns True iff the index already has content. When the index is
empty, indexing is kicked off as a background task and the method
returns False so the caller can surface an informative message
instead of blocking the MCP request for the duration of the
first-time index. The old behaviour silently blocked context_search
for many seconds (or many minutes on a large repo); from the
client's side it looked like the call had hung (#67).
"""
try:
count = self._backend._vector_store.count()
if count > 0:
return
return True
except Exception:
pass
# Index is empty — trigger on-the-fly indexing
log.info("Index empty — triggering lazy indexing for %s", self._project_name)
# If we can't tell, assume it's populated rather than kick off
# an erroneous reindex.
return True

if self._lazy_indexed:
# Already kicked off — index is presumably building in the
# background. Keep telling callers to wait until it shows up.
return False
self._lazy_indexed = True

log.info(
"Index empty — kicking off background indexing for %s",
self._project_name,
)

async def _bg_index():
try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
log.info("Background indexing complete for %s", self._project_name)
except Exception as exc:
log.warning("Background indexing failed: %s", exc)

try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
except Exception as exc:
log.warning("Lazy indexing failed: %s", exc)
asyncio.create_task(_bg_index())
except RuntimeError:
# No running loop (synchronous test harness, etc.) — fall back
# to blocking. Better than swallowing the request.
try:
from context_engine.indexer.pipeline import run_indexing
await run_indexing(self._config, self._project_dir, full=False)
return True
except Exception as exc:
log.warning("Lazy indexing failed: %s", exc)
return False

async def _handle_context_search(self, args):
query = (args.get("query") or "").strip()
Expand All @@ -914,8 +948,21 @@ async def _handle_context_search(self, args):
)
]

# Lazy index if this is the first query and index is empty
await self._ensure_indexed()
# Lazy index if this is the first query and index is empty.
# When the index is empty we return immediately with a status line
# rather than blocking the call for the full indexing run — the
# MCP client otherwise sees a multi-second/minute hang with no
# progress indicator (#67).
indexed = await self._ensure_indexed()
if not indexed:
return [TextContent(
type="text",
text=(
f"Index for {self._project_name} is empty — indexing has "
"been started in the background. Retry this search in a "
"few seconds, or run `cce index` for a synchronous index."
),
)]

top_k = _clamp_top_k(args.get("top_k", 10))
max_tokens = args.get("max_tokens", 8000)
Expand Down
35 changes: 35 additions & 0 deletions src/context_engine/memory/hook_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,25 @@ def _is_windows() -> bool:
[ -r "${PORT_FILE}" ] || exit 0
PORT="$(cat "${PORT_FILE}" 2>/dev/null)"
[ -n "${PORT}" ] || exit 0
# PORT is interpolated into a `bash -c` command and a curl URL below.
# A corrupted (or hostile) port file containing $(...) / backticks /
# newlines would otherwise be evaluated by the shell. Refuse anything
# that isn't a plain integer in the valid TCP range (Copilot review).
case "${PORT}" in
''|*[!0-9]*) exit 0 ;;
esac
[ "${PORT}" -ge 1 ] 2>/dev/null && [ "${PORT}" -le 65535 ] 2>/dev/null || exit 0

# Quick TCP liveness probe via bash's /dev/tcp — if nothing is listening on
# the port (i.e. cce serve died but left its serve.port behind), bail out
# immediately instead of letting curl burn its full 1-2s timeout per hook
# call. On long Claude Code sessions with hundreds of PostToolUse events
# that adds up to many seconds of wasted wait (#67). If bash isn't
# available (rare on POSIX), fall through to the timed curl below — slower
# but still correct.
if command -v bash >/dev/null 2>&1; then
bash -c "exec 3<>/dev/tcp/127.0.0.1/${PORT}" 2>/dev/null || exit 0
fi

if [ "${HOOK_NAME}" = "SessionStart" ]; then
RESPONSE="$(curl -sf -m 2 -X POST -H "Content-Type: application/json" \\
Expand Down Expand Up @@ -114,6 +133,22 @@ def _is_windows() -> bool:

set /p PORT=<"%PORT_FILE%"
if "%PORT%"=="" exit /b 0
REM PORT is interpolated into the PowerShell -Command string and the
REM curl URL below. A corrupted (or hostile) port file containing
REM ');something(' would otherwise terminate the intended expression
REM and inject. Refuse anything that isn't a positive integer in the
REM valid TCP range (Copilot review).
echo %PORT%|findstr /R "^[1-9][0-9]*$" >nul 2>&1
if errorlevel 1 exit /b 0
if %PORT% LSS 1 exit /b 0
if %PORT% GTR 65535 exit /b 0

REM Liveness probe before spending curl's full timeout. PowerShell's
REM TcpClient is universally available on supported Windows targets and
REM exits in ~50ms when nothing's listening, vs. curl's 1-2s.
REM See the POSIX comment above for the motivation (#67).
powershell -NoProfile -Command "$ErrorActionPreference='Stop'; try { (New-Object Net.Sockets.TcpClient).Connect('127.0.0.1',%PORT%); exit 0 } catch { exit 1 }" >nul 2>&1
Comment thread
rajkumarsakthivel marked this conversation as resolved.
if errorlevel 1 exit /b 0

if /i "%HOOK_NAME%"=="SessionStart" (
set "TMP_RESP=%TEMP%\\cce_hook_resp_%RANDOM%.txt"
Expand Down
65 changes: 65 additions & 0 deletions tests/indexer/test_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def test_resolve_parallel_invalid_env_falls_through(monkeypatch):
assert _resolve_parallel() == 2



# ─── Issue #66 regression coverage ──────────────────────────────────────


Expand Down Expand Up @@ -117,3 +118,67 @@ def test_resolve_parallel_is_lazy(monkeypatch):
monkeypatch.setenv("CCE_EMBED_PARALLEL", "0")
second = _resolve_parallel()
assert second is None


# ─── Issue #67 regression coverage ──────────────────────────────────────


def test_resolve_cache_dir_default(monkeypatch, tmp_path):
    """No env var set → ~/.cache/fastembed, NOT /tmp."""
    from context_engine.indexer.embedder import _resolve_cache_dir
    for var in ("CCE_FASTEMBED_CACHE_PATH", "FASTEMBED_CACHE_PATH", "XDG_CACHE_HOME"):
        monkeypatch.delenv(var, raising=False)
    fake_home = tmp_path / "home"
    monkeypatch.setattr("pathlib.Path.home", classmethod(lambda cls: fake_home))
    assert _resolve_cache_dir() == fake_home / ".cache" / "fastembed"


def test_resolve_cache_dir_respects_fastembed_env(monkeypatch, tmp_path):
    from context_engine.indexer.embedder import _resolve_cache_dir
    target = tmp_path / "custom"
    monkeypatch.delenv("CCE_FASTEMBED_CACHE_PATH", raising=False)
    monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(target))
    got = _resolve_cache_dir()
    assert got == target


def test_resolve_cache_dir_cce_override_wins(monkeypatch, tmp_path):
    """CCE_FASTEMBED_CACHE_PATH overrides fastembed's own env var so users
    with multiple tools sharing the fastembed default can isolate CCE's
    cache."""
    from context_engine.indexer.embedder import _resolve_cache_dir
    cce_dir = tmp_path / "cce_path"
    fast_dir = tmp_path / "fast_path"
    monkeypatch.setenv("CCE_FASTEMBED_CACHE_PATH", str(cce_dir))
    monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(fast_dir))
    assert _resolve_cache_dir() == cce_dir


def test_resolve_cache_dir_xdg(monkeypatch, tmp_path):
    from context_engine.indexer.embedder import _resolve_cache_dir
    for var in ("CCE_FASTEMBED_CACHE_PATH", "FASTEMBED_CACHE_PATH"):
        monkeypatch.delenv(var, raising=False)
    xdg_root = tmp_path / "xdg"
    monkeypatch.setenv("XDG_CACHE_HOME", str(xdg_root))
    assert _resolve_cache_dir() == xdg_root / "fastembed"


def test_sweep_incomplete_removes_stale_partial(tmp_path):
    """Issue #67: a stalled huggingface_hub download leaves a 0-byte
    `model_optimized.onnx.incomplete` file that crashes every subsequent
    load. We must remove these on startup."""
    from context_engine.indexer.embedder import _sweep_incomplete_downloads
    snapshot = tmp_path / "models--qdrant--bge" / "snapshots" / "abc"
    snapshot.mkdir(parents=True)
    stale = snapshot / "model_optimized.onnx.incomplete"
    stale.write_bytes(b"")
    keeper = snapshot / "tokenizer.json"
    keeper.write_text("{}")

    assert _sweep_incomplete_downloads(tmp_path) == 1
    assert not stale.exists()
    assert keeper.exists()  # other cache files must survive


def test_sweep_incomplete_missing_dir_is_noop(tmp_path):
    from context_engine.indexer.embedder import _sweep_incomplete_downloads
    absent = tmp_path / "does_not_exist_yet"
    assert _sweep_incomplete_downloads(absent) == 0
Loading
Loading