diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1d1ad3..bb2fce7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,12 +70,21 @@ jobs: - name: Install package run: pip install -e ".[dev]" + # Pin fastembed's cache for BOTH the warm step and the test step so + # they share state. Embedder defaults to ~/.cache/fastembed under + # #67, and fastembed itself honours FASTEMBED_CACHE_PATH — setting + # this env var keeps the warm step and the tests pointed at the + # same directory without needing explicit kwargs. - name: Warm fastembed model cache + env: + FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache run: | python -c "from fastembed import TextEmbedding; TextEmbedding('BAAI/bge-small-en-v1.5')" python -c "from fastembed import TextEmbedding; TextEmbedding('sentence-transformers/all-MiniLM-L6-v2')" - name: Run tests + env: + FASTEMBED_CACHE_PATH: ${{ runner.temp }}/fastembed-cache run: pytest -n ${{ runner.os == 'Windows' && '0' || '4' }} --cov=context_engine --cov-report=xml --cov-report=term # Windows GitHub Actions runners send spurious SIGINT during pytest # teardown, causing exit code 1 even when all tests pass (779/779). diff --git a/src/context_engine/indexer/embedder.py b/src/context_engine/indexer/embedder.py index 4917a2f..6b6ec2e 100644 --- a/src/context_engine/indexer/embedder.py +++ b/src/context_engine/indexer/embedder.py @@ -10,6 +10,7 @@ import os import sys from functools import lru_cache +from pathlib import Path from fastembed import TextEmbedding @@ -20,6 +21,66 @@ _DEFAULT_MODEL = "BAAI/bge-small-en-v1.5" + +def _resolve_cache_dir() -> Path: + """Pick a persistent fastembed cache location. + + Precedence (highest first): + 1. ``CCE_FASTEMBED_CACHE_PATH`` (CCE-specific override; takes + priority over fastembed's own var so users running multiple + tools that share fastembed can isolate CCE's cache) + 2. ``FASTEMBED_CACHE_PATH`` (the env var fastembed itself recognises) + 3. ``$XDG_CACHE_HOME/fastembed`` if XDG_CACHE_HOME is set + 4. ``~/.cache/fastembed`` + + Previously CCE accepted fastembed's default of + ``$TMPDIR/fastembed_cache`` which on WSL/Ubuntu's systemd-tmpfiles + layout (``/tmp`` cleared on every boot) meant the model got + re-downloaded on each restart — and re-hit any flaky-network failure + along with it (issue #67). + """ + cce_override = os.environ.get("CCE_FASTEMBED_CACHE_PATH", "").strip() + if cce_override: + return Path(cce_override).expanduser() + fast_override = os.environ.get("FASTEMBED_CACHE_PATH", "").strip() + if fast_override: + return Path(fast_override).expanduser() + xdg = os.environ.get("XDG_CACHE_HOME", "").strip() + if xdg: + return Path(xdg).expanduser() / "fastembed" + return Path.home() / ".cache" / "fastembed" + + +def _sweep_incomplete_downloads(cache_dir: Path) -> int: + """Delete any ``*.incomplete`` files from a previous stalled download. + + Without this sweep, a stalled ``huggingface_hub`` download leaves a + zero-byte ``model_optimized.onnx.incomplete`` file alongside the + expected ``model_optimized.onnx`` — and fastembed will then fail at + load time with a confusing ``NO_SUCHFILE`` error on every subsequent + run until the user manually nukes the cache (#67). + + Returns the number of files removed. + """ + if not cache_dir.exists(): + return 0 + removed = 0 + try: + for path in cache_dir.rglob("*.incomplete"): + try: + path.unlink() + removed += 1 + except OSError as exc: + log.warning("Could not remove stale fastembed file %s: %s", path, exc) + except OSError as exc: + log.warning("Failed to scan fastembed cache at %s: %s", cache_dir, exc) + if removed: + log.info( + "Cleared %d stale `*.incomplete` file(s) from fastembed cache at %s", + removed, cache_dir, + ) + return removed + # Passed straight to fastembed's `parallel` argument: # None → no data-parallel mp; use onnxruntime's own threading # N>0 → spawn N forkserver workers around onnxruntime @@ -78,12 +139,24 @@ def __init__( resolved = f"sentence-transformers/{model_name}" else: resolved = model_name + # Use a persistent cache dir that survives reboot, and clear out any + # partial downloads from a previously interrupted run so we never + # try to load a zero-byte ONNX file (#67). + cache_dir = _resolve_cache_dir() + try: + cache_dir.mkdir(parents=True, exist_ok=True) + except OSError as exc: + log.warning("Could not create fastembed cache dir %s: %s", cache_dir, exc) + _sweep_incomplete_downloads(cache_dir) try: - self._model = TextEmbedding(resolved) + self._model = TextEmbedding(resolved, cache_dir=str(cache_dir)) except Exception as exc: raise RuntimeError( f"Failed to load embedding model '{model_name}'. " f"Ensure fastembed is installed and the model name is valid. " + f"Cache dir: {cache_dir}. " + f"If a previous download was interrupted, deleting the cache " + f"directory and retrying may help. " f"Supported models: TextEmbedding.list_supported_models(). " f"Original error: {exc}" ) from exc diff --git a/src/context_engine/indexer/git_hooks.py b/src/context_engine/indexer/git_hooks.py index 1056668..0af4276 100644 --- a/src/context_engine/indexer/git_hooks.py +++ b/src/context_engine/indexer/git_hooks.py @@ -1,4 +1,5 @@ """Git hook installer and handler for triggering re-indexing.""" +import shlex import shutil import stat import sys @@ -31,9 +32,22 @@ def _resolve_cce_binary() -> str: def _hook_script() -> str: - bin_path = _resolve_cce_binary() + # `cce index` without any flag already performs incremental indexing + # via the on-disk manifest's content-hash check. The old + # `--changed-only` flag was removed but the hook template hadn't been + # updated — every commit silently errored with + # "No such option: --changed-only" (issue #67). + # + # bin_path must be shell-quoted because resolved paths commonly + # include spaces (e.g. C:\Users\Alice Smith\... on Windows, or + # /Users/Firstname Lastname/.venv/bin/cce on macOS). git's hook + # runner invokes the file via POSIX sh on every platform — even + # git-for-windows ships a bundled sh — so single-quoting with + # shlex.quote produces a correct token for the shell that actually + # runs the hook (Copilot review). + bin_path = shlex.quote(_resolve_cce_binary()) return f"""{HOOK_MARKER} -{bin_path} index --changed-only >/dev/null 2>&1 & +{bin_path} index >/dev/null 2>&1 & """ diff --git a/src/context_engine/integration/mcp_server.py b/src/context_engine/integration/mcp_server.py index 2d5e8d8..8a14897 100644 --- a/src/context_engine/integration/mcp_server.py +++ b/src/context_engine/integration/mcp_server.py @@ -1,4 +1,5 @@ """MCP server exposing context engine tools to Claude Code.""" +import asyncio import json import logging import re @@ -883,24 +884,57 @@ async def call_tool(name: str, arguments: dict): # ── tool handlers ─────────────────────────────────────────────────────── - async def _ensure_indexed(self) -> None: - """Lazy indexing: if the index is empty, trigger indexing on first query.""" - if self._lazy_indexed: - return - self._lazy_indexed = True + async def _ensure_indexed(self) -> bool: + """Lazy indexing on empty index. + + Returns True iff the index already has content. When the index is + empty, indexing is kicked off as a background task and the method + returns False so the caller can surface an informative message + instead of blocking the MCP request for the duration of the + first-time index. The old behaviour silently blocked context_search + for many seconds (or many minutes on a large repo); from the + client's side it looked like the call had hung (#67). + """ try: count = self._backend._vector_store.count() if count > 0: - return + return True except Exception: - pass - # Index is empty — trigger on-the-fly indexing - log.info("Index empty — triggering lazy indexing for %s", self._project_name) + # If we can't tell, assume it's populated rather than kick off + # an erroneous reindex. + return True + + if self._lazy_indexed: + # Already kicked off — index is presumably building in the + # background. Keep telling callers to wait until it shows up. + return False + self._lazy_indexed = True + + log.info( + "Index empty — kicking off background indexing for %s", + self._project_name, + ) + + async def _bg_index(): + try: + from context_engine.indexer.pipeline import run_indexing + await run_indexing(self._config, self._project_dir, full=False) + log.info("Background indexing complete for %s", self._project_name) + except Exception as exc: + log.warning("Background indexing failed: %s", exc) + try: - from context_engine.indexer.pipeline import run_indexing - await run_indexing(self._config, self._project_dir, full=False) - except Exception as exc: - log.warning("Lazy indexing failed: %s", exc) + asyncio.create_task(_bg_index()) + except RuntimeError: + # No running loop (synchronous test harness, etc.) — fall back + # to blocking. Better than swallowing the request. + try: + from context_engine.indexer.pipeline import run_indexing + await run_indexing(self._config, self._project_dir, full=False) + return True + except Exception as exc: + log.warning("Lazy indexing failed: %s", exc) + return False async def _handle_context_search(self, args): query = (args.get("query") or "").strip() @@ -914,8 +948,21 @@ async def _handle_context_search(self, args): ) ] - # Lazy index if this is the first query and index is empty - await self._ensure_indexed() + # Lazy index if this is the first query and index is empty. + # When the index is empty we return immediately with a status line + # rather than blocking the call for the full indexing run — the + # MCP client otherwise sees a multi-second/minute hang with no + # progress indicator (#67). + indexed = await self._ensure_indexed() + if not indexed: + return [TextContent( + type="text", + text=( + f"Index for {self._project_name} is empty — indexing has " + "been started in the background. Retry this search in a " + "few seconds, or run `cce index` for a synchronous index." + ), + )] top_k = _clamp_top_k(args.get("top_k", 10)) max_tokens = args.get("max_tokens", 8000) diff --git a/src/context_engine/memory/hook_installer.py b/src/context_engine/memory/hook_installer.py index 78ec653..813d6dd 100644 --- a/src/context_engine/memory/hook_installer.py +++ b/src/context_engine/memory/hook_installer.py @@ -77,6 +77,25 @@ def _is_windows() -> bool: [ -r "${PORT_FILE}" ] || exit 0 PORT="$(cat "${PORT_FILE}" 2>/dev/null)" [ -n "${PORT}" ] || exit 0 +# PORT is interpolated into a `bash -c` command and a curl URL below. +# A corrupted (or hostile) port file containing $(...) / backticks / +# newlines would otherwise be evaluated by the shell. Refuse anything +# that isn't a plain integer in the valid TCP range (Copilot review). +case "${PORT}" in + ''|*[!0-9]*) exit 0 ;; +esac +[ "${PORT}" -ge 1 ] 2>/dev/null && [ "${PORT}" -le 65535 ] 2>/dev/null || exit 0 + +# Quick TCP liveness probe via bash's /dev/tcp — if nothing is listening on +# the port (i.e. cce serve died but left its serve.port behind), bail out +# immediately instead of letting curl burn its full 1-2s timeout per hook +# call. On long Claude Code sessions with hundreds of PostToolUse events +# that adds up to many seconds of wasted wait (#67). If bash isn't +# available (rare on POSIX), fall through to the timed curl below — slower +# but still correct. +if command -v bash >/dev/null 2>&1; then + bash -c "exec 3<>/dev/tcp/127.0.0.1/${PORT}" 2>/dev/null || exit 0 +fi if [ "${HOOK_NAME}" = "SessionStart" ]; then RESPONSE="$(curl -sf -m 2 -X POST -H "Content-Type: application/json" \\ @@ -114,6 +133,22 @@ def _is_windows() -> bool: set /p PORT=<"%PORT_FILE%" if "%PORT%"=="" exit /b 0 +REM PORT is interpolated into the PowerShell -Command string and the +REM curl URL below. A corrupted (or hostile) port file containing +REM ');something(' would otherwise terminate the intended expression +REM and inject. Refuse anything that isn't a positive integer in the +REM valid TCP range (Copilot review). +echo %PORT%|findstr /R "^[1-9][0-9]*$" >nul 2>&1 +if errorlevel 1 exit /b 0 +if %PORT% LSS 1 exit /b 0 +if %PORT% GTR 65535 exit /b 0 + +REM Liveness probe before spending curl's full timeout. PowerShell's +REM TcpClient is universally available on supported Windows targets and +REM exits in ~50ms when nothing's listening, vs. curl's 1-2s. +REM See the POSIX comment above for the motivation (#67). +powershell -NoProfile -Command "$ErrorActionPreference='Stop'; try { (New-Object Net.Sockets.TcpClient).Connect('127.0.0.1',%PORT%); exit 0 } catch { exit 1 }" >nul 2>&1 +if errorlevel 1 exit /b 0 if /i "%HOOK_NAME%"=="SessionStart" ( set "TMP_RESP=%TEMP%\\cce_hook_resp_%RANDOM%.txt" diff --git a/tests/indexer/test_embedder.py b/tests/indexer/test_embedder.py index 9f6685b..1215983 100644 --- a/tests/indexer/test_embedder.py +++ b/tests/indexer/test_embedder.py @@ -67,6 +67,7 @@ def test_resolve_parallel_invalid_env_falls_through(monkeypatch): assert _resolve_parallel() == 2 + # ─── Issue #66 regression coverage ────────────────────────────────────── @@ -117,3 +118,67 @@ def test_resolve_parallel_is_lazy(monkeypatch): monkeypatch.setenv("CCE_EMBED_PARALLEL", "0") second = _resolve_parallel() assert second is None + + +# ─── Issue #67 regression coverage ────────────────────────────────────── + + +def test_resolve_cache_dir_default(monkeypatch, tmp_path): + """No env var set → ~/.cache/fastembed, NOT /tmp.""" + from context_engine.indexer.embedder import _resolve_cache_dir + monkeypatch.delenv("CCE_FASTEMBED_CACHE_PATH", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + fake_home = tmp_path / "home" + monkeypatch.setattr("pathlib.Path.home", classmethod(lambda cls: fake_home)) + got = _resolve_cache_dir() + assert got == fake_home / ".cache" / "fastembed" + + +def test_resolve_cache_dir_respects_fastembed_env(monkeypatch, tmp_path): + from context_engine.indexer.embedder import _resolve_cache_dir + monkeypatch.delenv("CCE_FASTEMBED_CACHE_PATH", raising=False) + monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(tmp_path / "custom")) + assert _resolve_cache_dir() == tmp_path / "custom" + + +def test_resolve_cache_dir_cce_override_wins(monkeypatch, tmp_path): + """CCE_FASTEMBED_CACHE_PATH overrides fastembed's own env var so users + with multiple tools sharing the fastembed default can isolate CCE's + cache.""" + from context_engine.indexer.embedder import _resolve_cache_dir + monkeypatch.setenv("CCE_FASTEMBED_CACHE_PATH", str(tmp_path / "cce_path")) + monkeypatch.setenv("FASTEMBED_CACHE_PATH", str(tmp_path / "fast_path")) + assert _resolve_cache_dir() == tmp_path / "cce_path" + + +def test_resolve_cache_dir_xdg(monkeypatch, tmp_path): + from context_engine.indexer.embedder import _resolve_cache_dir + monkeypatch.delenv("CCE_FASTEMBED_CACHE_PATH", raising=False) + monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False) + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + assert _resolve_cache_dir() == tmp_path / "xdg" / "fastembed" + + +def test_sweep_incomplete_removes_stale_partial(tmp_path): + """Issue #67: a stalled huggingface_hub download leaves a 0-byte + `model_optimized.onnx.incomplete` file that crashes every subsequent + load. We must remove these on startup.""" + from context_engine.indexer.embedder import _sweep_incomplete_downloads + nested = tmp_path / "models--qdrant--bge" / "snapshots" / "abc" + nested.mkdir(parents=True) + bad = nested / "model_optimized.onnx.incomplete" + bad.write_bytes(b"") + good = nested / "tokenizer.json" + good.write_text("{}") + + removed = _sweep_incomplete_downloads(tmp_path) + assert removed == 1 + assert not bad.exists() + assert good.exists() # other cache files must survive + + +def test_sweep_incomplete_missing_dir_is_noop(tmp_path): + from context_engine.indexer.embedder import _sweep_incomplete_downloads + missing = tmp_path / "does_not_exist_yet" + assert _sweep_incomplete_downloads(missing) == 0 diff --git a/tests/indexer/test_git_hooks.py b/tests/indexer/test_git_hooks.py index a1e2e87..ef25dcb 100644 --- a/tests/indexer/test_git_hooks.py +++ b/tests/indexer/test_git_hooks.py @@ -46,3 +46,49 @@ def test_install_hooks_returns_empty_for_non_git(tmp_path): """Non-git directory should return empty list, not raise.""" result = install_hooks(project_dir=str(tmp_path)) assert result == [] + + +# ─── Issue #67 regression coverage ────────────────────────────────────── + + +def test_post_commit_hook_does_not_use_removed_flag(git_repo): + """`cce index --changed-only` flag was removed but the template still + referenced it, so every commit silently errored. The hook must invoke + bare `cce index` (which already does incremental indexing).""" + install_hooks(project_dir=str(git_repo)) + for name in ("post-commit", "post-checkout", "post-merge"): + content = (git_repo / ".git" / "hooks" / name).read_text() + assert "--changed-only" not in content, ( + f"{name} still references the removed --changed-only flag" + ) + # Sanity: the hook still triggers an index. + assert " index " in content or "index>" in content + + +def test_post_commit_hook_quotes_bin_path_with_spaces(tmp_path, monkeypatch): + """A resolved cce binary path with spaces (e.g. Alice Smith) must be + shell-quoted in the generated hook so it isn't tokenised by sh + (Copilot review on #70).""" + from context_engine.indexer import git_hooks + + spaced = tmp_path / "Alice Smith" / "bin" / "cce" + spaced.parent.mkdir(parents=True) + spaced.write_text("#!/bin/sh\necho cce\n") + monkeypatch.setattr( + git_hooks, "_resolve_cce_binary", lambda: str(spaced), + ) + + git_repo = tmp_path / "repo" + (git_repo / ".git" / "hooks").mkdir(parents=True) + git_hooks.install_hooks(project_dir=str(git_repo)) + + body = (git_repo / ".git" / "hooks" / "post-commit").read_text() + # The path must be quoted as one shell token. shlex.quote single- + # quotes anything containing whitespace, so the literal `Alice Smith` + # must live inside `'...'`. Confirm a `'` appears before any space + # in the path. + assert "'" in body, f"bin_path should be shell-quoted: {body!r}" + assert "Alice Smith" in body + quoted_idx = body.index("'") + space_in_path = body.index("Alice Smith") + len("Alice") + assert quoted_idx < space_in_path diff --git a/tests/integration/test_mcp_empty_index.py b/tests/integration/test_mcp_empty_index.py new file mode 100644 index 0000000..1d96a8e --- /dev/null +++ b/tests/integration/test_mcp_empty_index.py @@ -0,0 +1,117 @@ +"""Tests for context_search behaviour on an empty index (#67). + +Previously, the first context_search against an empty index silently +blocked while a full reindex ran underneath — from the MCP client's +side the call looked hung. The fix returns a status response and +spawns the reindex as a background task instead. +""" +from __future__ import annotations + +import asyncio +from unittest.mock import MagicMock + +import pytest + +from context_engine.config import Config +from context_engine.integration.mcp_server import ContextEngineMCP + + +def _make_mcp(tmp_path, monkeypatch, *, chunk_count: int): + project_dir = tmp_path / "demo" + project_dir.mkdir(parents=True, exist_ok=True) + storage_path = tmp_path / "storage" + monkeypatch.chdir(project_dir) + config = Config( + storage_path=str(storage_path), + embedding_model="BAAI/bge-small-en-v1.5", + ) + backend = MagicMock() + backend._vector_store.count.return_value = chunk_count + compressor = MagicMock() + embedder = MagicMock() + retriever = MagicMock() + return ContextEngineMCP( + retriever=retriever, backend=backend, compressor=compressor, + embedder=embedder, config=config, + ) + + +@pytest.mark.asyncio +async def test_context_search_returns_status_when_index_empty( + tmp_path, monkeypatch +): + mcp = _make_mcp(tmp_path, monkeypatch, chunk_count=0) + + # Patch run_indexing so the background task can't actually fire off a + # real pipeline run during the test (it would try to read the empty + # tmp dir but the import would still cost real wall-time). + indexing_called: list[bool] = [] + + async def _fake_run_indexing(*a, **kw): + indexing_called.append(True) + + monkeypatch.setattr( + "context_engine.indexer.pipeline.run_indexing", + _fake_run_indexing, + ) + + result = await mcp._handle_context_search({"query": "anything"}) + assert len(result) == 1 + text = result[0].text + # Must NOT pretend the search ran — has to tell the user the index + # is empty and what to do. + assert "empty" in text.lower() + assert "cce index" in text.lower() or "indexing" in text.lower() + # Background indexing must have been scheduled. Yielding once lets + # the create_task'd coroutine run (Copilot review: the previous + # version of the test recorded `indexing_called` but never + # asserted on it). + await asyncio.sleep(0) + assert indexing_called == [True], ( + "Background indexing should have been scheduled exactly once" + ) + + +@pytest.mark.asyncio +async def test_context_search_uses_real_retrieval_when_index_populated( + tmp_path, monkeypatch +): + """A populated index must NOT return the empty-index status message — + confirms the guard runs the right way around.""" + mcp = _make_mcp(tmp_path, monkeypatch, chunk_count=5) + + # Stub the retriever + compressor to keep the path lightweight. + async def _fake_retrieve(*a, **kw): + return [] + + async def _fake_compress(chunks, level): + return chunks + + mcp._retriever.retrieve = _fake_retrieve + mcp._compressor.compress = _fake_compress + + result = await mcp._handle_context_search({"query": "anything"}) + assert len(result) == 1 + text = result[0].text + # The empty-index banner must not appear when the store has rows. + assert "indexing has been started" not in text.lower() + assert "is empty" not in text.lower() + + +@pytest.mark.asyncio +async def test_ensure_indexed_returns_false_for_empty_and_true_for_populated( + tmp_path, monkeypatch +): + mcp_empty = _make_mcp(tmp_path / "a", monkeypatch, chunk_count=0) + + async def _fake_run_indexing(*a, **kw): + return None + + monkeypatch.setattr( + "context_engine.indexer.pipeline.run_indexing", _fake_run_indexing, + ) + + assert await mcp_empty._ensure_indexed() is False + + mcp_full = _make_mcp(tmp_path / "b", monkeypatch, chunk_count=10) + assert await mcp_full._ensure_indexed() is True diff --git a/tests/memory/test_hook_installer.py b/tests/memory/test_hook_installer.py index 0ba20c1..7adeea1 100644 --- a/tests/memory/test_hook_installer.py +++ b/tests/memory/test_hook_installer.py @@ -63,6 +63,63 @@ def test_posix_hook_script_body_when_platform_not_win(monkeypatch): assert "curl -sf" in body +# ─── Issue #67 regression coverage: liveness probe in hook scripts ───── + + +def test_posix_hook_has_tcp_liveness_probe(monkeypatch): + """POSIX hook must short-circuit when nothing's listening on the port + so a stale serve.port doesn't waste 1-2s/curl call (#67).""" + monkeypatch.setattr(hi, "_is_windows", lambda: False) + body = hi._hook_script_body() + # The probe uses bash's /dev/tcp pseudo-device. + assert "/dev/tcp/127.0.0.1" in body + # The probe gates curl: if it fails we exit BEFORE the curl call. + probe_pos = body.index("/dev/tcp/127.0.0.1") + curl_pos = body.index("curl -sf") + assert probe_pos < curl_pos, ( + "Liveness probe must run before any curl invocation" + ) + + +def test_windows_hook_has_tcp_liveness_probe(monkeypatch): + monkeypatch.setattr(hi, "_is_windows", lambda: True) + body = hi._hook_script_body() + # TcpClient is the cheapest port probe available without extra deps. + assert "TcpClient" in body + probe_pos = body.index("TcpClient") + curl_pos = body.index("curl -sf") + assert probe_pos < curl_pos + + +# ─── Copilot review: PORT must be validated before shell interpolation ─ + + +def test_posix_hook_rejects_non_numeric_port(monkeypatch): + """Corrupted/hostile serve.port (containing $() or backticks) must + not be interpolated into the bash -c command (Copilot security + review on #70).""" + monkeypatch.setattr(hi, "_is_windows", lambda: False) + body = hi._hook_script_body() + assert "*[!0-9]*" in body, "POSIX script must reject non-digit PORT" + assert "-le 65535" in body, "POSIX script must cap PORT at 65535" + validation_pos = body.index("*[!0-9]*") + probe_pos = body.index("/dev/tcp/127.0.0.1") + curl_pos = body.index("curl -sf") + assert validation_pos < probe_pos < curl_pos + + +def test_windows_hook_rejects_non_numeric_port(monkeypatch): + monkeypatch.setattr(hi, "_is_windows", lambda: True) + body = hi._hook_script_body() + assert "findstr /R" in body + assert "LSS 1" in body + assert "GTR 65535" in body + validation_pos = body.index("findstr /R") + probe_pos = body.index("TcpClient") + curl_pos = body.index("curl -sf") + assert validation_pos < probe_pos < curl_pos + + def test_install_settings_uses_cmd_quoting_on_windows(tmp_path: Path, monkeypatch): """On Windows, the hook command must use cmd.exe-style double quotes around the path. POSIX single-quotes (shlex.quote) would not dequote