Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/semble/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@ def _mcp_main() -> None:
help="Local directory or git URL to pre-index at startup (optional).",
)
parser.add_argument("--ref", default=None, help="Branch or tag to check out (git URLs only).")
parser.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)
args = parser.parse_args()
if any(find_spec(dep) is None for dep in get_package_extras("semble", "mcp")):
print("MCP dependencies are not installed. Run: pip install 'semble[mcp]'", file=sys.stderr)
raise SystemExit(1)
from semble.mcp import serve

asyncio.run(serve(args.path, ref=args.ref))
asyncio.run(serve(args.path, ref=args.ref, include_text_files=args.include_text_files))


def _run_init(*, force: bool = False) -> None:
Expand All @@ -66,12 +71,22 @@ def _cli_main() -> None:
search_p.add_argument(
"-m", "--mode", default="hybrid", choices=["hybrid", "semantic", "bm25"], help="Search mode (default: hybrid)."
)
search_p.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)

related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
related_p.add_argument("file_path", help="File path as shown in search results.")
related_p.add_argument("line", type=int, help="Line number (1-indexed).")
related_p.add_argument("path", nargs="?", default=".", help="Local path or git URL (default: current directory).")
related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
related_p.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)

init_p = sub.add_parser("init", help="Write .claude/agents/semble-search.md for Claude Code sub-agent support.")
init_p.add_argument("--force", action="store_true", help="Overwrite if the file already exists.")
Expand All @@ -82,7 +97,12 @@ def _cli_main() -> None:
_run_init(force=args.force)
return

index = SembleIndex.from_git(args.path) if _is_git_url(args.path) else SembleIndex.from_path(args.path)
include_text_files = args.include_text_files
index = (
SembleIndex.from_git(args.path, include_text_files=include_text_files)
if _is_git_url(args.path)
else SembleIndex.from_path(args.path, include_text_files=include_text_files)
)

if args.command == "search":
results = index.search(args.query, top_k=args.top_k, mode=args.mode)
Expand Down
22 changes: 17 additions & 5 deletions src/semble/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ async def find_related(
return server


async def serve(path: str | None = None, ref: str | None = None) -> None:
async def serve(path: str | None = None, ref: str | None = None, include_text_files: bool = False) -> None:
"""Start an MCP stdio server, optionally pre-indexing a default source."""
model = await asyncio.to_thread(load_model)
cache = _IndexCache(model=model)
cache = _IndexCache(model=model, include_text_files=include_text_files)
if path:
await cache.get(path, ref=ref)
if not _is_git_url(path):
Expand All @@ -132,9 +132,10 @@ async def serve(path: str | None = None, ref: str | None = None) -> None:
class _IndexCache:
"""Cache of indexed repos and local paths for the lifetime of the MCP server process."""

def __init__(self, model: Encoder) -> None:
def __init__(self, model: Encoder, include_text_files: bool = False) -> None:
"""Initialise an empty cache with a shared embedding model."""
self._model = model
self._include_text_files = include_text_files
self._tasks: OrderedDict[str, asyncio.Task[SembleIndex]] = OrderedDict() # ordered for LRU eviction
self._watcher_task: asyncio.Task[None] | None = None

Expand Down Expand Up @@ -173,11 +174,22 @@ async def get(self, source: str, ref: str | None = None) -> SembleIndex:
self._tasks.popitem(last=False)
if _is_git_url(source):
self._tasks[cache_key] = asyncio.create_task(
asyncio.to_thread(SembleIndex.from_git, source, ref=ref, model=self._model)
asyncio.to_thread(
SembleIndex.from_git,
source,
ref=ref,
model=self._model,
include_text_files=self._include_text_files,
)
)
else:
self._tasks[cache_key] = asyncio.create_task(
asyncio.to_thread(SembleIndex.from_path, cache_key, model=self._model)
asyncio.to_thread(
SembleIndex.from_path,
cache_key,
model=self._model,
include_text_files=self._include_text_files,
)
)
task = self._tasks[cache_key]
try:
Expand Down
24 changes: 24 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,30 @@ def test_mcp_main_exits_with_message_when_extras_missing(
assert "pip install 'semble[mcp]'" in capsys.readouterr().err


def test_cli_search_passes_include_text_files(monkeypatch: pytest.MonkeyPatch) -> None:
    """The ``search`` subcommand hands --include-text-files through to SembleIndex.from_path."""
    argv = ["semble", "search", "query", "/some/path", "--include-text-files"]
    monkeypatch.setattr(sys, "argv", argv)

    # Stand-in index whose search yields nothing, so only the construction call matters.
    stub_index = MagicMock()
    stub_index.search.return_value = []

    with patch("semble.cli.SembleIndex.from_path", return_value=stub_index) as from_path_mock:
        _cli_main()

    from_path_mock.assert_called_once_with("/some/path", include_text_files=True)


def test_cli_find_related_passes_include_text_files(monkeypatch: pytest.MonkeyPatch) -> None:
    """The ``find-related`` subcommand hands --include-text-files through to SembleIndex.from_path."""
    argv = ["semble", "find-related", "src/foo.py", "1", "/some/path", "--include-text-files"]
    monkeypatch.setattr(sys, "argv", argv)

    # Stand-in index exposing a single chunk for the file named on the command line;
    # presumably the CLI looks the location up in ``chunks`` (its body is not visible here).
    stub_index = MagicMock()
    stub_index.chunks = [make_chunk("def foo(): pass", "src/foo.py")]
    stub_index.find_related.return_value = []

    with patch("semble.cli.SembleIndex.from_path", return_value=stub_index) as from_path_mock:
        _cli_main()

    from_path_mock.assert_called_once_with("/some/path", include_text_files=True)


def test_agent_file_tools_are_bash_only() -> None:
"""The agent file must list only Bash and Read — no MCP tools that require schema loading."""
frontmatter = _CLAUDE_AGENT_FILE.split("---")[1]
Expand Down
15 changes: 15 additions & 0 deletions tests/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,21 @@ def test_cache_evict(cache: _IndexCache, tmp_path: Path) -> None:
assert key not in cache._tasks


@pytest.mark.anyio
async def test_serve_passes_include_text_files(tmp_path: Path) -> None:
    """serve(include_text_files=True) forwards the flag when building the index.

    The model loader, the index constructor, the file watcher and the stdio
    transport are all patched out, so the test only observes the keyword
    arguments that reach ``SembleIndex.from_path``.
    """
    with (
        patch("semble.mcp.load_model", return_value=MagicMock(spec=Encoder)),
        patch("semble.mcp.SembleIndex.from_path", return_value=MagicMock()) as mock_from_path,
        patch.object(_IndexCache, "start_watcher", new_callable=AsyncMock),
        patch("mcp.server.fastmcp.FastMCP.run_stdio_async", new_callable=AsyncMock),
    ):
        await serve(str(tmp_path), include_text_files=True)

    # If the index was never built, call_args is None and unpacking it would
    # raise an opaque TypeError; assert the call first for a readable failure.
    mock_from_path.assert_called()
    _, kwargs = mock_from_path.call_args
    assert kwargs.get("include_text_files") is True


def test_cache_evict_missing(cache: _IndexCache, tmp_path: Path) -> None:
"""evict() on an unknown path is a no-op."""
cache.evict(str(tmp_path)) # should not raise
Expand Down