Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/semble/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@ def _mcp_main() -> None:
help="Local directory or git URL to pre-index at startup (optional).",
)
parser.add_argument("--ref", default=None, help="Branch or tag to check out (git URLs only).")
parser.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)
args = parser.parse_args()
if any(find_spec(dep) is None for dep in get_package_extras("semble", "mcp")):
print("MCP dependencies are not installed. Run: pip install 'semble[mcp]'", file=sys.stderr)
raise SystemExit(1)
from semble.mcp import serve

asyncio.run(serve(args.path, ref=args.ref))
asyncio.run(serve(args.path, ref=args.ref, include_text_files=args.include_text_files))


def _run_init(*, force: bool = False) -> None:
Expand All @@ -66,12 +71,22 @@ def _cli_main() -> None:
search_p.add_argument(
"-m", "--mode", default="hybrid", choices=["hybrid", "semantic", "bm25"], help="Search mode (default: hybrid)."
)
search_p.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)

related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
related_p.add_argument("file_path", help="File path as shown in search results.")
related_p.add_argument("line", type=int, help="Line number (1-indexed).")
related_p.add_argument("path", nargs="?", default=".", help="Local path or git URL (default: current directory).")
related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
related_p.add_argument(
"--include-text-files",
action="store_true",
help="Also index non-code text files (.md, .yaml, .json, etc.).",
)

init_p = sub.add_parser("init", help="Write .claude/agents/semble-search.md for Claude Code sub-agent support.")
init_p.add_argument("--force", action="store_true", help="Overwrite if the file already exists.")
Expand All @@ -82,7 +97,12 @@ def _cli_main() -> None:
_run_init(force=args.force)
return

index = SembleIndex.from_git(args.path) if _is_git_url(args.path) else SembleIndex.from_path(args.path)
include_text = args.include_text_files
index = (
SembleIndex.from_git(args.path, include_text_files=include_text)
if _is_git_url(args.path)
else SembleIndex.from_path(args.path, include_text_files=include_text)
)

if args.command == "search":
results = index.search(args.query, top_k=args.top_k, mode=args.mode)
Expand Down
19 changes: 14 additions & 5 deletions src/semble/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ async def find_related(
return server


async def serve(path: str | None = None, ref: str | None = None) -> None:
async def serve(path: str | None = None, ref: str | None = None, include_text_files: bool = False) -> None:
"""Start an MCP stdio server, optionally pre-indexing a default source."""
model = await asyncio.to_thread(load_model)
cache = _IndexCache(model=model)
cache = _IndexCache(model=model, include_text_files=include_text_files)
if path:
await cache.get(path, ref=ref)
if not _is_git_url(path):
Expand All @@ -132,9 +132,10 @@ async def serve(path: str | None = None, ref: str | None = None) -> None:
class _IndexCache:
"""Cache of indexed repos and local paths for the lifetime of the MCP server process."""

def __init__(self, model: Encoder) -> None:
def __init__(self, model: Encoder, include_text_files: bool = False) -> None:
"""Initialise an empty cache with a shared embedding model."""
self._model = model
self._include_text_files = include_text_files
self._tasks: OrderedDict[str, asyncio.Task[SembleIndex]] = OrderedDict() # ordered for LRU eviction
self._watcher_task: asyncio.Task[None] | None = None

Expand Down Expand Up @@ -173,11 +174,19 @@ async def get(self, source: str, ref: str | None = None) -> SembleIndex:
self._tasks.popitem(last=False)
if _is_git_url(source):
self._tasks[cache_key] = asyncio.create_task(
asyncio.to_thread(SembleIndex.from_git, source, ref=ref, model=self._model)
asyncio.to_thread(
SembleIndex.from_git,
source,
ref=ref,
model=self._model,
include_text_files=self._include_text_files,
)
)
else:
self._tasks[cache_key] = asyncio.create_task(
asyncio.to_thread(SembleIndex.from_path, cache_key, model=self._model)
asyncio.to_thread(
SembleIndex.from_path, cache_key, model=self._model, include_text_files=self._include_text_files
)
)
task = self._tasks[cache_key]
try:
Expand Down
Loading
Loading