From 9877f9df57e949acce92cddf5ca3414282277bcf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 03:51:23 +0000 Subject: [PATCH 1/2] Initial plan From 29fdda0913d48e4c6c721c0584b4faa271e1b207 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 03:58:42 +0000 Subject: [PATCH 2/2] Remove semantic search CLI command, MCP tool, and DSR adaptive retrieval from search.ts - Remove semantic CLI command from ai.ts aggregator - Remove semantic handler from CLI registry - Remove semantic_search MCP tool definition, handler, and schema - Remove semantic_search from MCP server schema map - Remove DSR adaptive retrieval pipeline from core/search.ts - Update MCP smoke test to verify semantic_search is not registered - Remove semantic_search tool call from MCP smoke test - Remove semantic CLI command tests from e2e test - Remove runAdaptiveRetrieval test from retrieval test Co-authored-by: mars167 <29228178+mars167@users.noreply.github.com> --- package-lock.json | 6 +++ src/cli/registry.ts | 10 +---- src/commands/ai.ts | 2 - src/core/search.ts | 47 -------------------- src/mcp/handlers/searchHandlers.ts | 69 +----------------------------- src/mcp/schemas/searchSchemas.ts | 16 ------- src/mcp/server.ts | 1 - src/mcp/tools/index.ts | 4 +- src/mcp/tools/searchTools.ts | 21 --------- test/e2e.test.js | 16 ------- test/mcp.smoke.test.js | 10 +---- test/retrieval.test.ts | 14 ------ 12 files changed, 11 insertions(+), 205 deletions(-) diff --git a/package-lock.json b/package-lock.json index 679d5bc..ce21975 100644 --- a/package-lock.json +++ b/package-lock.json @@ -451,6 +451,7 @@ "node_modules/@types/node": { "version": "25.0.9", "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -552,6 +553,7 @@ "node_modules/apache-arrow": { "version": "18.1.0", "license": "Apache-2.0", + "peer": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", @@ -969,6 +971,7 @@ "node_modules/express": { "version": "5.2.1", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -2043,6 +2046,7 @@ "version": "0.21.1", "hasInstallScript": true, "license": "MIT", + "peer": true, "dependencies": { "node-addon-api": "^8.0.0", "node-gyp-build": "^4.8.0" @@ -2218,6 +2222,7 @@ "node_modules/typescript": { "version": "5.9.3", "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -2326,6 +2331,7 @@ "node_modules/zod": { "version": "4.3.5", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/src/cli/registry.ts b/src/cli/registry.ts index 1c32062..ee615b1 100644 --- a/src/cli/registry.ts +++ b/src/cli/registry.ts @@ -17,11 +17,9 @@ import { handleGraphCallees, handleGraphChain, } from './handlers/graphHandlers'; -import { SemanticSearchSchema } from './schemas/semanticSchemas'; import { IndexRepoSchema } from './schemas/indexSchemas'; import { SearchSymbolsSchema } from './schemas/querySchemas'; import { SearchFilesSchema } from './schemas/queryFilesSchemas'; -import { handleSemanticSearch } from './handlers/semanticHandlers'; import { handleIndexRepo } from './handlers/indexHandlers'; import { handleSearchSymbols } from './handlers/queryHandlers'; import { handleSearchFiles } from './handlers/queryFilesHandlers'; @@ -42,17 +40,13 @@ import { handleRepoMap } from './handlers/repoMapHandler'; * Maps command keys to their schema + handler implementations. * * Command keys follow the pattern: - * - Top-level commands: 'index', 'semantic', 'status' - * - Subcommands: 'graph:find', 'graph:query', 'dsr:generate' + * - Top-level commands: 'index', 'status' + * - Subcommands: 'graph:find', 'graph:query' * * This will be populated as commands are migrated from src/commands/*.ts */ export const cliHandlers: Record> = { // Top-level commands - 'semantic': { - schema: SemanticSearchSchema, - handler: handleSemanticSearch, - }, 'index': { schema: IndexRepoSchema, handler: handleIndexRepo, diff --git a/src/commands/ai.ts b/src/commands/ai.ts index fb49b57..c22869d 100644 --- a/src/commands/ai.ts +++ b/src/commands/ai.ts @@ -2,7 +2,6 @@ import { Command } from 'commander'; import { indexCommand } from '../cli/commands/indexCommand.js'; import { queryCommand } from '../cli/commands/queryCommand.js'; import { queryFilesCommand } from '../cli/commands/queryFilesCommand.js'; -import { semanticCommand } from '../cli/commands/semanticCommand.js'; import { serveCommand, agentCommand } from '../cli/commands/serveCommands.js'; import { packCommand, unpackCommand } from '../cli/commands/archiveCommands.js'; import { hooksCommand } from '../cli/commands/hooksCommands.js'; @@ -18,7 +17,6 @@ export const aiCommand = new Command('ai') .addCommand(repoMapCommand) .addCommand(queryCommand) .addCommand(queryFilesCommand) - .addCommand(semanticCommand) .addCommand(graphCommand) .addCommand(packCommand) .addCommand(unpackCommand) diff --git a/src/core/search.ts b/src/core/search.ts index c98c7ff..9e3101d 100644 --- a/src/core/search.ts +++ b/src/core/search.ts @@ -1,11 +1,5 @@ import { dequantizeSQ8, cosineSimilarity, quantizeSQ8, SQ8Vector } from './sq8'; import { hashEmbedding } from './embedding'; -import { classifyQuery } from './retrieval/classifier'; -import { expandQuery } from './retrieval/expander'; -import { fuseResults } from './retrieval/fuser'; -import { rerank } from './retrieval/reranker'; -import { computeWeights, type WeightFeedback } from './retrieval/weights'; -import type { QueryType, RankedResult, RetrievalResult, RetrievalWeights } from './retrieval/types'; export interface SemanticHit { content_hash: string; @@ -13,22 +7,6 @@ export interface SemanticHit { text?: string; } -export interface AdaptiveQueryPlan { - query: string; - expanded: string[]; - queryType: QueryType; - weights: RetrievalWeights; -} - -export interface AdaptiveFusionOptions { - feedback?: WeightFeedback; - limit?: number; -} - -export interface AdaptiveFusionOutput extends AdaptiveQueryPlan { - results: RankedResult[]; -} - export function buildQueryVector(text: string, dim: number): SQ8Vector { const vec = hashEmbedding(text, { dim }); return quantizeSQ8(vec); @@ -39,28 +17,3 @@ export function scoreAgainst(q: SQ8Vector, item: { scale: number; qvec: Int8Arra const vf = dequantizeSQ8({ dim: item.dim, scale: item.scale, q: item.qvec }); return cosineSimilarity(qf, vf); } - -export function buildAdaptiveQueryPlan(query: string, feedback?: WeightFeedback): AdaptiveQueryPlan { - const q = String(query ?? '').trim(); - const queryType = classifyQuery(q); - const expanded = expandQuery(q, queryType); - const weights = computeWeights(queryType, feedback); - return { query: q, expanded, queryType, weights }; -} - -/** - * Runs the adaptive retrieval pipeline: classification -> expansion -> weighting -> fusion -> heuristic reranking. - * - * Note: This uses synchronous heuristic reranking. For higher quality but slower reranking using - * the ONNX Cross-Encoder, use the `CrossEncoderReranker` class directly (which is async). - */ -export function runAdaptiveRetrieval( - query: string, - candidates: RetrievalResult[], - options: AdaptiveFusionOptions = {} -): AdaptiveFusionOutput { - const plan = buildAdaptiveQueryPlan(query, options.feedback); - const fused = fuseResults(candidates, plan.weights, options.limit); - const results = rerank(plan.query, fused, { limit: options.limit }); - return { ...plan, results }; -} diff --git a/src/mcp/handlers/searchHandlers.ts b/src/mcp/handlers/searchHandlers.ts index 0e6ed51..c477b87 100644 --- a/src/mcp/handlers/searchHandlers.ts +++ b/src/mcp/handlers/searchHandlers.ts @@ -2,12 +2,10 @@ import type { ToolHandler } from '../types'; import { successResponse, errorResponse } from '../types'; import type { SearchSymbolsArgs, - SemanticSearchArgs, RepoMapArgs } from '../schemas'; import { resolveGitRoot, inferScanRoot, inferWorkspaceRoot } from '../../core/git'; import { defaultDbDir, openTablesByLang } from '../../core/lancedb'; -import { buildQueryVector, scoreAgainst } from '../../core/search'; import { checkIndex, resolveLangs } from '../../core/indexCheck'; import { generateRepoMap } from '../../core/repoMap'; import { buildCoarseWhere, filterAndRankSymbolRows, inferSymbolSearchMode, pickCoarseToken } from '../../core/symbolSearch'; @@ -253,69 +251,4 @@ export const handleSearchSymbols: ToolHandler = async (args) }); }; -export const handleSemanticSearch: ToolHandler = async (args) => { - const repoRoot = await resolveGitRoot(path.resolve(args.path)); - const query = args.query; - const topk = args.topk ?? 10; - const langSel = args.lang ?? 'auto'; - const withRepoMap = args.with_repo_map ?? false; - const wikiDir = resolveWikiDirInsideRepo(repoRoot, args.wiki_dir ?? ''); - const repoMapMaxFiles = args.repo_map_max_files ?? 20; - const repoMapMaxSymbols = args.repo_map_max_symbols ?? 5; - - const status = await checkIndex(repoRoot); - if (!status.ok) { - return errorResponse( - new Error('Index incompatible or missing'), - 'index_incompatible' - ); - } - - const langs = resolveLangs(status.found.meta ?? null, langSel as any); - const dim = typeof status.found.meta?.dim === 'number' ? status.found.meta.dim : 256; - const dbDir = defaultDbDir(repoRoot); - const { byLang } = await openTablesByLang({ - dbDir, - dim, - mode: 'open_only', - languages: langs - }); - const q = buildQueryVector(query, dim); - - const allScored: any[] = []; - for (const lang of langs) { - const t = byLang[lang]; - if (!t) continue; - const chunkRows = await t.chunks - .query() - .select(['content_hash', 'text', 'dim', 'scale', 'qvec_b64']) - .limit(1_000_000) - .toArray(); - for (const r of chunkRows as any[]) { - allScored.push({ - lang, - content_hash: String(r.content_hash), - score: scoreAgainst(q, { - dim: Number(r.dim), - scale: Number(r.scale), - qvec: new Int8Array(Buffer.from(String(r.qvec_b64), 'base64')) - }), - text: String(r.text) - }); - } - } - - const rows = allScored - .sort((a, b) => b.score - a.score) - .slice(0, topk); - const repoMap = withRepoMap - ? await buildRepoMapAttachment(repoRoot, wikiDir, repoMapMaxFiles, repoMapMaxSymbols) - : undefined; - - return successResponse({ - repoRoot, - lang: langSel, - rows, - ...(repoMap ? { repo_map: repoMap } : {}) - }); -}; + diff --git a/src/mcp/schemas/searchSchemas.ts b/src/mcp/schemas/searchSchemas.ts index 63c6513..037c3e7 100644 --- a/src/mcp/schemas/searchSchemas.ts +++ b/src/mcp/schemas/searchSchemas.ts @@ -29,22 +29,6 @@ export const SearchSymbolsArgsSchema = z.object({ export type SearchSymbolsArgs = z.infer; -/** - * Schema for semantic_search tool - */ -export const SemanticSearchArgsSchema = z.object({ - path: z.string().min(1, 'path is required'), - query: z.string().min(1, 'query is required'), - topk: z.number().int().positive().default(10), - lang: LangEnum.default('auto'), - with_repo_map: z.boolean().default(false), - repo_map_max_files: z.number().int().positive().default(20), - repo_map_max_symbols: z.number().int().positive().default(5), - wiki_dir: z.string().optional(), -}); - -export type SemanticSearchArgs = z.infer; - /** * Schema for repo_map tool */ diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 43e3c70..fee2a42 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -90,7 +90,6 @@ export class GitAIV2MCPServer { list_files: schemas.ListFilesArgsSchema, read_file: schemas.ReadFileArgsSchema, search_symbols: schemas.SearchSymbolsArgsSchema, - semantic_search: schemas.SemanticSearchArgsSchema, repo_map: schemas.RepoMapArgsSchema, ast_graph_query: schemas.AstGraphQueryArgsSchema, ast_graph_find: schemas.AstGraphFindArgsSchema, diff --git a/src/mcp/tools/index.ts b/src/mcp/tools/index.ts index 5462d45..1272db5 100644 --- a/src/mcp/tools/index.ts +++ b/src/mcp/tools/index.ts @@ -12,7 +12,6 @@ import { } from './fileTools'; import { searchSymbolsDefinition, - semanticSearchDefinition, repoMapDefinition } from './searchTools'; import { @@ -37,9 +36,8 @@ export const allTools: ToolDefinition[] = [ listFilesDefinition, readFileDefinition, - // Search tools (3) + // Search tools (2) searchSymbolsDefinition, - semanticSearchDefinition, repoMapDefinition, // AST graph tools (7) diff --git a/src/mcp/tools/searchTools.ts b/src/mcp/tools/searchTools.ts index c4318ae..99cc82b 100644 --- a/src/mcp/tools/searchTools.ts +++ b/src/mcp/tools/searchTools.ts @@ -1,7 +1,6 @@ import type { ToolDefinition } from '../types'; import { handleSearchSymbols, - handleSemanticSearch, handleRepoMap } from '../handlers'; @@ -28,26 +27,6 @@ export const searchSymbolsDefinition: ToolDefinition = { handler: handleSearchSymbols }; -export const semanticSearchDefinition: ToolDefinition = { - name: 'semantic_search', - description: 'Semantic search using SQ8 vectors stored in LanceDB (brute-force). Risk: low (read-only).', - inputSchema: { - type: 'object', - properties: { - query: { type: 'string' }, - path: { type: 'string', description: 'Repository root path' }, - topk: { type: 'number', default: 10 }, - lang: { type: 'string', enum: ['auto', 'all', 'java', 'ts'], default: 'auto' }, - with_repo_map: { type: 'boolean', default: false }, - repo_map_max_files: { type: 'number', default: 20 }, - repo_map_max_symbols: { type: 'number', default: 5 }, - wiki_dir: { type: 'string', description: 'Wiki dir relative to repo root (optional)' } - }, - required: ['path', 'query'] - }, - handler: handleSemanticSearch -}; - export const repoMapDefinition: ToolDefinition = { name: 'repo_map', description: 'Generate a lightweight repository map (ranked files + top symbols + wiki links). Risk: low (read-only).', diff --git a/test/e2e.test.js b/test/e2e.test.js index 0fbce27..cb36d84 100644 --- a/test/e2e.test.js +++ b/test/e2e.test.js @@ -156,22 +156,6 @@ test('git-ai works in Spring Boot and Vue repos', async () => { assert.ok(obj.rows.some(r => String(r.file || '').includes('app/src/main/java/'))); } - { - const res = runOk('node', [CLI, 'ai', 'semantic', 'hello controller', '--topk', '5'], springRepo); - const obj = JSON.parse(res.stdout); - assert.ok(Array.isArray(obj.hits)); - assert.ok(obj.hits.length > 0); - } - - { - const res = runOk('node', [CLI, 'ai', 'semantic', 'hello controller', '--topk', '5', '--with-repo-map', '--repo-map-files', '5', '--repo-map-symbols', '2'], springRepo); - const obj = JSON.parse(res.stdout); - assert.ok(Array.isArray(obj.hits)); - assert.ok(obj.repo_map && obj.repo_map.enabled === true); - assert.ok(Array.isArray(obj.repo_map.files)); - assert.ok(obj.repo_map.files.length > 0); - } - { const res = runOk('node', [CLI, 'ai', 'graph', 'find', 'HelloController'], springRepo); const obj = JSON.parse(res.stdout); diff --git a/test/mcp.smoke.test.js b/test/mcp.smoke.test.js index 351cf75..267109f 100644 --- a/test/mcp.smoke.test.js +++ b/test/mcp.smoke.test.js @@ -80,7 +80,7 @@ test('mcp server supports atomic tool calls via path arg', async () => { const toolNames = new Set((res.tools ?? []).map(t => t.name)); assert.ok(toolNames.has('search_symbols')); - assert.ok(toolNames.has('semantic_search')); + assert.ok(!toolNames.has('semantic_search'), 'semantic_search should not be registered'); assert.ok(toolNames.has('repo_map')); assert.ok(toolNames.has('get_repo')); assert.ok(toolNames.has('check_index')); @@ -150,14 +150,6 @@ test('mcp server supports atomic tool calls via path arg', async () => { assert.ok(parsed.repo_map.files.length > 0); } - { - const call = await client.callTool({ name: 'semantic_search', arguments: { path: repoDir, query: 'hello world', topk: 3 } }); - const text = String(call?.content?.[0]?.text ?? ''); - const parsed = text ? JSON.parse(text) : null; - assert.ok(parsed && Array.isArray(parsed.rows)); - assert.ok(parsed.rows.length > 0); - } - { const call = await client.callTool({ name: 'repo_map', arguments: { path: repoDir, max_files: 5, max_symbols: 2 } }); const text = String(call?.content?.[0]?.text ?? ''); diff --git a/test/retrieval.test.ts b/test/retrieval.test.ts index 8fb846c..8e5e40b 100644 --- a/test/retrieval.test.ts +++ b/test/retrieval.test.ts @@ -15,9 +15,6 @@ import { fuseResults } from '../dist/src/core/retrieval/fuser.js'; // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore dist module has no typings import { rerank } from '../dist/src/core/retrieval/reranker.js'; -// eslint-disable-next-line @typescript-eslint/ban-ts-comment -// @ts-ignore dist module has no typings -import { runAdaptiveRetrieval } from '../dist/src/core/search.js'; import type { QueryType, RetrievalResult } from '../src/core/retrieval/types'; test('classifyQuery identifies historical intent', () => { @@ -65,14 +62,3 @@ test('rerank boosts lexical overlap', () => { const ranked = rerank('auth', candidates, { query: 'auth', limit: 2 }); assert.equal(ranked[0]?.id, 'a'); }); - -test('runAdaptiveRetrieval produces fused and reranked results', () => { - const candidates: RetrievalResult[] = [ - { source: 'vector', id: 'vec', score: 0.8, text: 'semantic auth flow' }, - { source: 'graph', id: 'graph', score: 0.7, text: 'callers of auth' }, - ]; - const out = runAdaptiveRetrieval('auth flow', candidates, { limit: 2 }); - assert.equal(out.query, 'auth flow'); - assert.ok(out.weights.vectorWeight > 0); - assert.equal(out.results.length, 2); -});