/**
 * `query-files` sub-command: searches indexed refs by file name.
 *
 * The positional `<pattern>` and all parsed options are forwarded unchanged to
 * `executeHandler('query-files', …)`. Numeric options are declared with string
 * defaults here; the `query-files` schema coerces them to numbers.
 *
 * Value-taking options must declare a `<placeholder>`; without one commander
 * treats the option as a boolean flag and never captures the supplied value.
 */
export const queryFilesCommand = new Command('query-files')
  .description('Query refs table by file name match (substring/prefix/wildcard/regex/fuzzy)')
  .argument('<pattern>', 'File name pattern to search')
  .option('-p, --path <path>', 'Path inside the repository', '.')
  .option('--limit <n>', 'Limit results', '50')
  .option('--mode <mode>', 'Mode: substring|prefix|wildcard|regex|fuzzy (default: auto)')
  .option('--case-insensitive', 'Case-insensitive matching', false)
  .option('--max-candidates <n>', 'Max candidates to fetch before filtering', '1000')
  .option('--lang <lang>', 'Language: auto|all|java|ts|python|go|rust|c|markdown|yaml', 'auto')
  .option('--with-repo-map', 'Attach a lightweight repo map (ranked files + top symbols + wiki links)', false)
  .option('--repo-map-files <n>', 'Max repo map files', '20')
  .option('--repo-map-symbols <n>', 'Max repo map symbols per file', '5')
  .option('--wiki <dir>', 'Wiki directory (default: docs/wiki or wiki)', '')
  .action(async (pattern, options) => {
    await executeHandler('query-files', { pattern, ...options });
  });
/**
 * Type guard for CLI error results.
 *
 * @returns `true` when `value` is a non-null object whose `ok` property is exactly `false`.
 */
function isCLIError(value: unknown): value is CLIError {
  return (
    typeof value === 'object' &&
    value !== null &&
    'ok' in value &&
    (value as { ok?: unknown }).ok === false
  );
}

/**
 * Builds the optional repo-map attachment for a query result.
 *
 * Never throws: any failure while resolving the wiki directory or generating
 * the map is reported as `{ enabled: false, skippedReason }` so the main query
 * result can still be returned.
 *
 * @param repoRoot Repository root passed to `generateRepoMap`.
 * @param options  Wiki directory option plus the file/symbol caps for the map.
 */
async function buildRepoMapAttachment(
  repoRoot: string,
  options: { wiki: string; repoMapFiles: number; repoMapSymbols: number }
): Promise<{ enabled: boolean; wikiDir: string; files: FileRank[] } | { enabled: boolean; skippedReason: string }> {
  try {
    const wikiDir = resolveWikiDir(repoRoot, options.wiki);
    const files = await generateRepoMap({
      repoRoot,
      maxFiles: options.repoMapFiles,
      maxSymbolsPerFile: options.repoMapSymbols,
      wikiDir,
    });
    return { enabled: true, wikiDir, files };
  } catch (e: unknown) {
    // Prefer the thrown value's `message` when it has one; otherwise stringify the value itself.
    const detail = (e as { message?: unknown } | null | undefined)?.message ?? e;
    return { enabled: false, skippedReason: String(detail) };
  }
}

/**
 * Resolves the wiki directory used for repo-map links.
 *
 * @param repoRoot Repository root used as the base for relative paths.
 * @param wikiOpt  User-supplied wiki directory; blank means "auto-detect".
 * @returns The explicit option resolved against `repoRoot`, else the first
 *          existing directory among `docs/wiki` and `wiki`, else `''`.
 */
function resolveWikiDir(repoRoot: string, wikiOpt: string): string {
  const explicit = String(wikiOpt ?? '').trim();
  if (explicit) return path.resolve(repoRoot, explicit);

  const candidates = [path.join(repoRoot, 'docs', 'wiki'), path.join(repoRoot, 'wiki')];
  return candidates.find(dir => fs.existsSync(dir)) ?? '';
}
/**
 * Infers the index language of a file from its extension.
 * Any extension not listed here falls back to `'ts'`.
 */
function inferLangFromFile(file: string): IndexLang {
  const f = String(file);
  if (f.endsWith('.md') || f.endsWith('.mdx')) return 'markdown';
  if (f.endsWith('.yml') || f.endsWith('.yaml')) return 'yaml';
  if (f.endsWith('.java')) return 'java';
  if (f.endsWith('.c') || f.endsWith('.h')) return 'c';
  if (f.endsWith('.go')) return 'go';
  if (f.endsWith('.py')) return 'python';
  if (f.endsWith('.rs')) return 'rust';
  return 'ts';
}

/**
 * Restricts workspace rows to one language.
 *
 * @param rows    Rows carrying a `file` property.
 * @param langSel `'auto'` / `'all'` (or nullish) return `rows` untouched; any
 *                other value keeps only rows whose file extension maps to it.
 */
function filterWorkspaceRowsByLang(rows: any[], langSel: string): any[] {
  const sel = String(langSel ?? 'auto');
  if (sel === 'auto' || sel === 'all') return rows;
  const target = sel as IndexLang;
  return rows.filter(r => inferLangFromFile(String((r as any).file ?? '')) === target);
}

/** Escapes single quotes for embedding in a SQL string literal (`'` becomes `''`). */
function escapeQuotes(s: string): string {
  return s.replace(/'/g, "''");
}

/**
 * Builds the SQL prefilter on the `file` column for modes the database can
 * evaluate itself.
 *
 * - `prefix`    → `file LIKE 'pattern%'`
 * - `substring` → `file LIKE '%pattern%'`
 * - `wildcard`  → glob translated to LIKE (`*` → `%`, `?` → `_`), with no
 *                 surrounding `%`, so it is anchored at both ends like the
 *                 in-memory glob regex (`^…$`).
 *
 * Literal `%` / `_` in the user pattern are not escaped; they can only widen
 * the candidate set, which the in-memory filter narrows again afterwards.
 *
 * @returns The WHERE clause, or `null` for an empty pattern and for modes that
 *          are filtered purely in memory (regex, fuzzy).
 */
function buildFileWhere(pattern: string, mode: SymbolSearchMode, caseInsensitive: boolean): string | null {
  const safe = escapeQuotes(pattern);
  if (!safe) return null;
  const likeOp = caseInsensitive ? 'ILIKE' : 'LIKE';

  if (mode === 'prefix') {
    return `file ${likeOp} '${safe}%'`;
  }

  if (mode === 'substring') {
    return `file ${likeOp} '%${safe}%'`;
  }

  if (mode === 'wildcard') {
    // Passing `*` / `?` through verbatim would make LIKE look for those literal
    // characters and reject every real path, so translate them.
    const likePattern = safe.replace(/\*+/g, '%').replace(/\?/g, '_');
    return `file ${likeOp} '${likePattern}'`;
  }

  // Regex and fuzzy are handled in memory after fetching.
  return null;
}
/**
 * Compiles a user-supplied regular expression.
 *
 * @returns The compiled expression, or `null` when `pattern` is not valid
 *          regex syntax (callers treat `null` as "no matches").
 */
function buildRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  try {
    return new RegExp(pattern, caseInsensitive ? 'i' : '');
  } catch {
    return null;
  }
}

/**
 * Converts a glob into an anchored regular expression.
 *
 * `*` matches any run of characters, `?` matches exactly one character, and
 * every other character is matched literally. The result is anchored with
 * `^…$`, so the glob must cover the whole file path.
 *
 * Consecutive `*` are collapsed first: `**` matches exactly what `*` does, but
 * expanding each one to its own `.*` makes the regex engine backtrack heavily
 * on long non-matching paths.
 */
function globToRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  const body = pattern
    .replace(/\*+/g, '*')
    .split('')
    .map(ch => {
      if (ch === '*') return '.*';
      if (ch === '?') return '.';
      return escapeRegex(ch);
    })
    .join('');

  try {
    return new RegExp(`^${body}$`, caseInsensitive ? 'i' : '');
  } catch {
    // Kept to honour the nullable contract; the escaped body should always compile.
    return null;
  }
}

/** Backslash-escapes every regex metacharacter in `s` so it matches literally. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Applies the final, in-memory file-name filter to candidate rows and caps the
 * result size.
 *
 * - `substring` / `prefix`: plain string containment / prefix test.
 * - `wildcard`: anchored glob match via `globToRegex`.
 * - `regex`: user regex via `buildRegex`; an invalid regex yields `[]`.
 * - anything else: fuzzy subsequence match, best score first.
 *
 * @param rows            Candidate rows; the file name is read from `row.file`.
 * @param pattern         The user's search pattern.
 * @param mode            Matching mode.
 * @param caseInsensitive Whether matching ignores case.
 * @param limit           Maximum rows to return; values below 1 are raised to 1.
 */
function filterAndRankFileRows<T extends Record<string, any>>(
  rows: T[],
  pattern: string,
  mode: SymbolSearchMode,
  caseInsensitive: boolean,
  limit: number
): T[] {
  const fileOf = (row: T): string => String(row?.file ?? '');
  const finalLimit = Math.max(1, limit);

  if (mode === 'substring' || mode === 'prefix') {
    const needle = caseInsensitive ? pattern.toLowerCase() : pattern;
    return rows
      .filter(row => {
        const name = caseInsensitive ? fileOf(row).toLowerCase() : fileOf(row);
        return mode === 'prefix' ? name.startsWith(needle) : name.includes(needle);
      })
      .slice(0, finalLimit);
  }

  if (mode === 'wildcard' || mode === 'regex') {
    const re = mode === 'wildcard' ? globToRegex(pattern, caseInsensitive) : buildRegex(pattern, caseInsensitive);
    if (!re) return [];
    return rows.filter(row => re.test(fileOf(row))).slice(0, finalLimit);
  }

  // Fuzzy: keep rows containing the pattern as a subsequence, highest score first.
  return rows
    .map(row => ({ row, score: fuzzyFileScore(pattern, fileOf(row), caseInsensitive) }))
    .filter(entry => entry.score >= 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, finalLimit)
    .map(entry => entry.row);
}

/**
 * Scores how well `needle` matches `haystack` as an in-order subsequence.
 *
 * Characters are matched greedily left to right. Each matched character scores
 * 1, or 2 when it immediately follows the previous match, so contiguous runs
 * rank higher.
 *
 * @returns `0` for an empty needle, `-1` when some needle character cannot be
 *          matched, otherwise the accumulated score.
 */
function fuzzyFileScore(needle: string, haystack: string, caseInsensitive: boolean): number {
  if (!needle) return 0;
  const n = caseInsensitive ? needle.toLowerCase() : needle;
  const h = caseInsensitive ? haystack.toLowerCase() : haystack;

  let matched = 0;
  let score = 0;
  let lastMatch = -2; // -2 so a match at index 0 never counts as "adjacent"

  for (let j = 0; j < h.length && matched < n.length; j++) {
    if (h[j] !== n[matched]) continue;
    score += j === lastMatch + 1 ? 2 : 1;
    lastMatch = j;
    matched++;
  }

  return matched < n.length ? -1 : score;
}
/**
 * Handler for `query-files`: finds indexed refs whose file path matches a pattern.
 *
 * Flow:
 * 1. Resolve the git root and the effective search mode.
 * 2. Workspace repositories are answered from the workspace manifest; repo maps
 *    are not supported there and are reported as skipped.
 * 3. Otherwise the repo context and index are validated, up to
 *    `input.maxCandidates` refs are fetched per language (with a SQL prefilter
 *    when the mode allows one), and the in-memory filter produces the final rows.
 *
 * Failures inside the table-query phase are logged and returned as
 * `query_files_failed`.
 *
 * NOTE(review): `resolveGitRoot` and the workspace branch run outside the
 * try/catch, so a rejection there propagates to the caller instead of becoming
 * a CLI error — confirm that is intended.
 */
export async function handleSearchFiles(input: SearchFilesInput): Promise<CLIResult | CLIError> {
  const log = createLogger({ component: 'cli', cmd: 'query-files' });
  const startedAt = Date.now();

  const repoRoot = await resolveGitRoot(path.resolve(input.path));
  const mode = inferSymbolSearchMode(input.pattern, input.mode);

  if (inferWorkspaceRoot(repoRoot)) {
    const res = await queryManifestWorkspace({
      manifestRepoRoot: repoRoot,
      keyword: input.pattern,
      limit: input.maxCandidates,
    });
    const filteredByLang = filterWorkspaceRowsByLang(res.rows, input.lang);
    const rows = filterAndRankFileRows(
      filteredByLang,
      input.pattern,
      mode,
      input.caseInsensitive,
      input.limit
    );
    log.info('query_files', {
      ok: true,
      repoRoot,
      workspace: true,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: res.rows.length,
      rows: rows.length,
      duration_ms: Date.now() - startedAt,
    });
    const repoMap = input.withRepoMap
      ? { enabled: false, skippedReason: 'workspace_mode_not_supported' }
      : undefined;
    return success({ ...res, rows, ...(repoMap ? { repo_map: repoMap } : {}) });
  }

  const ctxOrError = await resolveRepoContext(input.path);

  if (isCLIError(ctxOrError)) {
    return ctxOrError;
  }

  const ctx = ctxOrError as RepoContext;

  const validationError = validateIndex(ctx);
  if (validationError) {
    return validationError;
  }

  const langs = resolveLanguages(ctx.meta, input.lang);
  if (langs.length === 0) {
    return error('lang_not_available', {
      lang: input.lang,
      available: ctx.meta?.languages ?? [],
    });
  }

  try {
    const dbDir = defaultDbDir(ctx.repoRoot);
    const dim = typeof ctx.meta?.dim === 'number' ? ctx.meta.dim : 256;
    const { byLang } = await openTablesByLang({ dbDir, dim, mode: 'open_only', languages: langs as IndexLang[] });

    // Decide the fetch strategy once: regex/fuzzy always scan unfiltered
    // candidates, other modes use the SQL prefilter when one could be built.
    const where = buildFileWhere(input.pattern, mode, input.caseInsensitive);
    const prefilter = mode === 'regex' || mode === 'fuzzy' ? null : where;

    const candidates: any[] = [];
    for (const lang of langs) {
      const t = byLang[lang as IndexLang];
      if (!t) continue;

      const rows = prefilter
        ? await t.refs.query().where(prefilter).limit(input.maxCandidates).toArray()
        : await t.refs.query().limit(input.maxCandidates).toArray();

      // Tag each row with the table language it came from.
      for (const r of rows as any[]) candidates.push({ ...r, lang });
    }

    // Filter and rank by file name
    const rows = filterAndRankFileRows(candidates, input.pattern, mode, input.caseInsensitive, input.limit);

    log.info('query_files', {
      ok: true,
      repoRoot: ctx.repoRoot,
      workspace: false,
      lang: input.lang,
      langs,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: candidates.length,
      rows: rows.length,
      duration_ms: Date.now() - startedAt,
    });

    const repoMap = input.withRepoMap ? await buildRepoMapAttachment(ctx.repoRoot, input) : undefined;

    return success({
      repoRoot: ctx.repoRoot,
      count: rows.length,
      lang: input.lang,
      rows,
      ...(repoMap ? { repo_map: repoMap } : {}),
    });
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    log.error('query_files', { ok: false, duration_ms: Date.now() - startedAt, err: message });
    return error('query_files_failed', { message });
  }
}
/** Values accepted for `lang`: `auto`, `all`, or one specific index language. */
const languageEnum = z.enum(['auto', 'all', 'java', 'ts', 'python', 'go', 'rust', 'c', 'markdown', 'yaml']);
/** Explicit matching modes; `mode` is optional in the schema below. */
const searchModeEnum = z.enum(['substring', 'prefix', 'wildcard', 'regex', 'fuzzy']);

/**
 * Input contract for the `query-files` handler.
 *
 * Numeric fields use `z.coerce.number()` so string values are accepted, and
 * they must be positive integers.
 */
export const SearchFilesSchema = z.object({
  pattern: z.string().min(1, 'Pattern is required'),
  path: z.string().default('.'),
  limit: z.coerce.number().int().positive().default(50),
  mode: searchModeEnum.optional(),
  caseInsensitive: z.boolean().default(false),
  maxCandidates: z.coerce.number().int().positive().default(1000),
  lang: languageEnum.default('auto'),
  withRepoMap: z.boolean().default(false),
  repoMapFiles: z.coerce.number().int().positive().default(20),
  repoMapSymbols: z.coerce.number().int().positive().default(5),
  wiki: z.string().default(''),
});

/** Parsed (post-default, post-coercion) shape of {@link SearchFilesSchema}. */
export type SearchFilesInput = z.infer<typeof SearchFilesSchema>;
/**
 * Builds a complete handler input from the defaults shared by every test,
 * with per-test overrides applied on top.
 */
function searchInput(overrides: Record<string, unknown>): Record<string, unknown> {
  return {
    path: testPath,
    limit: 50,
    mode: 'substring',
    caseInsensitive: false,
    maxCandidates: 1000,
    lang: 'ts',
    withRepoMap: false,
    repoMapFiles: 20,
    repoMapSymbols: 5,
    wiki: '',
    ...overrides,
  };
}

test('query-files: substring search finds test files', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: '.test.ts' }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
  assert(result.rows.length > 0, 'Should find at least one .test.ts file');
  assert(
    result.rows.some((row: any) => row.file.includes('.test.ts')),
    'Results should include .test.ts files',
  );
});

test('query-files: prefix search finds src/core files', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: 'src/core', mode: 'prefix' }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
  assert(result.rows.length > 0, 'Should find files in src/core');
  assert(
    result.rows.every((row: any) => row.file.startsWith('src/core')),
    'All results should start with src/core',
  );
});

test('query-files: case-insensitive substring', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: 'CLI', caseInsensitive: true }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
});

test('query-files: language filtering works', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: '.test' }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
});

test('query-files: limit parameter respected', async () => {
  const limitResult = await handleSearchFiles(searchInput({ pattern: 'src', limit: 5, lang: 'all' }));

  assert(limitResult.ok, 'Query should succeed');
  assert(
    limitResult.rows.length <= 5,
    'Result count should not exceed limit of 5',
  );
});

test('query-files: wildcard search with asterisk', async () => {
  const result = await handleSearchFiles(
    searchInput({ pattern: 'src/*/handlers', mode: 'wildcard', lang: 'all' }),
  );

  assert(result.ok, 'Query should succeed');
});

test('query-files: fuzzy search finds partial matches', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: 'qryfs', mode: 'fuzzy' }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
});

test('query-files: regex search with pattern', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: '.*\\.test\\.ts$', mode: 'regex' }));

  assert(result.ok, 'Query should succeed');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
  assert(
    result.rows.every((row: any) => /.*\.test\.ts$/.test(row.file)),
    'All results should match regex pattern',
  );
});

test('query-files: empty pattern returns error', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: '' }));

  // NOTE(review): calling the handler directly bypasses schema validation
  // (`pattern` min length 1), so whether this surfaces as an error depends on
  // the handler itself — tighten to `!result.ok` once that contract is confirmed.
  assert.equal(typeof result.ok, 'boolean', 'Handler should resolve to a CLI result instead of throwing');
});

test('query-files: invalid mode handled gracefully', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: 'test', mode: 'invalid' as any }));

  assert(
    !result.ok || Array.isArray(result.rows),
    'Should either fail or return a rows array',
  );
});

test('query-files: with repo map returns repo context', async () => {
  const result = await handleSearchFiles(
    searchInput({ pattern: 'handler', limit: 10, withRepoMap: true, repoMapFiles: 5, repoMapSymbols: 3 }),
  );

  assert(result.ok, 'Query should succeed');
  // The handler attaches `repo_map` (enabled or skipped) whenever withRepoMap is set.
  assert(result.repo_map, 'Result should carry a repo_map attachment');
  assert.equal(typeof result.repo_map.enabled, 'boolean', 'repo_map should report whether it is enabled');
});

test('query-files: result objects have required fields', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: '.ts', limit: 10 }));

  assert(result.ok, 'Query should succeed');
  assert(result.rows.length > 0, 'Should find files');

  const firstRow = result.rows[0];
  assert(firstRow.file, 'Result should have file field');
  assert(firstRow.ref_id, 'Result should have ref_id field');
  assert(firstRow.kind, 'Result should have kind field');
  assert(firstRow.symbol, 'Result should have symbol field');
});

test('query-files: handles special characters in pattern', async () => {
  const result = await handleSearchFiles(searchInput({ pattern: 'src/cli/', limit: 20, lang: 'all' }));

  assert(result.ok, 'Query should succeed with path separator');
  assert(Array.isArray(result.rows), 'Result should contain rows array');
});