From ac9ce2183dfc42a05027a602372bc64a2d940b38 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 16:58:45 +0530 Subject: [PATCH 01/10] Add architecture summary MCP tool --- src/main.c | 3 +- src/mcp/mcp.c | 361 +++++++++++- src/store/store.c | 1136 +++++++++++++++++++++++++++++++++++++- src/store/store.h | 63 +++ tests/test_integration.c | 15 + tests/test_mcp.c | 144 ++++- tests/test_store_arch.c | 214 +++++++ tests/test_store_nodes.c | 30 + 8 files changed, 1960 insertions(+), 6 deletions(-) diff --git a/src/main.c b/src/main.c index 70eadcdc..f442aa0c 100644 --- a/src/main.c +++ b/src/main.c @@ -149,7 +149,8 @@ static void print_help(void) { printf("\nSupported agents (auto-detected):\n"); printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); - printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n"); + printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); + printf(" get_architecture_summary, search_code,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 3530acc3..47329a1c 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 14 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. * * Uses yyjson for fast JSON parsing/building. * Single-threaded event loop: read line → parse → dispatch → respond. 
@@ -34,6 +34,7 @@
 #endif
 #include
 #include // int64_t
+#include
 #include
 #include
 #include
@@ -79,6 +80,111 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) {
     return s;
 }
 
+typedef struct { /* Growable text buffer with a hard cap on content length. */
+    char *buf;      /* heap buffer, kept NUL-terminated; NULL after a failed allocation */
+    size_t len;     /* bytes of content currently stored (terminator excluded) */
+    size_t cap;     /* allocated size of buf in bytes */
+    size_t limit;   /* appends that would push len past this value are refused */
+    bool truncated; /* set once an append was refused for exceeding limit */
+} markdown_builder_t;
+
+static void markdown_builder_init(markdown_builder_t *b, size_t limit) { /* Start an empty builder capped at `limit` content bytes. */
+    b->cap = 512;
+    b->buf = malloc(b->cap);
+    b->len = 0;
+    b->limit = limit;
+    b->truncated = false;
+    if (b->buf) { /* malloc may fail; the append helpers test buf before touching it */
+        b->buf[0] = '\0';
+    }
+}
+
+static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { /* Ensure room for `need` more bytes plus the NUL; false when no buffer is available. */
+    if (!b->buf) {
+        return false;
+    }
+    while (b->buf && b->len + need + 1 > b->cap) { /* stop doubling as soon as a reallocation fails */
+        b->cap *= 2;
+        b->buf = safe_realloc(b->buf, b->cap); /* NOTE(review): assumes safe_realloc returns NULL on failure - confirm */
+    }
+    return b->buf != NULL; /* never report success with a NULL buffer: callers memcpy into it */
+}
+
+static bool markdown_builder_append_raw(markdown_builder_t *b, const char *text) { /* Append `text` verbatim; false if refused or out of memory. */
+    if (!b || !b->buf || !text || b->truncated) {
+        return false;
+    }
+    size_t add = strlen(text);
+    if (b->len + add > b->limit) { /* all-or-nothing: an oversized append is dropped and latches `truncated` */
+        b->truncated = true;
+        return false;
+    }
+    if (!markdown_builder_reserve(b, add)) {
+        return false;
+    }
+    memcpy(b->buf + b->len, text, add);
+    b->len += add;
+    b->buf[b->len] = '\0';
+    return true;
+}
+
+static bool markdown_builder_appendf(markdown_builder_t *b, const char *fmt, ...) 
{ + if (!b || !b->buf || !fmt || b->truncated) { + return false; + } + + va_list ap; + va_start(ap, fmt); + va_list ap_copy; + va_copy(ap_copy, ap); + int needed = vsnprintf(NULL, 0, fmt, ap_copy); + va_end(ap_copy); + if (needed < 0) { + va_end(ap); + return false; + } + if (b->len + (size_t)needed > b->limit) { + b->truncated = true; + va_end(ap); + return false; + } + if (!markdown_builder_reserve(b, (size_t)needed)) { + va_end(ap); + return false; + } + vsnprintf(b->buf + b->len, b->cap - b->len, fmt, ap); + va_end(ap); + b->len += (size_t)needed; + return true; +} + +static char *markdown_builder_finish(markdown_builder_t *b) { + const char *note = "\n_Truncated at max_tokens._\n"; + if (!b || !b->buf) { + return NULL; + } + if (b->truncated) { + size_t note_len = strlen(note); + if (note_len <= b->limit) { + size_t keep_len = b->len; + size_t final_len = 0; + if (keep_len + note_len > b->limit) { + keep_len = b->limit - note_len; + } + final_len = keep_len + note_len; + if (final_len > b->len && + !markdown_builder_reserve(b, final_len - b->len)) { + return b->buf; + } + b->len = keep_len; + memcpy(b->buf + b->len, note, note_len); + b->len = final_len; + b->buf[b->len] = '\0'; + } + } + return b->buf; +} + /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING * ══════════════════════════════════════════════════════════════════ */ @@ -281,6 +387,17 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":" "\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"}, + {"get_architecture_summary", + "Generate a structured markdown architecture summary from the existing SQLite graph, with " + "optional focus filtering and output size control.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\",\"description\":" + "\"Indexed project name (from list_projects).\"},\"project_path\":{\"type\":\"string\"," + 
"\"description\":\"Deprecated alias: path to the indexed project.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Controls " + "detail level.\"},\"focus\":{\"type\":\"string\",\"description\":\"Optional domain keyword " + "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" + "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -1218,6 +1335,245 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static bool same_project_path(const char *lhs, const char *rhs) { + if (!lhs || !rhs) { + return false; + } + + char lhs_real[4096]; + char rhs_real[4096]; +#ifdef _WIN32 + if (_fullpath(lhs_real, lhs, sizeof(lhs_real)) && _fullpath(rhs_real, rhs, sizeof(rhs_real))) { + return strcmp(lhs_real, rhs_real) == 0; + } +#else + if (realpath(lhs, lhs_real) && realpath(rhs, rhs_real)) { + return strcmp(lhs_real, rhs_real) == 0; + } +#endif + return strcmp(lhs, rhs) == 0; +} + +static char *handle_get_architecture_summary(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *project_path = cbm_mcp_get_string_arg(args, "project_path"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", 2000); + char *display_path = NULL; + + if (!project && !project_path) { + free(focus); + return cbm_mcp_text_result("project is required", true); + } + if (max_tokens <= 0) { + max_tokens = 2000; + } + + if (!project) { + project = cbm_project_name_from_path(project_path); + if (!project) { + free(project_path); + free(focus); + return cbm_mcp_text_result("unable to derive project name from project_path", true); + } + } + + cbm_store_t *store = 
resolve_store(srv, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(project_path); + free(focus); + return _res; + } + + cbm_project_t proj_info = {0}; + if (cbm_store_get_project(store, project, &proj_info) != CBM_STORE_OK) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project is not indexed", true); + } + if (project_path && proj_info.root_path && proj_info.root_path[0] && + !same_project_path(project_path, proj_info.root_path)) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project_path does not match project", true); + } + if (proj_info.root_path && proj_info.root_path[0]) { + display_path = heap_strdup(proj_info.root_path); + } else if (project_path && project_path[0]) { + display_path = heap_strdup(project_path); + } + cbm_project_free_fields(&proj_info); + + cbm_architecture_summary_t summary = {0}; + if (cbm_store_get_architecture_summary(store, project, focus, &summary) != CBM_STORE_OK) { + free(project); + free(project_path); + free(display_path); + free(focus); + return cbm_mcp_text_result("failed to build architecture summary", true); + } + + size_t char_budget = (size_t)max_tokens * 4U; + if (char_budget < 512) { + char_budget = 512; + } + markdown_builder_t md; + markdown_builder_init(&md, char_budget); + + const char *display_name = display_path ? cbm_path_base(display_path) : project; + (void)markdown_builder_appendf(&md, "## Project: %s\n", display_name ? 
display_name : project); + if (focus && focus[0]) { + (void)markdown_builder_appendf(&md, "Focus: %s\n", focus); + } + (void)markdown_builder_appendf(&md, "Files: %d | Functions: %d | Classes: %d | Routes: %d\n\n", + summary.total_files, summary.total_functions, + summary.total_classes, summary.total_routes); + + (void)markdown_builder_append_raw(&md, "## Key Files (by connectivity)\n"); + if (summary.file_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching files.\n\n"); + } else { + for (int i = 0; i < summary.file_count; i++) { + if (!markdown_builder_appendf(&md, "%d. %s - %d inbound calls, %d outbound\n", i + 1, + summary.files[i].file ? summary.files[i].file : "", + summary.files[i].inbound_calls, + summary.files[i].outbound_calls)) { + break; + } + if (summary.files[i].symbol_count > 0) { + (void)markdown_builder_append_raw(&md, " Key methods: "); + for (int j = 0; j < summary.files[i].symbol_count; j++) { + if (j > 0 && !markdown_builder_append_raw(&md, ", ")) { + break; + } + if (summary.files[i].symbols[j].span_lines > 0) { + (void)markdown_builder_appendf( + &md, "%s (%d lines)", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : "", + summary.files[i].symbols[j].span_lines); + } else { + (void)markdown_builder_appendf( + &md, "%s", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : ""); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Route Map\n"); + if (summary.route_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching routes.\n\n"); + } else { + for (int i = 0; i < summary.route_count; i++) { + (void)markdown_builder_appendf( + &md, "%s %s", summary.routes[i].method ? summary.routes[i].method : "", + summary.routes[i].path ? 
summary.routes[i].path : ""); + if (summary.routes[i].handler && summary.routes[i].handler[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].handler); + } + if (summary.routes[i].service && summary.routes[i].service[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].service); + } + if (summary.routes[i].next && summary.routes[i].next[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].next); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Module Clusters (Louvain communities)\n"); + if (summary.cluster_count == 0) { + (void)markdown_builder_append_raw(&md, "No multi-file clusters found.\n\n"); + } else { + for (int i = 0; i < summary.cluster_count; i++) { + (void)markdown_builder_appendf(&md, "Cluster %d (%d files)\n", summary.clusters[i].id, + summary.clusters[i].file_count); + if (summary.clusters[i].core_file_count > 0) { + (void)markdown_builder_append_raw(&md, "Core: "); + for (int j = 0; j < summary.clusters[i].core_file_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].core_files[j] + ? cbm_path_base(summary.clusters[i].core_files[j]) + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + if (summary.clusters[i].entry_point_count > 0) { + (void)markdown_builder_append_raw(&md, "Entry: "); + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].entry_points[j] + ? 
summary.clusters[i].entry_points[j] + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + + (void)markdown_builder_append_raw(&md, "## High-Connectivity Functions (in_degree >= 5)\n"); + if (summary.function_count == 0) { + (void)markdown_builder_append_raw(&md, "None above threshold.\n\n"); + } else { + for (int i = 0; i < summary.function_count; i++) { + (void)markdown_builder_appendf( + &md, "%s - called by %d functions", + summary.functions[i].name ? summary.functions[i].name : "", + summary.functions[i].in_degree); + if (summary.functions[i].file && summary.functions[i].file[0]) { + (void)markdown_builder_appendf(&md, " [%s]", summary.functions[i].file); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Entry Points\n"); + if (summary.entry_point_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching entry points.\n"); + } else { + for (int i = 0; i < summary.entry_point_count; i++) { + (void)markdown_builder_appendf(&md, "%s: %d\n", + summary.entry_points[i].kind + ? summary.entry_points[i].kind + : "Other", + summary.entry_points[i].count); + } + } + + char *markdown = markdown_builder_finish(&md); + char *result = cbm_mcp_text_result(markdown ? 
markdown : "", false); + + free(markdown); + cbm_store_architecture_summary_free(&summary); + free(project); + free(project_path); + free(display_path); + free(focus); + return result; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2704,6 +3060,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_architecture_summary") == 0) { + return handle_get_architecture_summary(srv, args_json); + } /* Pipeline-dependent tools */ if (strcmp(tool_name, "index_repository") == 0) { diff --git a/src/store/store.c b/src/store/store.c index 88aa7078..ca196255 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -9,11 +9,14 @@ // for ISO timestamp #include "store/store.h" +#include "foundation/hash_table.h" #include "foundation/platform.h" #include "foundation/compat.h" #include "foundation/compat_regex.h" #include +#include +#include #include #include #include @@ -246,6 +249,27 @@ static int configure_pragmas(cbm_store_t *s, bool in_memory) { return rc; } +static int configure_query_pragmas(cbm_store_t *s) { + int rc; + rc = exec_sql(s, "PRAGMA foreign_keys = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA temp_store = MEMORY;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA busy_timeout = 10000;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA query_only = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + return exec_sql(s, "PRAGMA mmap_size = 67108864;"); /* 64 MB */ +} + /* ── REGEXP function for SQLite ──────────────────────────────────── */ static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) { @@ -375,8 +399,8 @@ cbm_store_t 
*cbm_store_open_path_query(const char *db_path) { return NULL; } - /* Open read-write but do NOT create — returns SQLITE_CANTOPEN if absent. */ - int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READWRITE, NULL); + /* Open read-only and do NOT create — query tools should never need write access. */ + int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL); if (rc != SQLITE_OK) { /* sqlite3_open_v2 allocates a handle even on failure — must close it. */ sqlite3_close(s->db); @@ -395,7 +419,7 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sqlite_iregexp, NULL, NULL); - if (configure_pragmas(s, false) != CBM_STORE_OK) { + if (configure_query_pragmas(s) != CBM_STORE_OK) { sqlite3_close(s->db); free((void *)s->db_path); free(s); @@ -4085,6 +4109,1112 @@ void cbm_store_architecture_free(cbm_architecture_info_t *out) { memset(out, 0, sizeof(*out)); } +typedef struct { + int64_t node_id; + char *path; + int inbound_calls; + int outbound_calls; +} arch_summary_file_row_t; + +typedef struct { + int community; + int *member_indices; + int member_count; + int member_cap; +} arch_summary_cluster_row_t; + +static char *summary_focus_term(const char *focus) { + if (!focus || !focus[0]) { + return NULL; + } + size_t len = strlen(focus); + char *term = malloc(len + 1); + if (!term) { + return NULL; + } + for (size_t i = 0; i < len; i++) { + term[i] = (char)tolower((unsigned char)focus[i]); + } + term[len] = '\0'; + return term; +} + +static char *summary_focus_like(const char *focus) { + char *term = summary_focus_term(focus); + if (!term) { + return NULL; + } + size_t len = strlen(term); + char *like = malloc(len + 3); + if (!like) { + free(term); + return NULL; + } + like[0] = '%'; + memcpy(like + 1, term, len); + like[len + 1] = '%'; + like[len + 2] = '\0'; + free(term); + return like; +} + +static bool summary_text_matches(const char *focus_term, const char 
*text) { + if (!focus_term || !focus_term[0]) { + return true; + } + if (!text || !text[0]) { + return false; + } + return cbm_strcasestr(text, focus_term) != NULL; +} + +static int summary_count_nodes(cbm_store_t *s, const char *project, const char *label_sql, + const char *focus_like) { + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM nodes " + "WHERE project=?1 AND %s " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(name, '')) LIKE ?2 " + "OR lower(COALESCE(qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(file_path, '')) LIKE ?2 " + "OR lower(COALESCE(properties, '')) LIKE ?2);", + label_sql); + } else { + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM nodes " + "WHERE project=?1 AND %s " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%';", + label_sql); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_count_nodes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + int count = 0; + if (sqlite3_step(stmt) == SQLITE_ROW) { + count = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return count; +} + +static int summary_file_row_cmp(const void *lhs, const void *rhs) { + const arch_summary_file_row_t *a = lhs; + const arch_summary_file_row_t *b = rhs; + int64_t a_score = (int64_t)a->inbound_calls + (int64_t)a->outbound_calls; + int64_t b_score = (int64_t)b->inbound_calls + (int64_t)b->outbound_calls; + if (a_score != b_score) { + return b_score > a_score ? 
1 : -1;
+    }
+    if (a->inbound_calls != b->inbound_calls) {
+        return b->inbound_calls > a->inbound_calls ? 1 : -1;
+    }
+    if (!a->path) {
+        return b->path ? 1 : 0; /* two NULL paths compare equal so cmp(a,b) and cmp(b,a) agree */
+    }
+    if (!b->path) {
+        return -1;
+    }
+    return strcmp(a->path, b->path);
+}
+
+static void summary_copy_json_scalar(yyjson_val *val, char *out, size_t out_sz) { /* Render a scalar JSON value as text into out; out is "" for a NULL or non-scalar value. */
+    if (out_sz == 0) {
+        return;
+    }
+    out[0] = '\0';
+    if (!val) {
+        return;
+    }
+
+    if (yyjson_is_str(val)) {
+        snprintf(out, out_sz, "%s", yyjson_get_str(val));
+        return;
+    }
+    if (yyjson_is_bool(val)) {
+        snprintf(out, out_sz, "%s", yyjson_get_bool(val) ? "true" : "false");
+        return;
+    }
+    if (yyjson_is_uint(val)) { /* tested before sint: values above INT64_MAX must not go through the signed getter */
+        snprintf(out, out_sz, "%llu", (unsigned long long)yyjson_get_uint(val));
+        return;
+    }
+    if (yyjson_is_sint(val)) { /* negative integers only; yyjson_is_int would also match the unsigned subtype */
+        snprintf(out, out_sz, "%lld", (long long)yyjson_get_sint(val));
+        return;
+    }
+    if (yyjson_is_real(val)) {
+        snprintf(out, out_sz, "%.17g", yyjson_get_real(val));
+    }
+}
+
+static void summary_extract_route_fields(const char *props, char *method, size_t method_sz,
+                                         char *path, size_t path_sz, char *handler,
+                                         size_t handler_sz) { /* Parse the JSON text in props and copy its "method", "path" and "handler" members. */
+    method[0] = '\0'; /* outputs stay empty when props is NULL, unparsable, or not a JSON object */
+    path[0] = '\0';
+    handler[0] = '\0';
+    if (!props) {
+        return;
+    }
+
+    yyjson_doc *doc = yyjson_read(props, strlen(props), 0);
+    if (!doc) {
+        return;
+    }
+
+    yyjson_val *root = yyjson_doc_get_root(doc);
+    if (yyjson_is_obj(root)) {
+        summary_copy_json_scalar(yyjson_obj_get(root, "method"), method, method_sz);
+        summary_copy_json_scalar(yyjson_obj_get(root, "path"), path, path_sz);
+        summary_copy_json_scalar(yyjson_obj_get(root, "handler"), handler, handler_sz);
+    }
+    yyjson_doc_free(doc);
+}
+
+static int summary_collect_file_rows(cbm_store_t *s, const char *project, const char *focus_like,
+                                     arch_summary_file_row_t **out_rows, int *out_count,
+                                     cbm_louvain_edge_t **out_edges, int *out_edge_count) {
+    *out_rows = NULL;
+    *out_count = 0;
+    *out_edges = NULL;
+    *out_edge_count = 0;
+
+    const char *files_sql_no_focus =
+        "SELECT id, COALESCE(NULLIF(file_path, 
''), name) " + "FROM nodes " + "WHERE project=?1 AND label='File' " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%test%' " + "ORDER BY 2;"; + const char *files_sql_focus = + "SELECT n.id, COALESCE(NULLIF(n.file_path, ''), n.name) " + "FROM nodes n " + "WHERE n.project=?1 AND n.label='File' " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%test%' " + "AND (lower(COALESCE(n.file_path, '')) LIKE ?2 " + "OR lower(COALESCE(n.name, '')) LIKE ?2 " + "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 " + "OR EXISTS (SELECT 1 FROM nodes m " + " WHERE m.project = n.project " + " AND m.file_path = n.file_path " + " AND lower(COALESCE(m.file_path, '')) NOT LIKE '%test%' " + " AND (lower(COALESCE(m.name, '')) LIKE ?2 " + " OR lower(COALESCE(m.qualified_name, '')) LIKE ?2 " + " OR lower(COALESCE(m.file_path, '')) LIKE ?2 " + " OR lower(COALESCE(m.properties, '')) LIKE ?2))) " + "ORDER BY 2;"; + + sqlite3_stmt *stmt = NULL; + const char *files_sql = (focus_like && focus_like[0]) ? files_sql_focus : files_sql_no_focus; + if (sqlite3_prepare_v2(s->db, files_sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_files"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + int cap = 16; + int count = 0; + arch_summary_file_row_t *rows = calloc(cap, sizeof(arch_summary_file_row_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + int old_cap = cap; + cap *= 2; + rows = safe_realloc(rows, (size_t)cap * sizeof(arch_summary_file_row_t)); + memset(rows + old_cap, 0, (size_t)(cap - old_cap) * sizeof(arch_summary_file_row_t)); + } + rows[count].node_id = sqlite3_column_int64(stmt, 0); + rows[count].path = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + count++; + } + sqlite3_finalize(stmt); + + CBMHashTable *file_rows = cbm_ht_create(count > 0 ? 
(uint32_t)count * 2U : 32U); + if (!file_rows) { + for (int i = 0; i < count; i++) { + free(rows[i].path); + } + free(rows); + store_set_error(s, "summary_collect_file_rows: file lookup alloc failed"); + return CBM_STORE_ERR; + } + for (int i = 0; i < count; i++) { + if (rows[i].path && rows[i].path[0]) { + cbm_ht_set(file_rows, rows[i].path, (void *)((intptr_t)i + 1)); + } + } + + const char *edges_sql = + "SELECT src.file_path, dst.file_path " + "FROM edges e " + "JOIN nodes src ON src.id = e.source_id " + "JOIN nodes dst ON dst.id = e.target_id " + "WHERE e.project=?1 AND e.type='CALLS' " + "AND src.file_path <> '' AND dst.file_path <> '' " + "AND src.file_path <> dst.file_path " + "AND lower(src.file_path) NOT LIKE '%test%' " + "AND lower(dst.file_path) NOT LIKE '%test%';"; + + stmt = NULL; + if (sqlite3_prepare_v2(s->db, edges_sql, -1, &stmt, NULL) != SQLITE_OK) { + cbm_ht_free(file_rows); + for (int i = 0; i < count; i++) { + free(rows[i].path); + } + free(rows); + store_set_error_sqlite(s, "summary_collect_edges"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int edge_cap = 64; + int edge_count = 0; + cbm_louvain_edge_t *edges = malloc((size_t)edge_cap * sizeof(cbm_louvain_edge_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *src_file = (const char *)sqlite3_column_text(stmt, 0); + const char *dst_file = (const char *)sqlite3_column_text(stmt, 1); + intptr_t src_val = (intptr_t)(src_file ? cbm_ht_get(file_rows, src_file) : NULL); + intptr_t dst_val = (intptr_t)(dst_file ? cbm_ht_get(file_rows, dst_file) : NULL); + int src_idx = src_val ? (int)(src_val - 1) : -1; + int dst_idx = dst_val ? 
(int)(dst_val - 1) : -1; + if (src_idx < 0 || dst_idx < 0 || src_idx == dst_idx) { + continue; + } + + rows[src_idx].outbound_calls++; + rows[dst_idx].inbound_calls++; + + if (edge_count >= edge_cap) { + edge_cap *= 2; + edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_louvain_edge_t)); + } + edges[edge_count].src = rows[src_idx].node_id; + edges[edge_count].dst = rows[dst_idx].node_id; + edge_count++; + } + sqlite3_finalize(stmt); + cbm_ht_free(file_rows); + + *out_rows = rows; + *out_count = count; + *out_edges = edges; + *out_edge_count = edge_count; + return CBM_STORE_OK; +} + +static int summary_fill_key_symbols(cbm_store_t *s, const char *project, cbm_arch_summary_file_t *file) { + const char *sql = + "SELECT name, " + "CASE WHEN end_line >= start_line AND start_line > 0 " + "THEN end_line - start_line + 1 ELSE 0 END AS span " + "FROM nodes " + "WHERE project=?1 AND file_path=?2 " + "AND label IN ('Function','Method') " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "ORDER BY span DESC, name " + "LIMIT 3;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_fill_key_symbols"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + bind_text(stmt, 2, file->file); + + int cap = 4; + int count = 0; + cbm_arch_summary_symbol_t *symbols = calloc(cap, sizeof(cbm_arch_summary_symbol_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + cap *= 2; + symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_arch_summary_symbol_t)); + } + symbols[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + symbols[count].span_lines = sqlite3_column_int(stmt, 1); + count++; + } + sqlite3_finalize(stmt); + + file->symbols = symbols; + file->symbol_count = count; + return CBM_STORE_OK; +} + +static int summary_find_handler_node(cbm_store_t *s, const char *project, const char 
*handler, + int64_t *out_id, char **out_file_path) { + *out_id = 0; + *out_file_path = NULL; + if (!handler || !handler[0]) { + return CBM_STORE_NOT_FOUND; + } + + char owner[256] = ""; + char member[256] = ""; + const char *sep = strchr(handler, '@'); + int sep_len = 1; + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + sep = scope; + sep_len = 2; + } + + if (sep) { + size_t owner_len = (size_t)(sep - handler); + if (owner_len >= sizeof(owner)) { + owner_len = sizeof(owner) - 1; + } + memcpy(owner, handler, owner_len); + owner[owner_len] = '\0'; + snprintf(member, sizeof(member), "%s", sep + sep_len); + } else { + snprintf(member, sizeof(member), "%s", handler); + } + + cbm_node_t *nodes = NULL; + int count = 0; + if (cbm_store_find_nodes_by_name(s, project, member, &nodes, &count) != CBM_STORE_OK || + count == 0) { + cbm_store_free_nodes(nodes, count); + return CBM_STORE_NOT_FOUND; + } + + int pick = 0; + for (int i = 0; i < count; i++) { + if (owner[0] && + ((nodes[i].qualified_name && cbm_strcasestr(nodes[i].qualified_name, owner)) || + (nodes[i].file_path && cbm_strcasestr(nodes[i].file_path, owner)))) { + pick = i; + break; + } + if (!owner[0] && nodes[i].file_path && !cbm_is_test_file_path(nodes[i].file_path)) { + pick = i; + break; + } + } + + *out_id = nodes[pick].id; + if (nodes[pick].file_path && nodes[pick].file_path[0]) { + *out_file_path = heap_strdup(nodes[pick].file_path); + } + cbm_store_free_nodes(nodes, count); + return CBM_STORE_OK; +} + +static int summary_query_primary_callee(cbm_store_t *s, int64_t source_id, const char *source_file, + char **out_name, int64_t *out_id, char **out_file_path) { + *out_name = NULL; + if (out_id) { + *out_id = 0; + } + if (out_file_path) { + *out_file_path = NULL; + } + + const char *sql = + "SELECT n.id, n.name, COALESCE(n.file_path, '') " + "FROM edges e " + "JOIN nodes n ON n.id = e.target_id " + "WHERE e.source_id=?1 AND e.type='CALLS' " + "AND (json_extract(n.properties, 
'$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "ORDER BY CASE " + " WHEN ?2 <> '' AND COALESCE(n.file_path, '') <> '' " + " AND COALESCE(n.file_path, '') <> ?2 THEN 0 " + " ELSE 1 " + " END, " + " CASE WHEN n.label IN ('Method','Function') THEN 0 ELSE 1 END, " + " n.name " + "LIMIT 1;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_query_primary_callee"); + return CBM_STORE_ERR; + } + sqlite3_bind_int64(stmt, 1, source_id); + bind_text(stmt, 2, source_file ? source_file : ""); + + if (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 1); + const char *file_path = (const char *)sqlite3_column_text(stmt, 2); + if (out_id) { + *out_id = sqlite3_column_int64(stmt, 0); + } + *out_name = heap_strdup(name); + if (out_file_path && file_path && file_path[0]) { + *out_file_path = heap_strdup(file_path); + } + } + sqlite3_finalize(stmt); + return CBM_STORE_OK; +} + +static int summary_collect_routes(cbm_store_t *s, const char *project, const char *focus_term, + cbm_arch_summary_route_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + const char *sql = + "SELECT name, properties, COALESCE(file_path, '') " + "FROM nodes " + "WHERE project=?1 AND label='Route' " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "ORDER BY name;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_routes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int count = 0; + cbm_arch_summary_route_t *routes = calloc(cap, sizeof(cbm_arch_summary_route_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 0); + const char *props = (const char *)sqlite3_column_text(stmt, 1); + 
const char *route_file = (const char *)sqlite3_column_text(stmt, 2); + if (cbm_is_test_file_path(route_file)) { + continue; + } + + char method[64]; + char path[512]; + char handler[256]; + summary_extract_route_fields(props, method, sizeof(method), path, sizeof(path), handler, + sizeof(handler)); + + if (!path[0] && name) { + snprintf(path, sizeof(path), "%s", name); + } + + int64_t handler_id = 0; + char *handler_file = NULL; + if (handler[0]) { + (void)summary_find_handler_node(s, project, handler, &handler_id, &handler_file); + } + + char *service = NULL; + char *next = NULL; + char *service_file = NULL; + int64_t service_id = 0; + if (handler_id > 0) { + (void)summary_query_primary_callee(s, handler_id, handler_file, &service, &service_id, + &service_file); + if (service_id > 0) { + (void)summary_query_primary_callee(s, service_id, + service_file ? service_file : handler_file, &next, + NULL, NULL); + } + } + + if (focus_term && focus_term[0] && + !summary_text_matches(focus_term, method) && + !summary_text_matches(focus_term, path) && + !summary_text_matches(focus_term, handler) && + !summary_text_matches(focus_term, service) && + !summary_text_matches(focus_term, next) && + !summary_text_matches(focus_term, handler_file)) { + free(service_file); + free(handler_file); + free(service); + free(next); + continue; + } + + if (count >= cap) { + cap *= 2; + routes = safe_realloc(routes, (size_t)cap * sizeof(cbm_arch_summary_route_t)); + } + + routes[count].method = heap_strdup(method[0] ? method : ""); + routes[count].path = heap_strdup(path[0] ? path : ""); + routes[count].handler = heap_strdup(handler[0] ? 
handler : ""); + routes[count].service = service; + routes[count].next = next; + routes[count].handler_file = handler_file; + free(service_file); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = routes; + *out_count = count; + return CBM_STORE_OK; +} + +static void summary_cluster_add_member(arch_summary_cluster_row_t *cluster, int member_index) { + if (cluster->member_count >= cluster->member_cap) { + cluster->member_cap = cluster->member_cap ? cluster->member_cap * 2 : 4; + cluster->member_indices = + safe_realloc(cluster->member_indices, (size_t)cluster->member_cap * sizeof(int)); + } + cluster->member_indices[cluster->member_count++] = member_index; +} + +static const char *summary_handler_owner_token(const char *handler, char *buf, size_t buf_size) { + if (!handler || !handler[0] || !buf || buf_size == 0) { + return NULL; + } + + const char *sep = strchr(handler, '@'); + size_t len = sep ? (size_t)(sep - handler) : strlen(handler); + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + len = (size_t)(scope - handler); + } + if (len == 0) { + return NULL; + } + if (len >= buf_size) { + len = buf_size - 1; + } + memcpy(buf, handler, len); + buf[len] = '\0'; + return buf; +} + +static int summary_cluster_cmp(const void *lhs, const void *rhs) { + const arch_summary_cluster_row_t *a = lhs; + const arch_summary_cluster_row_t *b = rhs; + return b->member_count - a->member_count; +} + +static bool summary_cluster_has_file(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, const char *path) { + if (!cluster || !rows || !path) { + return false; + } + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && strcmp(candidate, path) == 0) { + return true; + } + } + return false; +} + +static bool summary_cluster_matches_route(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, + const 
cbm_arch_summary_route_t *route) { + char owner_buf[256]; + const char *owner = NULL; + + if (!cluster || !rows || !route) { + return false; + } + if (summary_cluster_has_file(cluster, rows, route->handler_file)) { + return true; + } + + owner = summary_handler_owner_token(route->handler, owner_buf, sizeof(owner_buf)); + if (!owner || !owner[0]) { + return false; + } + + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && cbm_strcasestr(candidate, owner)) { + return true; + } + } + return false; +} + +static int summary_build_clusters(const arch_summary_file_row_t *rows, int row_count, + const cbm_louvain_edge_t *edges, int edge_count, + const cbm_arch_summary_route_t *routes, int route_count, + cbm_arch_summary_cluster_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + if (row_count == 0 || edge_count == 0) { + return CBM_STORE_OK; + } + + int64_t *node_ids = malloc((size_t)row_count * sizeof(int64_t)); + for (int i = 0; i < row_count; i++) { + node_ids[i] = rows[i].node_id; + } + + cbm_louvain_result_t *results = NULL; + int result_count = 0; + int rc = cbm_louvain(node_ids, row_count, edges, edge_count, &results, &result_count); + free(node_ids); + if (rc != CBM_STORE_OK) { + return rc; + } + + CBMHashTable *row_by_id = cbm_ht_create(row_count > 0 ? 
(uint32_t)row_count * 2U : 32U); + char(*row_id_keys)[32] = NULL; + if (!row_by_id) { + free(results); + return CBM_STORE_ERR; + } + row_id_keys = calloc((size_t)row_count, sizeof(*row_id_keys)); + if (!row_id_keys) { + cbm_ht_free(row_by_id); + free(results); + return CBM_STORE_ERR; + } + for (int i = 0; i < row_count; i++) { + snprintf(row_id_keys[i], sizeof(row_id_keys[i]), "%lld", (long long)rows[i].node_id); + cbm_ht_set(row_by_id, row_id_keys[i], (void *)((intptr_t)i + 1)); + } + + int cap = 8; + int count = 0; + arch_summary_cluster_row_t *clusters = calloc(cap, sizeof(arch_summary_cluster_row_t)); + for (int i = 0; i < result_count; i++) { + char result_key[32]; + snprintf(result_key, sizeof(result_key), "%lld", (long long)results[i].node_id); + intptr_t row_val = (intptr_t)cbm_ht_get(row_by_id, result_key); + int member_index = row_val ? (int)(row_val - 1) : -1; + if (member_index < 0) { + continue; + } + + int slot = -1; + for (int j = 0; j < count; j++) { + if (clusters[j].community == results[i].community) { + slot = j; + break; + } + } + if (slot < 0) { + if (count >= cap) { + int old_cap = cap; + cap *= 2; + clusters = safe_realloc(clusters, (size_t)cap * sizeof(arch_summary_cluster_row_t)); + memset(clusters + old_cap, 0, + (size_t)(cap - old_cap) * sizeof(arch_summary_cluster_row_t)); + } + slot = count++; + clusters[slot].community = results[i].community; + } + summary_cluster_add_member(&clusters[slot], member_index); + } + free(row_id_keys); + cbm_ht_free(row_by_id); + free(results); + + int write_idx = 0; + for (int i = 0; i < count; i++) { + if (clusters[i].member_count < 2) { + free(clusters[i].member_indices); + continue; + } + if (write_idx != i) { + clusters[write_idx] = clusters[i]; + } + write_idx++; + } + count = write_idx; + if (count == 0) { + free(clusters); + return CBM_STORE_OK; + } + + qsort(clusters, (size_t)count, sizeof(arch_summary_cluster_row_t), summary_cluster_cmp); + if (count > 6) { + for (int i = 6; i < count; i++) { + 
free(clusters[i].member_indices); + } + count = 6; + } + + cbm_arch_summary_cluster_t *out = calloc((size_t)count, sizeof(cbm_arch_summary_cluster_t)); + for (int i = 0; i < count; i++) { + out[i].id = i + 1; + out[i].file_count = clusters[i].member_count; + + int core_count = clusters[i].member_count < 3 ? clusters[i].member_count : 3; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + out[i].core_files = calloc((size_t)core_count, sizeof(char *)); + out[i].core_file_count = core_count; + for (int core = 0; core < core_count; core++) { + int best_idx = -1; + int best_score = -1; + for (int m = 0; m < clusters[i].member_count; m++) { + int row_idx = clusters[i].member_indices[m]; + int score = rows[row_idx].inbound_calls + rows[row_idx].outbound_calls; + bool already_used = false; + for (int prev = 0; prev < core; prev++) { + const char *used = out[i].core_files[prev]; + if (used && rows[row_idx].path && strcmp(used, rows[row_idx].path) == 0) { + already_used = true; + break; + } + } + if (!already_used && score > best_score) { + best_score = score; + best_idx = row_idx; + } + } + if (best_idx >= 0) { + ((char **)out[i].core_files)[core] = heap_strdup(rows[best_idx].path); + } + } + + int entry_cap = 4; + int entry_count = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **entries = calloc((size_t)entry_cap, sizeof(char *)); + for (int r = 0; r < route_count; r++) { + if (!summary_cluster_matches_route(&clusters[i], rows, &routes[r])) { + continue; + } + char label[768]; + if (routes[r].method && routes[r].method[0]) { + snprintf(label, sizeof(label), "%s %s", routes[r].method, + routes[r].path ? routes[r].path : ""); + } else { + snprintf(label, sizeof(label), "%s", routes[r].path ? 
routes[r].path : ""); + } + bool duplicate = false; + for (int e = 0; e < entry_count; e++) { + if (strcmp(entries[e], label) == 0) { + duplicate = true; + break; + } + } + if (duplicate) { + continue; + } + if (entry_count >= entry_cap) { + entry_cap *= 2; + entries = safe_realloc(entries, (size_t)entry_cap * sizeof(char *)); + } + entries[entry_count++] = heap_strdup(label); + } + out[i].entry_points = (const char **)entries; + out[i].entry_point_count = entry_count; + free(clusters[i].member_indices); + } + free(clusters); + + *out_arr = out; + *out_count = count; + return CBM_STORE_OK; +} + +static int summary_collect_hot_functions(cbm_store_t *s, const char *project, const char *focus_like, + cbm_arch_summary_function_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(n.name, '')) LIKE ?2 " + "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(n.file_path, '')) LIKE ?2 " + "OR lower(COALESCE(n.properties, '')) LIKE ?2) " + "GROUP BY n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY 
n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_hot_functions"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + int cap = 8; + int count = 0; + cbm_arch_summary_function_t *arr = calloc(cap, sizeof(cbm_arch_summary_function_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + cap *= 2; + arr = safe_realloc(arr, (size_t)cap * sizeof(cbm_arch_summary_function_t)); + } + arr[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[count].file = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[count].in_degree = sqlite3_column_int(stmt, 2); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = arr; + *out_count = count; + return CBM_STORE_OK; +} + +static const char *summary_entry_group_name(const char *label) { + if (!label || !label[0]) { + return "Other"; + } + if (strcmp(label, "Route") == 0) { + return "Routes"; + } + if (cbm_strcasestr(label, "command")) { + return "Commands"; + } + if (cbm_strcasestr(label, "job")) { + return "Jobs"; + } + if (cbm_strcasestr(label, "cron") || cbm_strcasestr(label, "schedule")) { + return "Crons"; + } + if (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0) { + return "Functions"; + } + return "Other"; +} + +static int summary_add_entry_group(cbm_arch_summary_entry_group_t **groups, int *count, int *cap, + const char *kind, int add_count) { + if (!kind || add_count <= 0) { + return CBM_STORE_OK; + } + for (int i = 0; i < *count; i++) { + if (strcmp((*groups)[i].kind, kind) == 0) { + (*groups)[i].count += add_count; + return CBM_STORE_OK; + } + } + if (*count >= *cap) { + *cap = *cap ? 
*cap * 2 : 4; + *groups = safe_realloc(*groups, (size_t)*cap * sizeof(cbm_arch_summary_entry_group_t)); + } + (*groups)[*count].kind = heap_strdup(kind); + (*groups)[*count].count = add_count; + (*count)++; + return CBM_STORE_OK; +} + +static int summary_collect_entry_points(cbm_store_t *s, const char *project, const char *focus_like, + int route_count, cbm_arch_summary_entry_group_t **out_arr, + int *out_count) { + *out_arr = NULL; + *out_count = 0; + + int cap = 4; + int count = 0; + cbm_arch_summary_entry_group_t *groups = + calloc((size_t)cap, sizeof(cbm_arch_summary_entry_group_t)); + if (route_count > 0) { + (void)summary_add_entry_group(&groups, &count, &cap, "Routes", route_count); + } + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(name, '')) LIKE ?2 " + "OR lower(COALESCE(qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(file_path, '')) LIKE ?2 " + "OR lower(COALESCE(properties, '')) LIKE ?2) " + "GROUP BY label;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY label;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(groups); + store_set_error_sqlite(s, "summary_collect_entry_points"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == 
SQLITE_ROW) { + const char *label = (const char *)sqlite3_column_text(stmt, 0); + int label_count = sqlite3_column_int(stmt, 1); + const char *kind = summary_entry_group_name(label); + if (strcmp(kind, "Routes") == 0) { + continue; + } + (void)summary_add_entry_group(&groups, &count, &cap, kind, label_count); + } + sqlite3_finalize(stmt); + + *out_arr = groups; + *out_count = count; + return CBM_STORE_OK; +} + +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !project) { + return CBM_STORE_ERR; + } + + char *focus_like = summary_focus_like(focus); + char *focus_term = summary_focus_term(focus); + arch_summary_file_row_t *rows = NULL; + cbm_louvain_edge_t *edges = NULL; + int row_count = 0; + int edge_count = 0; + int rc = summary_collect_file_rows(s, project, focus_like, &rows, &row_count, &edges, &edge_count); + if (rc != CBM_STORE_OK) { + free(focus_like); + free(focus_term); + return rc; + } + out->total_files = summary_count_nodes(s, project, "label='File'", NULL); + out->total_functions = summary_count_nodes(s, project, "label IN ('Function','Method')", NULL); + out->total_classes = summary_count_nodes(s, project, "label IN ('Class','Interface')", NULL); + out->total_routes = summary_count_nodes(s, project, "label='Route'", NULL); + + qsort(rows, (size_t)row_count, sizeof(arch_summary_file_row_t), summary_file_row_cmp); + int top_file_count = row_count < 15 ? 
row_count : 15; + if (top_file_count > 0) { + out->files = calloc((size_t)top_file_count, sizeof(cbm_arch_summary_file_t)); + out->file_count = top_file_count; + for (int i = 0; i < top_file_count; i++) { + out->files[i].file = heap_strdup(rows[i].path); + out->files[i].inbound_calls = rows[i].inbound_calls; + out->files[i].outbound_calls = rows[i].outbound_calls; + rc = summary_fill_key_symbols(s, project, &out->files[i]); + if (rc != CBM_STORE_OK) { + break; + } + } + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_routes(s, project, focus_term, &out->routes, &out->route_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_build_clusters(rows, row_count, edges, edge_count, out->routes, out->route_count, + &out->clusters, &out->cluster_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_hot_functions(s, project, focus_like, &out->functions, + &out->function_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_entry_points(s, project, focus_like, out->total_routes, + &out->entry_points, &out->entry_point_count); + } + + for (int i = 0; i < row_count; i++) { + free(rows[i].path); + } + free(rows); + free(edges); + free(focus_like); + free(focus_term); + + if (rc != CBM_STORE_OK) { + cbm_store_architecture_summary_free(out); + } + return rc; +} + +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->file_count; i++) { + free((void *)out->files[i].file); + for (int j = 0; j < out->files[i].symbol_count; j++) { + free((void *)out->files[i].symbols[j].name); + } + free(out->files[i].symbols); + } + free(out->files); + + for (int i = 0; i < out->route_count; i++) { + free((void *)out->routes[i].method); + free((void *)out->routes[i].path); + free((void *)out->routes[i].handler); + free((void *)out->routes[i].service); + free((void *)out->routes[i].next); + free((void *)out->routes[i].handler_file); + } + free(out->routes); + + for (int i = 0; i < out->cluster_count; 
i++) { + for (int j = 0; j < out->clusters[i].core_file_count; j++) { + free((void *)out->clusters[i].core_files[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].core_files); + for (int j = 0; j < out->clusters[i].entry_point_count; j++) { + free((void *)out->clusters[i].entry_points[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].entry_points); + } + free(out->clusters); + + for (int i = 0; i < out->function_count; i++) { + free((void *)out->functions[i].name); + free((void *)out->functions[i].file); + } + free(out->functions); + + for (int i = 0; i < out->entry_point_count; i++) { + free((void *)out->entry_points[i].kind); + } + free(out->entry_points); + memset(out, 0, sizeof(*out)); +} + /* ── ADR (Architecture Decision Record) ────────────────────────── */ static const char *canonical_sections[] = {"PURPOSE", "STACK", "ARCHITECTURE", diff --git a/src/store/store.h b/src/store/store.h index 17b0df11..d02fec77 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -514,6 +514,69 @@ int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char * int aspect_count, cbm_architecture_info_t *out); void cbm_store_architecture_free(cbm_architecture_info_t *out); +typedef struct { + const char *name; + int span_lines; +} cbm_arch_summary_symbol_t; + +typedef struct { + const char *file; + int inbound_calls; + int outbound_calls; + cbm_arch_summary_symbol_t *symbols; + int symbol_count; +} cbm_arch_summary_file_t; + +typedef struct { + const char *method; + const char *path; + const char *handler; + const char *service; + const char *next; + const char *handler_file; +} cbm_arch_summary_route_t; + +typedef struct { + int id; + int file_count; + const char **core_files; + int core_file_count; + const char **entry_points; + int entry_point_count; +} cbm_arch_summary_cluster_t; + +typedef struct { + const char *name; + const char *file; + int 
in_degree; +} cbm_arch_summary_function_t; + +typedef struct { + const char *kind; + int count; +} cbm_arch_summary_entry_group_t; + +typedef struct { + cbm_arch_summary_file_t *files; + cbm_arch_summary_route_t *routes; + cbm_arch_summary_cluster_t *clusters; + cbm_arch_summary_function_t *functions; + cbm_arch_summary_entry_group_t *entry_points; + int total_files; + int total_functions; + int total_classes; + int total_routes; + int file_count; + int route_count; + int cluster_count; + int function_count; + int entry_point_count; +} cbm_architecture_summary_t; + +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out); +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out); + /* ── ADR (Architecture Decision Record) ────────────────────────── */ #define CBM_ADR_MAX_LENGTH 8000 diff --git a/tests/test_integration.c b/tests/test_integration.c index 046cb856..318bce3e 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -367,6 +367,20 @@ TEST(integ_mcp_get_architecture) { PASS(); } +TEST(integ_mcp_get_architecture_summary) { + char args[512]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"max_tokens\":1200,\"focus\":\"main\"}", g_project); + + char *resp = call_tool("get_architecture_summary", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "## Key Files")); + ASSERT_NOT_NULL(strstr(resp, "main.py")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -554,6 +568,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_query_graph_calls); RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); + RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 
77ec9c99..dd3c0bb2 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -6,10 +6,12 @@ #include "../src/foundation/compat.h" #include "test_framework.h" #include +#include #include #include #include #include +#include /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING @@ -129,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 14 tools */ + /* Should contain all 15 tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -137,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); ASSERT_NOT_NULL(strstr(json, "delete_project")); @@ -490,6 +493,142 @@ TEST(tool_get_architecture_empty) { PASS(); } +TEST(tool_get_architecture_summary_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + + char *resp = + cbm_mcp_server_handle(srv, "{\"jsonrpc\":\"2.0\",\"id\":25,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{}}}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "project is required")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + +static cbm_mcp_server_t *setup_arch_summary_server(char *tmp_dir, size_t tmp_sz) { + snprintf(tmp_dir, tmp_sz, "/tmp/cbm_mcp_arch_XXXXXX"); + if (!cbm_mkdtemp(tmp_dir)) { + return NULL; + } + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + rmdir(tmp_dir); + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + char 
*proj_name = cbm_project_name_from_path(tmp_dir); + if (!proj_name) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj_name); + cbm_store_upsert_project(st, proj_name, tmp_dir); + + int64_t prev_fn_id = 0; + for (int i = 0; i < 24; i++) { + char file_name[64]; + char file_qn[128]; + char fn_name[32]; + char fn_qn[160]; + + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "%s.pkg.file%02d", proj_name, i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "%s.pkg.file%02d.%s", proj_name, i, fn_name); + + cbm_node_t file = {.project = proj_name, + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(st, &file); + + cbm_node_t fn = {.project = proj_name, + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name, + .start_line = 1, + .end_line = 40 + i}; + int64_t fn_id = cbm_store_upsert_node(st, &fn); + if (prev_fn_id > 0) { + cbm_edge_t edge = { + .project = proj_name, .source_id = prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + prev_fn_id = fn_id; + } + + free(proj_name); + return srv; +} + +static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { + cbm_mcp_server_free(srv); + if (tmp_dir && tmp_dir[0]) { + rmdir(tmp_dir); + } +} + +TEST(tool_get_architecture_summary_truncated) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":26,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project\":\"%s\",\"max_tokens\":1}}}", + proj_name); + + char *resp = 
cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "_Truncated at max_tokens._")); + free(resp); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + +TEST(tool_get_architecture_summary_project_path_alias) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":27,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project_path\":\"%s\",\"max_tokens\":64}}}", + tmp_dir); + + char *resp = cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + free(resp); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + TEST(tool_query_graph_missing_query) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -1703,6 +1842,9 @@ SUITE(mcp) { RUN_TEST(tool_trace_missing_function_name); RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); + RUN_TEST(tool_get_architecture_summary_missing_project); + RUN_TEST(tool_get_architecture_summary_truncated); + RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_store_arch.c b/tests/test_store_arch.c index 32663f3a..1427042a 100644 --- a/tests/test_store_arch.c +++ b/tests/test_store_arch.c @@ -379,6 +379,215 @@ TEST(arch_clusters) { PASS(); } +TEST(arch_summary_basic) { + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_TRUE(summary.file_count > 
0); + ASSERT_NOT_NULL(summary.files[0].file); + ASSERT_EQ(summary.route_count, 1); + ASSERT_STR_EQ(summary.routes[0].handler, "HandleRequest"); + ASSERT_STR_EQ(summary.routes[0].service, "ProcessOrder"); + ASSERT_STR_EQ(summary.routes[0].next, "ValidateOrder"); + ASSERT_TRUE(summary.entry_point_count > 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_focus) { + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", "service", &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_TRUE(summary.file_count >= 1); + ASSERT_NOT_NULL(strstr(summary.files[0].file, "service.go")); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_EQ(summary.route_count, 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_many_files) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + int64_t prev_fn_id = 0; + for (int i = 0; i < 20; i++) { + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "test.pkg.file%02d", i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "test.pkg.file%02d.%s", i, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + int64_t fn_id = cbm_store_upsert_node(s, &fn); + if (prev_fn_id > 0) { + cbm_edge_t e = { + .project = "test", .source_id = 
prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &e); + } + prev_fn_id = fn_id; + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 20); + ASSERT_TRUE(summary.file_count > 0); + ASSERT_TRUE(summary.total_functions >= 20); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_cluster_growth) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + for (int pair = 0; pair < 9; pair++) { + int64_t pair_ids[2] = {0}; + for (int idx = 0; idx < 2; idx++) { + int file_no = pair * 2 + idx; + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "cluster/file%02d.go", file_no); + snprintf(file_qn, sizeof(file_qn), "test.cluster.file%02d", file_no); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", file_no); + snprintf(fn_qn, sizeof(fn_qn), "test.cluster.file%02d.%s", file_no, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + pair_ids[idx] = cbm_store_upsert_node(s, &fn); + } + + cbm_edge_t edge = { + .project = "test", .source_id = pair_ids[0], .target_id = pair_ids[1], .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 18); + ASSERT_TRUE(summary.cluster_count > 0); + ASSERT_TRUE(summary.clusters[0].file_count >= 2); + 
+ cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_cluster_entry_fallback) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + cbm_node_t controller_file = {.project = "test", + .label = "File", + .name = "app/Http/Controllers/OrderController.php", + .qualified_name = "test.app.controllers.OrderController", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_file = {.project = "test", + .label = "File", + .name = "app/Services/OrderService.php", + .qualified_name = "test.app.services.OrderService", + .file_path = "app/Services/OrderService.php"}; + cbm_store_upsert_node(s, &controller_file); + cbm_store_upsert_node(s, &service_file); + + cbm_node_t controller_fn = {.project = "test", + .label = "Method", + .name = "handle", + .qualified_name = "test.app.controllers.OrderController.handle", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_fn = {.project = "test", + .label = "Method", + .name = "processOrder", + .qualified_name = "test.app.services.OrderService.processOrder", + .file_path = "app/Services/OrderService.php"}; + int64_t controller_id = cbm_store_upsert_node(s, &controller_fn); + int64_t service_id = cbm_store_upsert_node(s, &service_fn); + + cbm_edge_t edge = { + .project = "test", .source_id = controller_id, .target_id = service_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + + cbm_node_t route = {.project = "test", + .label = "Route", + .name = "/orders", + .qualified_name = "test.routes.orders", + .properties_json = + "{\"method\":\"POST\",\"path\":\"/orders\",\"handler\":" + "\"OrderController@store\"}"}; + cbm_store_upsert_node(s, &route); + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + 
ASSERT_EQ(summary.route_count, 1); + ASSERT_TRUE(summary.routes[0].handler_file == NULL); + ASSERT_TRUE(summary.cluster_count > 0); + + bool found_entry = false; + for (int i = 0; i < summary.cluster_count; i++) { + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (strcmp(summary.clusters[i].entry_points[j], "POST /orders") == 0) { + found_entry = true; + break; + } + } + if (found_entry) { + break; + } + } + ASSERT_TRUE(found_entry); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* ── ADR tests ──────────────────────────────────────────────────── */ TEST(adr_store_and_retrieve) { @@ -978,6 +1187,11 @@ SUITE(store_arch) { RUN_TEST(arch_layers); RUN_TEST(arch_file_tree); RUN_TEST(arch_clusters); + RUN_TEST(arch_summary_basic); + RUN_TEST(arch_summary_focus); + RUN_TEST(arch_summary_many_files); + RUN_TEST(arch_summary_cluster_growth); + RUN_TEST(arch_summary_cluster_entry_fallback); /* ADR */ RUN_TEST(adr_store_and_retrieve); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index b433ff2a..6cfc93f3 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include /* ── Schema / Open / Close ──────────────────────────────────────── */ @@ -111,6 +113,33 @@ TEST(store_project_delete) { PASS(); } +TEST(store_open_path_query_readonly_db) { + char path[] = "/tmp/cbm_store_query_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_project(writer, "readonly-proj", "/tmp/readonly-proj"), CBM_STORE_OK); + cbm_store_close(writer); + + ASSERT_EQ(chmod(path, 0444), 0); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "readonly-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, 
"/tmp/readonly-proj"); + cbm_project_free_fields(&proj); + cbm_store_close(reader); + + chmod(path, 0644); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1511,6 +1540,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_crud); RUN_TEST(store_project_update); RUN_TEST(store_project_delete); + RUN_TEST(store_open_path_query_readonly_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From b6f16cf472ba80a2539f167ac8cb4957e25619ce Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:27:28 +0530 Subject: [PATCH 02/10] Add PageRank ranking to graph tools --- src/main.c | 1 + src/mcp/mcp.c | 127 ++++++++- src/mcp/mcp.h | 2 +- src/pipeline/pipeline.c | 9 + src/pipeline/pipeline_incremental.c | 63 +++-- src/store/store.c | 419 +++++++++++++++++++++++++++- src/store/store.h | 23 +- tests/test_integration.c | 31 +- tests/test_mcp.c | 113 +++++++- tests/test_pipeline.c | 40 +++ tests/test_store_search.c | 77 +++++ 11 files changed, 865 insertions(+), 40 deletions(-) diff --git a/src/main.c b/src/main.c index f442aa0c..46c13e6f 100644 --- a/src/main.c +++ b/src/main.c @@ -151,6 +151,7 @@ static void print_help(void) { printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); printf(" get_architecture_summary, search_code,\n"); + printf(" get_key_symbols,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 47329a1c..8b313dbb 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 16 graph tools. * * Uses yyjson for fast JSON parsing/building. 
* Single-threaded event loop: read line → parse → dispatch → respond. @@ -348,7 +348,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"integer\"},\"max_degree\":{\"type\":\"integer\"},\"exclude_entry_points\":{" "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " - "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0}},\"required\":[\"project\"]}"}, + "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"required\":[\"project\"]}"}, {"query_graph", "Execute a Cypher query against the knowledge graph for complex multi-hop patterns, " @@ -365,8 +367,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{" "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" - "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_" - "name\",\"project\"]}"}, + "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", "Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the " @@ -398,6 +401,14 @@ static const tool_def_t TOOLS[] = { "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"get_key_symbols", + "Human-readable ranked symbol list: top functions/classes by PageRank importance. 
Use this " + "for fast first-session orientation and central entry-point discovery.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"limit\":{\"type\":" + "\"integer\",\"default\":20},\"focus\":{\"type\":\"string\",\"description\":\"Optional " + "keyword to narrow symbols by name, qualified name, or file path.\"}},\"required\":[" + "\"project\"]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -611,6 +622,21 @@ bool cbm_mcp_get_bool_arg(const char *args_json, const char *key) { return result; } +static bool cbm_mcp_get_bool_arg_default(const char *args_json, const char *key, bool default_val) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return default_val; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + bool result = default_val; + if (val && yyjson_is_bool(val)) { + result = yyjson_get_bool(val); + } + yyjson_doc_free(doc); + return result; +} + /* ══════════════════════════════════════════════════════════════════ * MCP SERVER * ══════════════════════════════════════════════════════════════════ */ @@ -1061,6 +1087,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); cbm_search_params_t params = { .project = project, @@ -1071,6 +1098,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .offset = offset, .min_degree = min_degree, .max_degree = max_degree, + .sort_by = ranked ? 
"relevance" : "name", }; cbm_search_output_t out = {0}; @@ -1094,6 +1122,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { sr->node.file_path ? sr->node.file_path : ""); yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); yyjson_mut_arr_add_val(results, item); } yyjson_mut_obj_add_val(doc, root, "results", results); @@ -1335,6 +1364,82 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int limit = cbm_mcp_get_int_arg(args, "limit", 20); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(focus); + return not_indexed; + } + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + if (cbm_store_get_key_symbols(store, project, focus, limit, &symbols, &count) != + CBM_STORE_OK) { + free(project); + free(focus); + return cbm_mcp_text_result("failed to load key symbols", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "project", project ? project : ""); + yyjson_mut_obj_add_int(doc, root, "count", count); + yyjson_mut_val *results = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", symbols[i].name ? symbols[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + symbols[i].qualified_name ? 
symbols[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", symbols[i].label ? symbols[i].label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + symbols[i].file_path ? symbols[i].file_path : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", symbols[i].in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", symbols[i].out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", symbols[i].pagerank); + yyjson_mut_arr_add_val(results, item); + } + yyjson_mut_obj_add_val(doc, root, "results", results); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_key_symbols_free(symbols, count); + free(project); + free(focus); + + { + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; + } +} + +static int node_hop_rank_cmp(const void *lhs, const void *rhs) { + const cbm_node_hop_t *a = lhs; + const cbm_node_hop_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->hop != b->hop) { + return a->hop - b->hop; + } + if (!a->node.name || !b->node.name) { + return 0; + } + return strcmp(a->node.name, b->node.name); +} + static bool same_project_path(const char *lhs, const char *rhs) { if (!lhs || !rhs) { return false; @@ -1580,6 +1685,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); if (!func_name) { free(project); @@ -1645,6 +1751,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_outbound) { cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, 100, &tr_out); + if (ranked && tr_out.visited_count > 1) { + qsort(tr_out.visited, (size_t)tr_out.visited_count, sizeof(cbm_node_hop_t), + 
node_hop_rank_cmp); + } yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { @@ -1655,6 +1765,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_out.visited[i].node.qualified_name ? tr_out.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); yyjson_mut_arr_add_val(callees, item); } yyjson_mut_obj_add_val(doc, root, "callees", callees); @@ -1663,6 +1774,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_inbound) { cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, 100, &tr_in); + if (ranked && tr_in.visited_count > 1) { + qsort(tr_in.visited, (size_t)tr_in.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { @@ -1673,6 +1788,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_in.visited[i].node.qualified_name ? 
tr_in.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); yyjson_mut_arr_add_val(callers, item); } yyjson_mut_obj_add_val(doc, root, "callers", callers); @@ -3060,6 +3176,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_key_symbols") == 0) { + return handle_get_key_symbols(srv, args_json); + } if (strcmp(tool_name, "get_architecture_summary") == 0) { return handle_get_architecture_summary(srv, args_json); } diff --git a/src/mcp/mcp.h b/src/mcp/mcp.h index 7e65912e..1f24dd8c 100644 --- a/src/mcp/mcp.h +++ b/src/mcp/mcp.h @@ -2,7 +2,7 @@ * mcp.h — MCP (Model Context Protocol) server for codebase-memory-mcp. * * Implements JSON-RPC 2.0 over stdio with the MCP tool calling protocol. - * Provides 14 graph analysis tools (search, trace, query, index, etc.) + * Provides 16 graph analysis tools (search, trace, query, index, etc.) 
*/ #ifndef CBM_MCP_H #define CBM_MCP_H diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 66f47eac..41671775 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -816,8 +816,17 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { mtime_ns, fst.st_size); } } + if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != + CBM_STORE_OK) { + cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); + cbm_store_close(hash_store); + rc = -1; + goto cleanup; + } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); + cbm_log_info("pass.timing", "pass", "pagerank", "project", p->project_name); } } diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 1799f838..795f32d2 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -159,10 +159,11 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf /* ── Incremental pipeline entry point ────────────────────────────── */ -int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, - int file_count) { - struct timespec t0; - cbm_clock_gettime(CLOCK_MONOTONIC, &t0); +int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, + int file_count) { + struct timespec t0; + struct timespec t; + cbm_clock_gettime(CLOCK_MONOTONIC, &t0); const char *project = cbm_pipeline_project_name(p); @@ -191,14 +192,25 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.classify", "changed", itoa_buf(n_changed), "unchanged", itoa_buf(n_unchanged), "deleted", itoa_buf(deleted_count)); - /* Fast path: nothing changed → skip */ - if (n_changed == 0 && deleted_count == 0) { - cbm_log_info("incremental.noop", "reason", "no_changes"); - free(is_changed); - 
free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return 0; + /* Fast path: nothing changed → skip */ + if (n_changed == 0 && deleted_count == 0) { + cbm_log_info("incremental.noop", "reason", "no_changes"); + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return 0; } cbm_store_free_file_hashes(stored, stored_count); @@ -245,9 +257,8 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil .cancelled = cbm_pipeline_cancelled_ptr(p), }; - /* Run passes on changed files only */ - struct timespec t; - cbm_clock_gettime(CLOCK_MONOTONIC, &t); + /* Run passes on changed files only */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); cbm_pipeline_pass_definitions(&ctx, changed_files, ci); cbm_log_info("pass.timing", "pass", "incr_definitions", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); @@ -284,11 +295,23 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.merged", "nodes", itoa_buf(new_nodes), "edges", itoa_buf(new_edges)); - /* Persist updated file hashes for ALL files */ - persist_hashes(store, project, files, file_count); - - /* Cleanup */ - cbm_gbuf_free(gbuf); + /* Persist updated file hashes for ALL files */ + persist_hashes(store, project, files, file_count); + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", 
"msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + cbm_gbuf_free(gbuf); + cbm_registry_free(registry); + free(changed_files); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + + /* Cleanup */ + cbm_gbuf_free(gbuf); cbm_registry_free(registry); free(changed_files); cbm_store_close(store); diff --git a/src/store/store.c b/src/store/store.c index ca196255..90ebc7d0 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -41,6 +41,8 @@ struct cbm_store { sqlite3 *db; const char *db_path; /* heap-allocated, or NULL for :memory: */ char errbuf[512]; + bool node_scores_checked; + bool node_scores_exists; /* Prepared statements (lazily initialized, cached for lifetime) */ sqlite3_stmt *stmt_upsert_node; @@ -125,6 +127,31 @@ static char *heap_strdup(const char *s) { return d; } +static bool store_has_node_scores_table(cbm_store_t *s) { + if (!s || !s->db) { + return false; + } + if (s->node_scores_checked) { + return s->node_scores_exists; + } + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2( + s->db, + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='node_scores' LIMIT 1;", -1, + &stmt, NULL); + if (rc != SQLITE_OK) { + s->node_scores_checked = true; + s->node_scores_exists = false; + return false; + } + + s->node_scores_exists = (sqlite3_step(stmt) == SQLITE_ROW); + s->node_scores_checked = true; + sqlite3_finalize(stmt); + return s->node_scores_exists; +} + /* Prepare a statement (cached). If already prepared, reset+clear. 
*/ static sqlite3_stmt *prepare_cached(cbm_store_t *s, sqlite3_stmt **slot, const char *sql) { if (!s || !s->db) { @@ -200,6 +227,12 @@ static int init_schema(cbm_store_t *s) { " source_hash TEXT NOT NULL," " created_at TEXT NOT NULL," " updated_at TEXT NOT NULL" + ");" + "CREATE TABLE IF NOT EXISTS node_scores (" + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," + " pagerank REAL NOT NULL," + " PRIMARY KEY (project, node_id)" ");"; return exec_sql(s, ddl); @@ -214,7 +247,8 @@ static int create_user_indexes(cbm_store_t *s) { "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id, type);" "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);" "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);"; + "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);" + "CREATE INDEX IF NOT EXISTS idx_node_scores_rank ON node_scores(project, pagerank DESC);"; return exec_sql(s, sql); } @@ -375,6 +409,9 @@ static cbm_store_t *store_open_internal(const char *path, bool in_memory) { return NULL; } + s->node_scores_checked = true; + s->node_scores_exists = true; + return s; } @@ -409,6 +446,8 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { } s->db_path = heap_strdup(db_path); + s->node_scores_checked = false; + s->node_scores_exists = false; /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. 
*/ sqlite3_set_authorizer(s->db, store_authorizer, NULL); @@ -599,7 +638,8 @@ int cbm_store_drop_indexes(cbm_store_t *s) { "DROP INDEX IF EXISTS idx_edges_target;" "DROP INDEX IF EXISTS idx_edges_type;" "DROP INDEX IF EXISTS idx_edges_target_type;" - "DROP INDEX IF EXISTS idx_edges_source_type;"); + "DROP INDEX IF EXISTS idx_edges_source_type;" + "DROP INDEX IF EXISTS idx_node_scores_rank;"); } int cbm_store_create_indexes(cbm_store_t *s) { @@ -1834,6 +1874,335 @@ int cbm_store_restore_from(cbm_store_t *dst, cbm_store_t *src) { return CBM_STORE_OK; } +/* ── PageRank ───────────────────────────────────────────────────── */ + +typedef struct { + int src_idx; + int dst_idx; +} cbm_pagerank_edge_ref_t; + +static int pagerank_find_node_index(const int64_t *node_ids, int count, int64_t node_id) { + int lo = 0; + int hi = count - 1; + while (lo <= hi) { + int mid = lo + ((hi - lo) / 2); + if (node_ids[mid] == node_id) { + return mid; + } + if (node_ids[mid] < node_id) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return -1; +} + +int cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping) { + int rc = CBM_STORE_OK; + sqlite3_stmt *stmt = NULL; + sqlite3_stmt *insert_stmt = NULL; + int64_t *node_ids = NULL; + int node_cap = 0; + int node_count = 0; + cbm_pagerank_edge_ref_t *edges = NULL; + int edge_cap = 0; + int edge_count = 0; + int *out_degree = NULL; + double *scores = NULL; + double *next_scores = NULL; + + if (!s || !s->db || !project) { + return CBM_STORE_ERR; + } + if (!store_has_node_scores_table(s)) { + store_set_error(s, "node_scores table is unavailable"); + return CBM_STORE_ERR; + } + if (iterations <= 0) { + iterations = 20; + } + if (damping <= 0.0 || damping >= 1.0) { + damping = 0.85; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT id FROM nodes " + "WHERE project = ?1 AND label IN ('Function','Method','Class') " + "ORDER BY id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, 
"pagerank.nodes"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (node_count >= node_cap) { + node_cap = node_cap > 0 ? node_cap * 2 : 128; + node_ids = safe_realloc(node_ids, (size_t)node_cap * sizeof(int64_t)); + } + node_ids[node_count++] = sqlite3_column_int64(stmt, 0); + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count > 0) { + out_degree = calloc((size_t)node_count, sizeof(int)); + scores = malloc((size_t)node_count * sizeof(double)); + next_scores = malloc((size_t)node_count * sizeof(double)); + if (!out_degree || !scores || !next_scores) { + store_set_error(s, "pagerank allocation failed"); + rc = CBM_STORE_ERR; + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT source_id, target_id FROM edges WHERE project = ?1 AND type = 'CALLS' " + "ORDER BY source_id, target_id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.edges"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + int src_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 0)); + int dst_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 1)); + if (src_idx < 0 || dst_idx < 0) { + continue; + } + if (edge_count >= edge_cap) { + edge_cap = edge_cap > 0 ? 
edge_cap * 2 : 256; + edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_pagerank_edge_ref_t)); + } + edges[edge_count].src_idx = src_idx; + edges[edge_count].dst_idx = dst_idx; + out_degree[src_idx]++; + edge_count++; + } + sqlite3_finalize(stmt); + stmt = NULL; + + for (int i = 0; i < node_count; i++) { + scores[i] = 1.0 / (double)node_count; + } + + for (int iter = 0; iter < iterations; iter++) { + double dangling_mass = 0.0; + double base = 0.0; + + for (int i = 0; i < node_count; i++) { + if (out_degree[i] == 0) { + dangling_mass += scores[i]; + } + } + + base = ((1.0 - damping) + (damping * dangling_mass)) / (double)node_count; + for (int i = 0; i < node_count; i++) { + next_scores[i] = base; + } + + for (int i = 0; i < edge_count; i++) { + int src_idx = edges[i].src_idx; + int dst_idx = edges[i].dst_idx; + if (out_degree[src_idx] > 0) { + next_scores[dst_idx] += + damping * (scores[src_idx] / (double)out_degree[src_idx]); + } + } + + { + double *tmp = scores; + scores = next_scores; + next_scores = tmp; + } + } + } + + rc = cbm_store_begin(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = sqlite3_prepare_v2(s->db, "DELETE FROM node_scores WHERE project = ?1;", -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.delete"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.delete"); + sqlite3_finalize(stmt); + stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count == 0) { + rc = cbm_store_commit(s); + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, "INSERT INTO node_scores (project, node_id, pagerank) VALUES (?1, ?2, ?3);", -1, + &insert_stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.insert"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + + for 
(int i = 0; i < node_count; i++) { + sqlite3_reset(insert_stmt); + sqlite3_clear_bindings(insert_stmt); + bind_text(insert_stmt, 1, project); + sqlite3_bind_int64(insert_stmt, 2, node_ids[i]); + sqlite3_bind_double(insert_stmt, 3, scores[i]); + if (sqlite3_step(insert_stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.insert"); + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + } + + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + rc = cbm_store_commit(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = CBM_STORE_OK; + +cleanup: + if (stmt) { + sqlite3_finalize(stmt); + } + if (insert_stmt) { + sqlite3_finalize(insert_stmt); + } + free(node_ids); + free(edges); + free(out_degree); + free(scores); + free(next_scores); + return rc; +} + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count) { + sqlite3_stmt *stmt = NULL; + cbm_key_symbol_t *symbols = NULL; + int cap = 0; + int n = 0; + char *focus_like = NULL; + bool has_scores = false; + char sql[2048]; + + if (out) { + *out = NULL; + } + if (count) { + *count = 0; + } + if (!s || !s->db || !project || !out || !count) { + return CBM_STORE_ERR; + } + + if (limit <= 0) { + limit = 20; + } + has_scores = store_has_node_scores_table(s); + if (focus && focus[0]) { + size_t len = strlen(focus); + focus_like = malloc(len + 3); + if (!focus_like) { + return CBM_STORE_ERR; + } + focus_like[0] = '%'; + memcpy(focus_like + 1, focus, len); + focus_like[len + 1] = '%'; + focus_like[len + 2] = '\0'; + } + + snprintf( + sql, sizeof(sql), + "SELECT n.name, n.qualified_name, n.label, n.file_path, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, " + "%s " + "FROM nodes n %s " + "WHERE n.project = ?1 AND n.label IN 
('Function','Method','Class') %s" + "ORDER BY pagerank DESC, in_deg DESC, out_deg DESC, n.name " + "LIMIT %d;", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + focus_like ? "AND (n.name LIKE ?2 OR n.qualified_name LIKE ?2 OR n.file_path LIKE ?2) " + : "", + limit); + + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "key_symbols.prepare"); + free(focus_like); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap = cap > 0 ? cap * 2 : 16; + symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_key_symbol_t)); + } + memset(&symbols[n], 0, sizeof(cbm_key_symbol_t)); + symbols[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + symbols[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + symbols[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + symbols[n].file_path = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); + symbols[n].in_degree = sqlite3_column_int(stmt, 4); + symbols[n].out_degree = sqlite3_column_int(stmt, 5); + symbols[n].pagerank = sqlite3_column_double(stmt, 6); + n++; + } + + sqlite3_finalize(stmt); + free(focus_like); + *out = symbols; + *count = n; + return CBM_STORE_OK; +} + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count) { + if (!symbols) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)symbols[i].name); + free((void *)symbols[i].qualified_name); + free((void *)symbols[i].label); + free((void *)symbols[i].file_path); + } + free(symbols); +} + /* ── Search ─────────────────────────────────────────────────────── */ /* Convert a glob pattern to SQL LIKE pattern. 
*/ @@ -1978,13 +2347,14 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear char sql[4096]; char count_sql[4096]; int bind_idx = 0; + bool has_scores = store_has_node_scores_table(s); /* We build a query that selects nodes with optional degree subqueries */ const char *select_cols = "SELECT n.id, n.project, n.label, n.name, n.qualified_name, " "n.file_path, n.start_line, n.end_line, n.properties, " "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " - "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg "; + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, "; /* Start building WHERE */ char where[2048] = ""; @@ -2067,9 +2437,18 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear /* Build full SQL */ if (nparams > 0) { - snprintf(sql, sizeof(sql), "%s FROM nodes n WHERE %s", select_cols, where); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s WHERE %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + where); } else { - snprintf(sql, sizeof(sql), "%s FROM nodes n", select_cols); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); } /* Degree filters: -1 = no filter, 0+ = active filter. @@ -2100,12 +2479,20 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear * When degree filter wraps in subquery, column refs lose the "n." prefix. */ int limit = params->limit > 0 ? 
params->limit : 500000; int offset = params->offset; - bool has_degree_wrap = has_degree_filter; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *name_col = has_degree_wrap ? "name" : "n.name"; + const char *sort_by = (params->sort_by && params->sort_by[0]) ? params->sort_by : "name"; char order_limit[128]; - snprintf(order_limit, sizeof(order_limit), " ORDER BY %s LIMIT %d OFFSET %d", name_col, limit, - offset); + if (strcmp(sort_by, "degree") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY (in_deg + out_deg) DESC, pagerank DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else if (strcmp(sort_by, "relevance") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY pagerank DESC, (in_deg + out_deg) DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else { + snprintf(order_limit, sizeof(order_limit), " ORDER BY name LIMIT %d OFFSET %d", limit, + offset); + } strncat(sql, order_limit, sizeof(sql) - strlen(sql) - 1); /* Execute count query */ @@ -2147,6 +2534,7 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear scan_node(main_stmt, &results[n].node); results[n].in_degree = sqlite3_column_int(main_stmt, 9); results[n].out_degree = sqlite3_column_int(main_stmt, 10); + results[n].pagerank = sqlite3_column_double(main_stmt, 11); n++; } @@ -2219,6 +2607,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const char sql[4096]; const char *join_cond; const char *next_id; + bool has_scores = store_has_node_scores_table(s); // NOLINTNEXTLINE(readability-implicit-bool-conversion) bool is_inbound = direction && strcmp(direction, "inbound") == 0; @@ -2240,13 +2629,18 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const " WHERE e.type IN (%s) AND bfs.hop < %d" ")" "SELECT DISTINCT n.id, n.project, n.label, n.name, n.qualified_name, " - "n.file_path, n.start_line, n.end_line, n.properties, bfs.hop " + "n.file_path, n.start_line, 
n.end_line, n.properties, bfs.hop, %s " "FROM bfs " "JOIN nodes n ON n.id = bfs.node_id " + "%s " "WHERE bfs.hop > 0 " /* exclude root */ "ORDER BY bfs.hop " "LIMIT %d;", - (long long)start_id, next_id, join_cond, types_clause, max_depth, max_results); + (long long)start_id, next_id, join_cond, types_clause, max_depth, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + max_results); sqlite3_stmt *stmt = NULL; rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); @@ -2275,6 +2669,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const } scan_node(stmt, &visited[n].node); visited[n].hop = sqlite3_column_int(stmt, 9); + visited[n].pagerank = sqlite3_column_double(stmt, 10); n++; } diff --git a/src/store/store.h b/src/store/store.h index d02fec77..99c240e3 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -116,7 +116,7 @@ typedef struct { int offset; bool exclude_entry_points; bool include_connected; - const char *sort_by; /* "relevance" / "name" / "degree", NULL = relevance */ + const char *sort_by; /* "relevance" / "name" / "degree", NULL = name */ bool case_sensitive; const char **exclude_labels; /* NULL-terminated array, or NULL */ } cbm_search_params_t; @@ -125,6 +125,7 @@ typedef struct { cbm_node_t node; int in_degree; int out_degree; + double pagerank; /* connected_names: allocated array of strings, count in connected_count */ const char **connected_names; int connected_count; @@ -141,6 +142,7 @@ typedef struct { typedef struct { cbm_node_t node; int hop; /* BFS depth from root */ + double pagerank; } cbm_node_hop_t; typedef struct { @@ -360,6 +362,25 @@ int cbm_store_delete_file_hash(cbm_store_t *s, const char *project, const char * int cbm_store_delete_file_hashes(cbm_store_t *s, const char *project); +/* ── PageRank ───────────────────────────────────────────────────── */ + +int 
cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping); + +typedef struct { + const char *name; + const char *qualified_name; + const char *label; + const char *file_path; + int in_degree; + int out_degree; + double pagerank; +} cbm_key_symbol_t; + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count); + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count); + /* ── Search ─────────────────────────────────────────────────────── */ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_search_output_t *out); diff --git a/tests/test_integration.c b/tests/test_integration.c index 318bce3e..d0a14dde 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -381,6 +381,18 @@ TEST(integ_mcp_get_architecture_summary) { PASS(); } +TEST(integ_mcp_get_key_symbols) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"limit\":5}", g_project); + + char *resp = call_tool("get_key_symbols", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "pagerank")); + ASSERT_TRUE(strstr(resp, "Add") || strstr(resp, "greet") || strstr(resp, "Multiply")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -539,6 +551,21 @@ TEST(integ_store_bfs_traversal) { PASS(); } +TEST(integ_store_key_symbols_ranked) { + cbm_store_t *store = cbm_store_open_path(g_dbpath); + ASSERT_NOT_NULL(store); + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + ASSERT_EQ(cbm_store_get_key_symbols(store, g_project, NULL, 5, &symbols, &count), CBM_STORE_OK); + ASSERT_GT(count, 0); + ASSERT_TRUE(symbols[0].pagerank > 0.0); + + cbm_store_key_symbols_free(symbols, count); + cbm_store_close(store); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * 
══════════════════════════════════════════════════════════════════ */ @@ -548,7 +575,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 16; /* skip all integration tests */ + tf_skip_count += 25; /* skip all integration tests */ integration_teardown(); return; } @@ -569,6 +596,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); RUN_TEST(integ_mcp_get_architecture_summary); + RUN_TEST(integ_mcp_get_key_symbols); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); @@ -576,6 +604,7 @@ SUITE(integration) { RUN_TEST(integ_store_search_by_degree); RUN_TEST(integ_store_find_by_file); RUN_TEST(integ_store_bfs_traversal); + RUN_TEST(integ_store_key_symbols_ranked); /* Pipeline API tests (no db needed) */ RUN_TEST(integ_pipeline_fqn_compute); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index dd3c0bb2..8aeeb096 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -131,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 15 tools */ + /* Should contain all public tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -139,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); @@ -583,6 +584,51 @@ static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { } } +static cbm_mcp_server_t *setup_pagerank_server(void) { + cbm_mcp_server_t *srv = 
cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-rank", "/tmp/test-rank"); + cbm_mcp_server_set_project(srv, "test-rank"); + + cbm_node_t nodes[] = { + {.project = "test-rank", .label = "Function", .name = "Root", .qualified_name = "test-rank.Root"}, + {.project = "test-rank", .label = "Function", .name = "Small", .qualified_name = "test-rank.Small"}, + {.project = "test-rank", .label = "Function", .name = "Hub", .qualified_name = "test-rank.Hub"}, + {.project = "test-rank", .label = "Function", .name = "Leaf", .qualified_name = "test-rank.Leaf"}, + {.project = "test-rank", .label = "Function", .name = "CallerB", .qualified_name = "test-rank.CallerB"}, + {.project = "test-rank", .label = "Function", .name = "CallerC", .qualified_name = "test-rank.CallerC"}, + }; + int64_t ids[6]; + for (int i = 0; i < 6; i++) { + ids[i] = cbm_store_upsert_node(st, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "test-rank", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + for (int i = 0; i < 4; i++) { + cbm_store_insert_edge(st, &edges[i]); + } + + if (cbm_store_compute_pagerank(st, "test-rank", 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1120,6 +1166,68 @@ static char *extract_text_content(const char *mcp_result) { return result; } +TEST(tool_search_graph_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); 
+ + char *raw = cbm_mcp_handle_tool(srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_key_symbols_ranked) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "get_key_symbols", "{\"project\":\"test-rank\",\"limit\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"results\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\",\"depth\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1834,6 +1942,7 @@ SUITE(mcp) { RUN_TEST(tool_get_graph_schema_empty); RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); + RUN_TEST(tool_search_graph_ranked_pagerank); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1845,6 +1954,8 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_missing_project); RUN_TEST(tool_get_architecture_summary_truncated); RUN_TEST(tool_get_architecture_summary_project_path_alias); + RUN_TEST(tool_get_key_symbols_ranked); + RUN_TEST(tool_trace_call_path_ranked_pagerank); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 569fa04e..8578bdc5 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -19,6 +19,7 @@ #include #include "graph_buffer/graph_buffer.h" #include "yyjson/yyjson.h" +#include /* ── Helper: create temp test repo with known layout ───────────── */ @@ -4971,6 +4972,44 @@ TEST(incremental_full_then_noop) { PASS(); } +TEST(incremental_noop_backfills_pagerank) { + if (setup_incremental_repo() != 0) { SKIP("setup failed"); } + + cbm_pipeline_t *p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + cbm_store_t *s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + ASSERT_EQ(sqlite3_exec(cbm_store_get_db(s), "DELETE FROM node_scores;", NULL, NULL, NULL), + SQLITE_OK); + cbm_store_close(s); + + p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + sqlite3_stmt *stmt = NULL; + ASSERT_EQ(sqlite3_prepare_v2(cbm_store_get_db(s), + "SELECT COUNT(*) FROM node_scores WHERE project = 
?1;", -1, + &stmt, NULL), + SQLITE_OK); + ASSERT_EQ(sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC), SQLITE_OK); + ASSERT_EQ(sqlite3_step(stmt), SQLITE_ROW); + ASSERT_GT(sqlite3_column_int(stmt, 0), 0); + sqlite3_finalize(stmt); + cbm_store_close(s); + free(project); + + cleanup_incremental_repo(); + PASS(); +} + TEST(incremental_detects_changed_file) { /* Full index, modify one file, re-index → changed file re-parsed */ if (setup_incremental_repo() != 0) { SKIP("setup failed"); } @@ -5914,6 +5953,7 @@ SUITE(pipeline) { RUN_TEST(pipeline_fastapi_depends_edges); /* Incremental */ RUN_TEST(incremental_full_then_noop); + RUN_TEST(incremental_noop_backfills_pagerank); RUN_TEST(incremental_detects_changed_file); RUN_TEST(incremental_detects_deleted_file); RUN_TEST(incremental_new_file_added); diff --git a/tests/test_store_search.c b/tests/test_store_search.c index 05df8680..4a5b7b3e 100644 --- a/tests/test_store_search.c +++ b/tests/test_store_search.c @@ -47,6 +47,41 @@ static cbm_store_t *setup_search_store(int64_t *ids) { return s; } +static cbm_store_t *setup_pagerank_store(int64_t *ids) { + cbm_store_t *s = cbm_store_open_memory(); + cbm_store_upsert_project(s, "test", "/tmp/test"); + + cbm_node_t nodes[] = { + {.project = "test", .label = "Function", .name = "Root", .qualified_name = "test.Root"}, + {.project = "test", .label = "Function", .name = "Small", .qualified_name = "test.Small"}, + {.project = "test", .label = "Function", .name = "Hub", .qualified_name = "test.Hub"}, + {.project = "test", .label = "Function", .name = "Leaf", .qualified_name = "test.Leaf"}, + {.project = "test", .label = "Function", .name = "CallerB", .qualified_name = "test.CallerB"}, + {.project = "test", .label = "Function", .name = "CallerC", .qualified_name = "test.CallerC"}, + }; + const int node_count = (int)(sizeof(nodes) / sizeof(nodes[0])); + for (int i = 0; i < node_count; i++) { + ids[i] = cbm_store_upsert_node(s, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = 
"test", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(s, &edges[i]); + } + + if (cbm_store_compute_pagerank(s, "test", 20, 0.85) != CBM_STORE_OK) { + cbm_store_close(s); + return NULL; + } + return s; +} + /* ── Search by label ────────────────────────────────────────────── */ TEST(store_search_by_label) { @@ -595,6 +630,46 @@ TEST(store_search_case_insensitive) { PASS(); } +TEST(store_search_ranked_by_pagerank) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + + cbm_search_params_t params = {.project = "test", + .label = "Function", + .limit = 10, + .min_degree = -1, + .max_degree = -1, + .sort_by = "relevance"}; + cbm_search_output_t out = {0}; + int rc = cbm_store_search(s, ¶ms, &out); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_GTE(out.count, 4); + ASSERT_STR_EQ(out.results[0].node.name, "Hub"); + ASSERT_TRUE(out.results[0].pagerank > out.results[1].pagerank); + cbm_store_search_free(&out); + + cbm_store_close(s); + PASS(); +} + +TEST(store_get_key_symbols_ranked) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + cbm_key_symbol_t *symbols = NULL; + int count = 0; + + ASSERT_EQ(cbm_store_get_key_symbols(s, "test", NULL, 3, &symbols, &count), CBM_STORE_OK); + ASSERT_EQ(count, 3); + ASSERT_STR_EQ(symbols[0].name, "Hub"); + ASSERT_TRUE(symbols[0].pagerank > symbols[1].pagerank); + cbm_store_key_symbols_free(symbols, count); + + cbm_store_close(s); + PASS(); +} + /* ── Impact: HopToRisk ─────────────────────────────────────────── */ TEST(store_hop_to_risk) { @@ -1217,6 +1292,8 @@ SUITE(store_search) { 
RUN_TEST(store_search_all); RUN_TEST(store_search_exclude_labels); RUN_TEST(store_search_case_insensitive); + RUN_TEST(store_search_ranked_by_pagerank); + RUN_TEST(store_get_key_symbols_ranked); RUN_TEST(store_bfs_outbound); RUN_TEST(store_bfs_inbound); RUN_TEST(store_bfs_cross_service); From 0af23ec9d2f3f0c9cc4e00faa0bd024c788dd71d Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:35:23 +0530 Subject: [PATCH 03/10] Make PageRank failures non-fatal during indexing --- src/pipeline/pipeline.c | 9 ++++----- src/pipeline/pipeline_incremental.c | 27 +++++++++++---------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 41671775..5cddbcb1 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -818,11 +818,10 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { } if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, - "error", cbm_store_error(hash_store)); - cbm_store_close(hash_store); - rc = -1; - goto cleanup; + /* PageRank is a ranking enhancement; keep a successful index even if + * score computation fails on this pass. 
*/ + cbm_log_warn("pipeline.warn", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 795f32d2..fa802ba9 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -197,15 +197,13 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.noop", "reason", "no_changes"); cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - free(is_changed); - free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return -1; + /* Preserve the successful no-op index result even if ranking refresh fails. 
*/ + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, + "error", cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); free(is_changed); free(deleted); cbm_store_free_file_hashes(stored, stored_count); @@ -300,15 +298,12 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - cbm_gbuf_free(gbuf); - cbm_registry_free(registry); - free(changed_files); - cbm_store_close(store); - return -1; + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); /* Cleanup */ cbm_gbuf_free(gbuf); From 408be5154a0041a1e8b84694225854bb6867d367 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:53:03 +0530 Subject: [PATCH 04/10] Fix read-only query opens for snapshot DBs --- src/store/store.c | 51 ++++++++++++++++++++++++++++++++++++++-- tests/test_store_nodes.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/store/store.c b/src/store/store.c index 90ebc7d0..00b857ac 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -127,6 +127,43 @@ static char *heap_strdup(const char *s) { return d; } +static bool sqlite_uri_path_safe_char(unsigned char c) { + return isalnum(c) || c == '/' || c == '.' 
|| c == '_' || c == '-' || c == '~' || c == ':'; +} + +static char *sqlite_readonly_immutable_uri(const char *db_path) { + if (!db_path) { + return NULL; + } + + static const char suffix[] = "?mode=ro&immutable=1"; + size_t path_len = strlen(db_path); + size_t cap = strlen("file:") + (path_len * 3) + sizeof(suffix); + char *uri = malloc(cap); + if (!uri) { + return NULL; + } + + char *dst = uri; + memcpy(dst, "file:", strlen("file:")); + dst += strlen("file:"); + + static const char hex[] = "0123456789ABCDEF"; + for (size_t i = 0; i < path_len; i++) { + unsigned char c = (unsigned char)db_path[i]; + if (sqlite_uri_path_safe_char(c)) { + *dst++ = (char)c; + } else { + *dst++ = '%'; + *dst++ = hex[(c >> 4) & 0x0F]; + *dst++ = hex[c & 0x0F]; + } + } + + memcpy(dst, suffix, sizeof(suffix)); + return uri; +} + static bool store_has_node_scores_table(cbm_store_t *s) { if (!s || !s->db) { return false; @@ -436,8 +473,18 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { return NULL; } - /* Open read-only and do NOT create — query tools should never need write access. */ - int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL); + /* Query tools read atomically-written snapshot DBs. Open them via an + * immutable URI so SQLite does not try to create WAL/SHM sidecars when the + * file was produced by the direct page writer and later reopened in WAL + * mode by the indexing pipeline. */ + char *uri = sqlite_readonly_immutable_uri(db_path); + if (!uri) { + free(s); + return NULL; + } + + int rc = sqlite3_open_v2(uri, &s->db, SQLITE_OPEN_READONLY | SQLITE_OPEN_URI, NULL); + free(uri); if (rc != SQLITE_OK) { /* sqlite3_open_v2 allocates a handle even on failure — must close it. */ sqlite3_close(s->db); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index 6cfc93f3..cef33bdb 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -5,6 +5,7 @@ * TestNodeDedup, TestProjectCRUD, TestUpsertNodeBatch, etc.) 
*/ #include "test_framework.h" +#include "sqlite_writer.h" #include #include #include @@ -140,6 +141,54 @@ TEST(store_open_path_query_readonly_db) { PASS(); } +TEST(store_open_path_query_direct_writer_db) { + char path[] = "/tmp/cbm_store_query_writer_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + CBMDumpNode node = { + .id = 1, + .project = "writer-proj", + .label = "Function", + .name = "Hello", + .qualified_name = "writer-proj.main.Hello", + .file_path = "main.go", + .start_line = 1, + .end_line = 3, + .properties = "{}", + }; + ASSERT_EQ(cbm_write_db(path, "writer-proj", "/tmp/writer-proj", "2026-03-25T00:00:00Z", + &node, 1, NULL, 0), + 0); + + /* Reopen in the same way the pipeline does to add post-dump metadata. */ + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_file_hash(writer, "writer-proj", "main.go", "abc123", 1, 64), + CBM_STORE_OK); + cbm_store_close(writer); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + ASSERT_TRUE(cbm_store_check_integrity(reader)); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "writer-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, "/tmp/writer-proj"); + cbm_project_free_fields(&proj); + + cbm_node_t found = {0}; + ASSERT_EQ(cbm_store_find_node_by_qn(reader, "writer-proj", "writer-proj.main.Hello", &found), + CBM_STORE_OK); + ASSERT_STR_EQ(found.name, "Hello"); + cbm_node_free_fields(&found); + + cbm_store_close(reader); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1541,6 +1590,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_update); RUN_TEST(store_project_delete); RUN_TEST(store_open_path_query_readonly_db); + RUN_TEST(store_open_path_query_direct_writer_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From 
0559f34ca6460274e1afb82f821982f8005af5c4 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 18:42:36 +0530 Subject: [PATCH 05/10] Refine MCP token-budget truncation Account for optional signatures in the search_graph and trace_call_path size estimators, and improve compact trace chains to report omitted-node counts. This also documents the normal-path output enrichment introduced with Task 4: search_graph results now include file_path, start_line, end_line, and signature, and trace_call_path hop items now include file_path, start_line, and signature. --- src/mcp/mcp.c | 491 +++++++++++++++++++++++++++++++++++++++++++---- tests/test_mcp.c | 163 ++++++++++++++++ 2 files changed, 616 insertions(+), 38 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 8b313dbb..05bc6a26 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -46,6 +46,12 @@ /* Default snippet fallback line count */ #define SNIPPET_DEFAULT_LINES 50 +/* Approximate output budget: 1 token ~= 4 chars. */ +#define DEFAULT_MAX_TOKENS 2000 +#define MIN_JSON_CHAR_BUDGET 128 +#define MAX_FULL_BUDGET_ITEMS 5 +#define MAX_COMPACT_QUERY_CELL_CHARS 96 + /* Idle store eviction: close cached project store after this many seconds * of inactivity to free SQLite memory during idle periods. */ #define STORE_IDLE_TIMEOUT_S 60 @@ -80,6 +86,243 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) { return s; } +static size_t max_tokens_to_char_budget(int max_tokens) { + if (max_tokens <= 0) { + max_tokens = DEFAULT_MAX_TOKENS; + } + size_t budget = (size_t)max_tokens * 4U; + if (budget < MIN_JSON_CHAR_BUDGET) { + budget = MIN_JSON_CHAR_BUDGET; + } + return budget; +} + +static char *json_string_field_dup(const char *json, const char *key) { + if (!json || !key) { + return NULL; + } + + yyjson_doc *doc = yyjson_read(json, strlen(json), 0); + if (!doc) { + return NULL; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *value = root ? 
yyjson_obj_get(root, key) : NULL; + const char *str = value ? yyjson_get_str(value) : NULL; + char *dup = str ? heap_strdup(str) : NULL; + yyjson_doc_free(doc); + return dup; +} + +static char *node_signature_dup(const cbm_node_t *node) { + if (!node) { + return NULL; + } + return json_string_field_dup(node->properties_json, "signature"); +} + +static size_t estimate_signature_field_chars(const cbm_node_t *node) { + char *signature = node_signature_dup(node); + if (!signature || !signature[0]) { + free(signature); + return 0; + } + + size_t size = strlen(signature) + 24; + free(signature); + return size; +} + +static char *truncate_text_copy(const char *text, size_t max_chars) { + if (!text) { + return heap_strdup(""); + } + + size_t len = strlen(text); + if (len <= max_chars) { + return heap_strdup(text); + } + if (max_chars <= 3) { + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + for (size_t i = 0; i < max_chars; i++) { + out[i] = '.'; + } + out[max_chars] = '\0'; + return out; + } + + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + size_t keep = max_chars - 3; + memcpy(out, text, keep); + memcpy(out + keep, "...", 4); + return out; +} + +static char *build_compact_hop_chain(const cbm_node_hop_t *hops, int count) { + if (!hops || count <= 0) { + return NULL; + } + + const char *first = NULL; + const char *last = NULL; + int named_count = 0; + for (int i = 0; i < count; i++) { + if (hops[i].node.name && hops[i].node.name[0]) { + if (!first) { + first = hops[i].node.name; + } + last = hops[i].node.name; + named_count++; + } + } + if (!first) { + return NULL; + } + if (named_count <= 1 || !last || strcmp(first, last) == 0) { + return heap_strdup(first); + } + if (named_count == 2) { + size_t len = strlen(first) + strlen(last) + strlen(" -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> %s", first, last); + return chain; + } + + int omitted = named_count - 2; + char 
omitted_buf[32]; + snprintf(omitted_buf, sizeof(omitted_buf), "%d", omitted); + + size_t len = + strlen(first) + strlen(last) + strlen(omitted_buf) + strlen(" -> ... ( more) -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> ... (%d more) -> %s", first, omitted, last); + return chain; +} + +static size_t estimate_search_result_chars(const cbm_search_result_t *sr, bool compact) { + size_t size = 96; + size += strlen(sr->node.name ? sr->node.name : ""); + size += strlen(sr->node.file_path ? sr->node.file_path : ""); + size += estimate_signature_field_chars(&sr->node); + if (!compact) { + size += strlen(sr->node.qualified_name ? sr->node.qualified_name : ""); + size += strlen(sr->node.label ? sr->node.label : ""); + size += 48; + } else { + size += 24; + } + return size; +} + +static void add_search_result_item(yyjson_mut_doc *doc, yyjson_mut_val *results, + const cbm_search_result_t *sr, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sr->node.file_path ? sr->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", sr->node.start_line); + + char *signature = node_signature_dup(&sr->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? 
sr->node.label : ""); + yyjson_mut_obj_add_int(doc, item, "end_line", sr->node.end_line); + yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); + } + + yyjson_mut_arr_add_val(results, item); +} + +static size_t estimate_node_hop_chars(const cbm_node_hop_t *hop, bool compact) { + size_t size = 80; + size += strlen(hop->node.name ? hop->node.name : ""); + size += strlen(hop->node.file_path ? hop->node.file_path : ""); + size += estimate_signature_field_chars(&hop->node); + if (!compact) { + size += strlen(hop->node.qualified_name ? hop->node.qualified_name : ""); + size += 40; + } else { + size += 20; + } + return size; +} + +static void add_node_hop_item(yyjson_mut_doc *doc, yyjson_mut_val *items, const cbm_node_hop_t *hop, + bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", hop->node.name ? hop->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", hop->node.file_path ? hop->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", hop->node.start_line); + yyjson_mut_obj_add_int(doc, item, "hop", hop->hop); + + char *signature = node_signature_dup(&hop->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + hop->node.qualified_name ? hop->node.qualified_name : ""); + yyjson_mut_obj_add_real(doc, item, "pagerank", hop->pagerank); + } + + yyjson_mut_arr_add_val(items, item); +} + +static size_t estimate_query_row_chars(const char *const *row, int col_count, bool compact) { + size_t size = 8; + for (int c = 0; c < col_count; c++) { + size += 4; + if (!row[c]) { + continue; + } + size += compact ? 
strnlen(row[c], MAX_COMPACT_QUERY_CELL_CHARS) : strlen(row[c]); + } + return size; +} + +static void add_query_row(yyjson_mut_doc *doc, yyjson_mut_val *rows, const char *const *row, + int col_count, + bool compact) { + yyjson_mut_val *out_row = yyjson_mut_arr(doc); + for (int c = 0; c < col_count; c++) { + const char *cell = row[c] ? row[c] : ""; + if (compact) { + char *clipped = truncate_text_copy(cell, MAX_COMPACT_QUERY_CELL_CHARS); + yyjson_mut_arr_add_strcpy(doc, out_row, clipped ? clipped : ""); + free(clipped); + } else { + yyjson_mut_arr_add_str(doc, out_row, cell); + } + } + yyjson_mut_arr_add_val(rows, out_row); +} + typedef struct { char *buf; size_t len; @@ -349,7 +592,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked results when needed.\"}}," "\"required\":[\"project\"]}"}, {"query_graph", @@ -359,7 +604,9 @@ static const tool_def_t TOOLS[] = { "query\"},\"project\":{\"type\":\"string\"},\"max_rows\":{\"type\":\"integer\"," "\"description\":" "\"Optional row limit. Default: unlimited (100k " - "ceiling)\"}},\"required\":[\"query\",\"project\"]}"}, + "ceiling)\"},\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":" + "\"Maximum output size. Compacts lower-priority rows when needed.\"}}," + "\"required\":[\"query\",\"project\"]}"}, {"trace_call_path", "Trace function call paths — who calls a function and what it calls. 
Use INSTEAD OF grep when " @@ -368,7 +615,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked path results when needed.\"}}," "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", @@ -1087,7 +1336,9 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); cbm_search_params_t params = { .project = project, @@ -1112,24 +1363,55 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *results = yyjson_mut_arr(doc); for (int i = 0; i < out.count; i++) { - cbm_search_result_t *sr = &out.results[i]; - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); - yyjson_mut_obj_add_str(doc, item, "qualified_name", - sr->node.qualified_name ? sr->node.qualified_name : ""); - yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); - yyjson_mut_obj_add_str(doc, item, "file_path", - sr->node.file_path ? 
sr->node.file_path : ""); - yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); - yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); - yyjson_mut_arr_add_val(results, item); + add_search_result_item(doc, results, &out.results[i], false); } yyjson_mut_obj_add_val(doc, root, "results", results); yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_int(doc, root, "total", out.total); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", out.total); + + results = yyjson_mut_arr(doc); + size_t used = 96; + int shown = 0; + int full_items = 0; + for (int i = 0; i < out.count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_search_result_chars(&out.results[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_search_result_chars(&out.results[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_search_result_item(doc, results, &out.results[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "results", results); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_store_search_free(&out); free(project); @@ -1147,6 +1429,8 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); cbm_store_t 
*store = resolve_store(srv, project); int max_rows = cbm_mcp_get_int_arg(args, "max_rows", 0); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!query) { free(project); @@ -1194,17 +1478,63 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { /* rows */ yyjson_mut_val *rows = yyjson_mut_arr(doc); for (int r = 0; r < result.row_count; r++) { - yyjson_mut_val *row = yyjson_mut_arr(doc); - for (int c = 0; c < result.col_count; c++) { - yyjson_mut_arr_add_str(doc, row, result.rows[r][c]); - } - yyjson_mut_arr_add_val(rows, row); + add_query_row(doc, rows, result.rows[r], result.col_count, false); } yyjson_mut_obj_add_val(doc, root, "rows", rows); yyjson_mut_obj_add_int(doc, root, "total", result.row_count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + cols = yyjson_mut_arr(doc); + for (int i = 0; i < result.col_count; i++) { + yyjson_mut_arr_add_str(doc, cols, result.columns[i]); + } + yyjson_mut_obj_add_val(doc, root, "columns", cols); + + rows = yyjson_mut_arr(doc); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", result.row_count); + + size_t used = 96; + for (int i = 0; i < result.col_count; i++) { + used += strlen(result.columns[i] ? 
result.columns[i] : "") + 4; + } + int shown = 0; + int full_rows = 0; + for (int r = 0; r < result.row_count; r++) { + bool compact = full_rows >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_query_row_chars(result.rows[r], result.col_count, compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_query_row_chars(result.rows[r], result.col_count, true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_query_row(doc, rows, result.rows[r], result.col_count, compact); + used += estimate; + shown++; + if (!compact) { + full_rows++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "rows", rows); + yyjson_mut_obj_add_int(doc, root, "total", result.row_count); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_cypher_result_free(&result); free(query); free(project); @@ -1685,7 +2015,9 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!func_name) { free(project); @@ -1758,15 +2090,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_out.visited[i].node.name ? tr_out.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_out.visited[i].node.qualified_name ? 
tr_out.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); - yyjson_mut_arr_add_val(callees, item); + add_node_hop_item(doc, callees, &tr_out.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callees", callees); } @@ -1781,15 +2105,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_in.visited[i].node.name ? tr_in.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_in.visited[i].node.qualified_name ? tr_in.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); - yyjson_mut_arr_add_val(callers, item); + add_node_hop_item(doc, callers, &tr_in.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callers", callers); } @@ -1798,6 +2114,105 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "function", func_name); + yyjson_mut_obj_add_str(doc, root, "direction", direction); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + + int total_results = 0; + if (do_outbound) { + total_results += tr_out.visited_count; + } + if (do_inbound) { + total_results += tr_in.visited_count; + } + yyjson_mut_obj_add_int(doc, root, "total_results", total_results); + + size_t used = 96 + strlen(func_name) + strlen(direction); + int shown = 0; + + if (do_outbound) { + yyjson_mut_val 
*callees = yyjson_mut_arr(doc); + int shown_callees = 0; + int full_callees = 0; + for (int i = 0; i < tr_out.visited_count; i++) { + bool compact = full_callees >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_out.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_out.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callees, &tr_out.visited[i], compact); + used += estimate; + shown++; + shown_callees++; + if (!compact) { + full_callees++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callees", callees); + if (shown_callees < tr_out.visited_count) { + char *chain = build_compact_hop_chain(tr_out.visited + shown_callees, + tr_out.visited_count - shown_callees); + if (chain && chain[0]) { + yyjson_mut_obj_add_strcpy(doc, root, "callees_chain", chain); + } + free(chain); + } + } + + if (do_inbound) { + yyjson_mut_val *callers = yyjson_mut_arr(doc); + int shown_callers = 0; + int full_callers = 0; + for (int i = 0; i < tr_in.visited_count; i++) { + bool compact = full_callers >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_in.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_in.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callers, &tr_in.visited[i], compact); + used += estimate; + shown++; + shown_callers++; + if (!compact) { + full_callers++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callers", callers); + if (shown_callers < tr_in.visited_count) { + char *chain = + build_compact_hop_chain(tr_in.visited + shown_callers, + tr_in.visited_count - shown_callers); + if (chain && chain[0]) { + 
yyjson_mut_obj_add_strcpy(doc, root, "callers_chain", chain); + } + free(chain); + } + } + + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + /* Now safe to free traversal data */ if (do_outbound) { cbm_store_traverse_free(&tr_out); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 8aeeb096..d140e84d 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -629,6 +629,52 @@ static cbm_mcp_server_t *setup_pagerank_server(void) { return srv; } +static cbm_mcp_server_t *setup_truncation_server(void) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-budget", "/tmp/test-budget"); + cbm_mcp_server_set_project(srv, "test-budget"); + + const char *sig = + "{\"signature\":\"func BudgetedOperation(alpha int, beta int, gamma int, delta int, " + "epsilon int, zeta int, eta int, theta int, iota int) string\"}"; + const char *names[] = {"Root", "A", "B", "C", "D", "E"}; + int64_t ids[6] = {0}; + + for (int i = 0; i < 6; i++) { + char qn[128]; + snprintf(qn, sizeof(qn), "test-budget.%s", names[i]); + cbm_node_t node = { + .project = "test-budget", + .label = "Function", + .name = names[i], + .qualified_name = qn, + .file_path = "pkg/budget.go", + .start_line = 10 + (i * 5), + .end_line = 13 + (i * 5), + .properties_json = sig, + }; + ids[i] = cbm_store_upsert_node(st, &node); + } + + for (int i = 0; i < 5; i++) { + cbm_edge_t edge = { + .project = "test-budget", .source_id = ids[i], .target_id = ids[i + 1], .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1228,6 +1274,118 @@ TEST(tool_trace_call_path_ranked_pagerank) { PASS(); } 
+TEST(tool_search_graph_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":3,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_search_graph_long_signature_budget_respected) { + cbm_mcp_server_t *srv = setup_truncation_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-budget\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\":1")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_chain_shows_omitted_count) { + cbm_mcp_server_t *srv = setup_truncation_server(); + 
ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-budget\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":5,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"callees_chain\":\"")); + ASSERT_NOT_NULL(strstr(text, "more) ->")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_query_graph_max_tokens_truncates) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char args[1024]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"query\":\"MATCH (f:Function) RETURN f.name, f.qualified_name, " + "f.file_path\",\"max_tokens\":1}", + proj_name); + + char *raw = cbm_mcp_handle_tool(srv, "query_graph", args); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"columns\"")); + free(text); + free(raw); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1943,6 +2101,8 @@ SUITE(mcp) { RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); RUN_TEST(tool_search_graph_ranked_pagerank); + RUN_TEST(tool_search_graph_max_tokens_truncates); + RUN_TEST(tool_search_graph_long_signature_budget_respected); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1956,7 +2116,10 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_get_key_symbols_ranked); RUN_TEST(tool_trace_call_path_ranked_pagerank); + RUN_TEST(tool_trace_call_path_max_tokens_truncates); + RUN_TEST(tool_trace_call_path_chain_shows_omitted_count); RUN_TEST(tool_query_graph_missing_query); + RUN_TEST(tool_query_graph_max_tokens_truncates); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); From f3e93e74d415d0d824255bd6d041a70b73e9a3de Mon Sep 17 00:00:00 2001 From: maplenk Date: Fri, 27 Mar 2026 18:16:35 +0530 Subject: [PATCH 06/10] fix: harden token-budget helpers and search_graph wiring - Guard cbm_mcp_text_result() against NULL text - Fix memory leak in handle_get_key_symbols() REQUIRE_STORE path (focus not freed) - Wire qn_pattern through handle_search_graph() - Fix OOM infinite loop in markdown_builder_reserve() - Return 0 instead of CBM_STORE_ERR from summary_count_nodes() on prepare fail Co-Authored-By: Claude Opus 4.6 --- src/mcp/mcp.c | 16 +++++++++++++++- src/store/store.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 05bc6a26..36f8fecf 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -349,6 +349,9 @@ static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { while (b->len + need + 1 > b->cap) { b->cap *= 2; b->buf = safe_realloc(b->buf, b->cap); + if (!b->buf) { + return false; + } } return true; } @@ -546,6 +549,7 @@ char *cbm_jsonrpc_format_error(int64_t id, int code, const char *message) { * 
══════════════════════════════════════════════════════════════════ */ char *cbm_mcp_text_result(const char *text, bool is_error) { + if (!text) text = ""; yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -1331,6 +1335,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { char *label = cbm_mcp_get_string_arg(args, "label"); char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); + char *qn_pattern = cbm_mcp_get_string_arg(args, "qn_pattern"); char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); int limit = cbm_mcp_get_int_arg(args, "limit", 500000); int offset = cbm_mcp_get_int_arg(args, "offset", 0); @@ -1344,6 +1349,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .project = project, .label = label, .name_pattern = name_pattern, + .qn_pattern = qn_pattern, .file_pattern = file_pattern, .limit = limit, .offset = offset, @@ -1417,6 +1423,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { free(project); free(label); free(name_pattern); + free(qn_pattern); free(file_pattern); char *result = cbm_mcp_text_result(json, false); @@ -1699,7 +1706,14 @@ static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { char *focus = cbm_mcp_get_string_arg(args, "focus"); int limit = cbm_mcp_get_int_arg(args, "limit", 20); cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(focus); + return _res; + } char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { diff --git a/src/store/store.c b/src/store/store.c index 00b857ac..f3186cec 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -4638,7 +4638,7 @@ static int 
summary_count_nodes(cbm_store_t *s, const char *project, const char * sqlite3_stmt *stmt = NULL; if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "summary_count_nodes"); - return CBM_STORE_ERR; + return 0; } bind_text(stmt, 1, project); if (focus_like && focus_like[0]) { From 2d0ef973bfd59ce77c9b4db1212f62a5af4ef8d2 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 19:49:34 +0530 Subject: [PATCH 07/10] Refine blast radius analysis Prefer non-test symbol matches before pagerank for ambiguous short names. Batch-load visited node metadata and pagerank scores to remove per-node query loops during impact analysis. Hide test counts from the public summary when include_tests=false, and add max_tokens truncation support to get_impact_analysis with MCP coverage. --- src/mcp/mcp.c | 307 ++++++++++++++++- src/store/store.c | 691 ++++++++++++++++++++++++++++++++++++++ tests/test_mcp.c | 306 +++++++++++++++++ tests/test_store_search.c | 357 ++++++++++++++++++++ 4 files changed, 1660 insertions(+), 1 deletion(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 36f8fecf..eb3957f0 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 16 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with graph tools. * * Uses yyjson for fast JSON parsing/building. * Single-threaded event loop: read line → parse → dispatch → respond. 
@@ -323,6 +323,134 @@ static void add_query_row(yyjson_mut_doc *doc, yyjson_mut_val *rows, const char yyjson_mut_arr_add_val(rows, out_row); } +static int impact_output_direct_caller_count(const cbm_impact_analysis_t *impact) { + int direct_callers = 0; + for (int i = 0; i < impact->direct_count; i++) { + if (impact->direct[i].type && strcmp(impact->direct[i].type, "route") == 0) { + continue; + } + direct_callers++; + } + return direct_callers; +} + +static int impact_output_route_entry_count(const cbm_impact_analysis_t *impact) { + int total = 0; + const cbm_impact_item_t *groups[] = {impact->direct, impact->indirect, impact->transitive}; + const int counts[] = {impact->direct_count, impact->indirect_count, impact->transitive_count}; + for (int g = 0; g < 3; g++) { + for (int i = 0; i < counts[g]; i++) { + const char *type = groups[g][i].type; + if (type && (strcmp(type, "route") == 0 || strcmp(type, "entry_point") == 0)) { + total++; + } + } + } + return total; +} + +static int impact_output_total_results(const cbm_impact_analysis_t *impact, bool include_tests) { + int total = impact->direct_count + impact->indirect_count + impact->transitive_count; + if (include_tests) { + total += impact->affected_test_count; + } + return total; +} + +static char *impact_output_summary_dup(const cbm_impact_analysis_t *impact, bool include_tests) { + int direct_callers = impact_output_direct_caller_count(impact); + int route_entries = impact_output_route_entry_count(impact); + int tests = impact->affected_test_count; + int transitive = impact->transitive_count; + + char buf[256]; + if (include_tests) { + if (transitive > 0) { + snprintf(buf, sizeof(buf), + "%d direct callers, %d route/entry points, %d affected tests, %d transitive impacts", + direct_callers, route_entries, tests, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests", + direct_callers, route_entries, tests); + } + } else if (transitive > 0) { + 
snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d transitive impacts", + direct_callers, route_entries, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points", + direct_callers, route_entries); + } + return heap_strdup(buf); +} + +static size_t estimate_impact_item_chars(const cbm_impact_item_t *item, bool compact) { + size_t size = 72; + size += strlen(item->name ? item->name : ""); + size += strlen(item->file ? item->file : ""); + size += strlen(item->type ? item->type : ""); + size += compact ? 16 : 32; + return size; +} + +static void add_impact_item_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_impact_item_t *item, bool compact) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->name ? item->name : ""); + yyjson_mut_obj_add_str(doc, entry, "file", item->file ? item->file : ""); + yyjson_mut_obj_add_str(doc, entry, "type", item->type ? item->type : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, entry, "compact", true); + } else { + yyjson_mut_obj_add_real(doc, entry, "pagerank", item->pagerank); + } + yyjson_mut_arr_add_val(arr, entry); +} + +static size_t estimate_affected_test_chars(const cbm_affected_test_t *item) { + size_t size = 48; + size += strlen(item->name ? item->name : ""); + size += strlen(item->file ? item->file : ""); + return size; +} + +static void add_affected_test_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_affected_test_t *item) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->name ? item->name : ""); + yyjson_mut_obj_add_str(doc, entry, "file", item->file ? 
item->file : ""); + yyjson_mut_arr_add_val(arr, entry); +} + +static void add_budgeted_impact_group(yyjson_mut_doc *doc, yyjson_mut_val *impact_obj, + const char *group_name, const cbm_impact_item_t *items, + int count, size_t char_budget, size_t *used, int *shown, + int *full_items, bool *stop) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + if (!*stop) { + for (int i = 0; i < count; i++) { + bool compact = *full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_impact_item_chars(&items[i], compact); + if (*used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_impact_item_chars(&items[i], true); + } + if (*used + estimate > char_budget && *shown > 0) { + *stop = true; + break; + } + if (*used + estimate <= char_budget || *shown == 0) { + add_impact_item_json(doc, arr, &items[i], compact); + *used += estimate; + (*shown)++; + if (!compact) { + (*full_items)++; + } + } + } + } + yyjson_mut_obj_add_val(doc, impact_obj, group_name, arr); +} + typedef struct { char *buf; size_t len; @@ -662,6 +790,17 @@ static const tool_def_t TOOLS[] = { "keyword to narrow symbols by name, qualified name, or file path.\"}},\"required\":[" "\"project\"]}"}, + {"get_impact_analysis", + "Analyze the blast radius of changing a symbol: direct callers, indirect reach, routes, " + "affected tests, and a low/medium/high risk score.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Exact function, method, or class name.\"},\"depth\":{" + "\"type\":\"integer\",\"default\":3},\"include_tests\":{\"type\":\"boolean\",\"default\":true," + "\"description\":\"Include affected test files in the output array.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Controls " + "detail level.\"}},\"required\":[" + "\"project\",\"symbol\"]}"}, + {"search_code", "Graph-augmented code search. 
Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -1766,6 +1905,169 @@ static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { } } +static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + int depth = cbm_mcp_get_int_arg(args, "depth", 3); + bool include_tests = cbm_mcp_get_bool_arg_default(args, "include_tests", true); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!symbol) { + free(project); + return cbm_mcp_text_result("symbol is required", true); + } + REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_impact_analysis_t impact = {0}; + int rc = cbm_store_get_impact_analysis(store, project, symbol, depth, &impact); + if (rc == CBM_STORE_NOT_FOUND) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + free(project); + free(symbol); + return cbm_mcp_text_result(err, true); + } + if (rc != CBM_STORE_OK) { + free(project); + free(symbol); + cbm_store_impact_analysis_free(&impact); + return cbm_mcp_text_result("failed to build impact analysis", true); + } + + char *summary_text = impact_output_summary_dup(&impact, include_tests); + if (!summary_text) { + summary_text = heap_strdup(impact.summary ? 
impact.summary : ""); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + + yyjson_mut_val *impact_obj = yyjson_mut_obj(doc); + yyjson_mut_val *direct = yyjson_mut_arr(doc); + for (int i = 0; i < impact.direct_count; i++) { + add_impact_item_json(doc, direct, &impact.direct[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "direct", direct); + + yyjson_mut_val *indirect = yyjson_mut_arr(doc); + for (int i = 0; i < impact.indirect_count; i++) { + add_impact_item_json(doc, indirect, &impact.indirect[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "indirect", indirect); + + yyjson_mut_val *transitive = yyjson_mut_arr(doc); + for (int i = 0; i < impact.transitive_count; i++) { + add_impact_item_json(doc, transitive, &impact.transitive[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "transitive", transitive); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + yyjson_mut_val *tests = yyjson_mut_arr(doc); + if (include_tests) { + for (int i = 0; i < impact.affected_test_count; i++) { + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + + yyjson_mut_obj_add_str(doc, root, "risk_score", + impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + impact_output_total_results(&impact, include_tests)); + + size_t used = 96; + used += strlen(impact.symbol ? impact.symbol : ""); + used += strlen(impact.qualified_name ? impact.qualified_name : ""); + used += strlen(impact.file ? impact.file : ""); + used += strlen(impact.risk_score ? impact.risk_score : ""); + used += strlen(summary_text ? 
summary_text : ""); + + yyjson_mut_val *impact_obj2 = yyjson_mut_obj(doc); + int shown = 0; + int full_items = 0; + bool stop = false; + + add_budgeted_impact_group(doc, impact_obj2, "direct", impact.direct, impact.direct_count, + char_budget, &used, &shown, &full_items, &stop); + add_budgeted_impact_group(doc, impact_obj2, "indirect", impact.indirect, + impact.indirect_count, char_budget, &used, &shown, &full_items, + &stop); + add_budgeted_impact_group(doc, impact_obj2, "transitive", impact.transitive, + impact.transitive_count, char_budget, &used, &shown, + &full_items, &stop); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj2); + + yyjson_mut_val *tests2 = yyjson_mut_arr(doc); + if (include_tests && !stop) { + for (int i = 0; i < impact.affected_test_count; i++) { + size_t estimate = estimate_affected_test_chars(&impact.affected_tests[i]); + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_affected_test_json(doc, tests2, &impact.affected_tests[i]); + used += estimate; + shown++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests2); + yyjson_mut_obj_add_str(doc, root, "risk_score", + impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + free(summary_text); + cbm_store_impact_analysis_free(&impact); + free(project); + free(symbol); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + static int node_hop_rank_cmp(const void *lhs, const void *rhs) { const cbm_node_hop_t *a = lhs; const cbm_node_hop_t *b = rhs; @@ -3608,6 +3910,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_key_symbols") == 0) { return handle_get_key_symbols(srv, args_json); } + if (strcmp(tool_name, "get_impact_analysis") == 0) { + return handle_get_impact_analysis(srv, args_json); + } if (strcmp(tool_name, "get_architecture_summary") == 0) { return handle_get_architecture_summary(srv, args_json); } diff --git a/src/store/store.c b/src/store/store.c index f3186cec..b1ae0dbd 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2920,6 +2920,697 @@ int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop return CBM_STORE_OK; } +typedef struct { + int64_t id; + int hop; +} impact_visit_t; + +typedef struct { + int64_t id; + cbm_node_t node; + double pagerank; +} impact_cached_node_t; + +typedef struct { + CBMHashTable *seen_ids; + char **seen_keys; + int seen_key_count; + int seen_key_cap; + impact_visit_t *queue; + int queue_count; + int queue_cap; + impact_visit_t *visited; + int visited_count; + int visited_cap; +} impact_walk_t; + +static void impact_walk_free(impact_walk_t *walk) { + if (!walk) { + return; + } + if (walk->seen_ids) { + cbm_ht_free(walk->seen_ids); + } + for (int i = 0; i < walk->seen_key_count; i++) { + free(walk->seen_keys[i]); + } + free(walk->seen_keys); + free(walk->queue); + free(walk->visited); + memset(walk, 0, sizeof(*walk)); +} + +static void impact_cached_nodes_free(impact_cached_node_t *nodes, int count) { + if (!nodes) { + return; + } + 
for (int i = 0; i < count; i++) { + cbm_node_free_fields(&nodes[i].node); + } + free(nodes); +} + +static bool impact_label_is_callable(const char *label) { + return label && (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0 || + strcmp(label, "Class") == 0); +} + +static bool impact_json_bool_field(const char *json, const char *key) { + if (!json || !json[0] || !key) { + return false; + } + + yyjson_doc *doc = yyjson_read(json, strlen(json), 0); + if (!doc) { + return false; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *value = root ? yyjson_obj_get(root, key) : NULL; + bool result = false; + if (value) { + if (yyjson_is_bool(value)) { + result = yyjson_is_true(value); + } else if (yyjson_is_uint(value)) { + result = yyjson_get_uint(value) != 0; + } else if (yyjson_is_int(value)) { + result = yyjson_get_sint(value) != 0; + } + } + yyjson_doc_free(doc); + return result; +} + +static bool impact_node_is_entry_point(const cbm_node_t *node) { + if (!node) { + return false; + } + return impact_json_bool_field(node->properties_json, "is_entry_point"); +} + +static int impact_walk_enqueue(impact_walk_t *walk, int64_t id, int hop, bool record_visit) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "%lld", (long long)id); + if (walk->seen_ids && cbm_ht_get(walk->seen_ids, key_buf)) { + return CBM_STORE_OK; + } + + char *key = heap_strdup(key_buf); + if (!key) { + return CBM_STORE_ERR; + } + + if (walk->seen_key_count >= walk->seen_key_cap) { + int new_cap = walk->seen_key_cap > 0 ? walk->seen_key_cap * 2 : 16; + walk->seen_keys = safe_realloc(walk->seen_keys, (size_t)new_cap * sizeof(char *)); + walk->seen_key_cap = new_cap; + } + walk->seen_keys[walk->seen_key_count++] = key; + cbm_ht_set(walk->seen_ids, key, (void *)1); + + if (walk->queue_count >= walk->queue_cap) { + int new_cap = walk->queue_cap > 0 ? 
walk->queue_cap * 2 : 16; + walk->queue = safe_realloc(walk->queue, (size_t)new_cap * sizeof(impact_visit_t)); + walk->queue_cap = new_cap; + } + walk->queue[walk->queue_count++] = (impact_visit_t){.id = id, .hop = hop}; + + if (record_visit) { + if (walk->visited_count >= walk->visited_cap) { + int new_cap = walk->visited_cap > 0 ? walk->visited_cap * 2 : 16; + walk->visited = safe_realloc(walk->visited, (size_t)new_cap * sizeof(impact_visit_t)); + walk->visited_cap = new_cap; + } + walk->visited[walk->visited_count++] = (impact_visit_t){.id = id, .hop = hop}; + } + + return CBM_STORE_OK; +} + +static bool impact_node_in_top_five_percent(cbm_store_t *s, const char *project, double pagerank) { + if (!s || !s->db || !project || pagerank <= 0.0 || !store_has_node_scores_table(s)) { + return false; + } + + const char *total_sql = + "SELECT COUNT(*) FROM nodes WHERE project=?1 AND label IN ('Function','Method','Class')"; + sqlite3_stmt *stmt = NULL; + int total = 0; + if (sqlite3_prepare_v2(s->db, total_sql, -1, &stmt, NULL) != SQLITE_OK) { + return false; + } + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) == SQLITE_ROW) { + total = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + if (total <= 0) { + return false; + } + + int top_count = (total * 5 + 99) / 100; + if (top_count < 1) { + top_count = 1; + } + + const char *higher_sql = + "SELECT COUNT(*) " + "FROM nodes n " + "JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id " + "WHERE n.project=?1 AND n.label IN ('Function','Method','Class') " + "AND COALESCE(ns.pagerank, 0.0) > ?2"; + int higher = total; + if (sqlite3_prepare_v2(s->db, higher_sql, -1, &stmt, NULL) != SQLITE_OK) { + return false; + } + bind_text(stmt, 1, project); + sqlite3_bind_double(stmt, 2, pagerank); + if (sqlite3_step(stmt) == SQLITE_ROW) { + higher = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + + return higher < top_count; +} + +static int impact_select_target(cbm_store_t *s, const char 
*project, const char *symbol, + cbm_node_t *out_node, int *out_in_degree, double *out_pagerank) { + memset(out_node, 0, sizeof(*out_node)); + *out_in_degree = 0; + *out_pagerank = 0.0; + + if (!s || !s->db || !project || !symbol) { + return CBM_STORE_ERR; + } + + bool has_scores = store_has_node_scores_table(s); + char sql[1024]; + snprintf(sql, sizeof(sql), + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS " + "in_deg, %s " + "FROM nodes n %s " + "WHERE n.project=?1 AND n.name=?2 AND n.label IN ('Function','Method','Class') " + "ORDER BY CASE WHEN lower(COALESCE(n.file_path, '')) LIKE '%%test%%' THEN 1 " + "ELSE 0 END, pagerank DESC, in_deg DESC, " + "n.qualified_name ASC LIMIT 1;", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "impact_select_target"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + bind_text(stmt, 2, symbol); + + int rc = CBM_STORE_NOT_FOUND; + if (sqlite3_step(stmt) == SQLITE_ROW) { + scan_node(stmt, out_node); + *out_in_degree = sqlite3_column_int(stmt, 9); + *out_pagerank = sqlite3_column_double(stmt, 10); + rc = CBM_STORE_OK; + } else { + store_set_error(s, "symbol not found"); + } + sqlite3_finalize(stmt); + return rc; +} + +static int impact_cached_node_cmp(const void *lhs, const void *rhs) { + const impact_cached_node_t *a = lhs; + const impact_cached_node_t *b = rhs; + if (a->id < b->id) { + return -1; + } + if (a->id > b->id) { + return 1; + } + return 0; +} + +static const impact_cached_node_t *impact_find_cached_node(const impact_cached_node_t *nodes, + int count, int64_t id) { + int lo = 0; + int hi = count - 1; + 
while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + if (nodes[mid].id == id) { + return &nodes[mid]; + } + if (nodes[mid].id < id) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return NULL; +} + +static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, + const impact_visit_t *visits, int visit_count, + impact_cached_node_t **out_nodes, int *out_count) { + *out_nodes = NULL; + *out_count = 0; + if (!s || !s->db || !project) { + return CBM_STORE_ERR; + } + if (!visits || visit_count <= 0) { + return CBM_STORE_OK; + } + + bool has_scores = store_has_node_scores_table(s); + size_t sql_cap = 512 + ((size_t)visit_count * 8U); + char *sql = malloc(sql_cap); + if (!sql) { + return CBM_STORE_ERR; + } + + int written = snprintf( + sql, sql_cap, + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, %s " + "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + if (written < 0 || (size_t)written >= sql_cap) { + free(sql); + return CBM_STORE_ERR; + } + + size_t len = (size_t)written; + for (int i = 0; i < visit_count; i++) { + written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? 
"," : "", i + 2); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + return CBM_STORE_ERR; + } + len += (size_t)written; + } + written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + return CBM_STORE_ERR; + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(sql); + store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); + return CBM_STORE_ERR; + } + free(sql); + + bind_text(stmt, 1, project); + for (int i = 0; i < visit_count; i++) { + sqlite3_bind_int64(stmt, i + 2, visits[i].id); + } + + impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); + if (!nodes) { + sqlite3_finalize(stmt); + return CBM_STORE_ERR; + } + + int count = 0; + int rc = CBM_STORE_OK; + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= visit_count) { + rc = CBM_STORE_ERR; + break; + } + scan_node(stmt, &nodes[count].node); + nodes[count].id = nodes[count].node.id; + nodes[count].pagerank = sqlite3_column_double(stmt, 9); + count++; + } + sqlite3_finalize(stmt); + + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(nodes, count); + return rc; + } + + if (count > 1) { + qsort(nodes, (size_t)count, sizeof(*nodes), impact_cached_node_cmp); + } + *out_nodes = nodes; + *out_count = count; + return CBM_STORE_OK; +} + +static int impact_enqueue_neighbors(cbm_store_t *s, impact_walk_t *walk, int64_t node_id, + const char *edge_type, bool inbound, int next_hop) { + cbm_edge_t *edges = NULL; + int edge_count = 0; + int rc = inbound ? cbm_store_find_edges_by_target_type(s, node_id, edge_type, &edges, &edge_count) + : cbm_store_find_edges_by_source_type(s, node_id, edge_type, &edges, &edge_count); + if (rc != CBM_STORE_OK) { + return rc; + } + + for (int i = 0; i < edge_count; i++) { + int64_t next_id = inbound ? 
edges[i].source_id : edges[i].target_id; + rc = impact_walk_enqueue(walk, next_id, next_hop, true); + if (rc != CBM_STORE_OK) { + cbm_store_free_edges(edges, edge_count); + return rc; + } + } + + cbm_store_free_edges(edges, edge_count); + return CBM_STORE_OK; +} + +static int impact_append_item(cbm_impact_item_t **arr, int *count, int *cap, const cbm_node_t *node, + const char *type, double pagerank, int hop) { + if (*count >= *cap) { + int new_cap = *cap > 0 ? *cap * 2 : 8; + *arr = safe_realloc(*arr, (size_t)new_cap * sizeof(cbm_impact_item_t)); + *cap = new_cap; + } + + cbm_impact_item_t item = {0}; + item.name = heap_strdup(safe_str(node ? node->name : NULL)); + item.file = heap_strdup(safe_str(node ? node->file_path : NULL)); + item.type = heap_strdup(safe_str(type)); + item.pagerank = pagerank; + item.hop = hop; + + if (!item.name || !item.file || !item.type) { + free((void *)item.name); + free((void *)item.file); + free((void *)item.type); + return CBM_STORE_ERR; + } + + (*arr)[(*count)++] = item; + return CBM_STORE_OK; +} + +static int impact_append_test(cbm_affected_test_t **arr, int *count, int *cap, const cbm_node_t *node) { + if (*count >= *cap) { + int new_cap = *cap > 0 ? *cap * 2 : 4; + *arr = safe_realloc(*arr, (size_t)new_cap * sizeof(cbm_affected_test_t)); + *cap = new_cap; + } + + cbm_affected_test_t item = {0}; + item.name = heap_strdup(safe_str(node ? node->name : NULL)); + item.file = heap_strdup(safe_str(node ? 
node->file_path : NULL)); + if (!item.name || !item.file) { + free((void *)item.name); + free((void *)item.file); + return CBM_STORE_ERR; + } + + (*arr)[(*count)++] = item; + return CBM_STORE_OK; +} + +static int impact_item_cmp(const void *lhs, const void *rhs) { + const cbm_impact_item_t *a = lhs; + const cbm_impact_item_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->hop != b->hop) { + return a->hop - b->hop; + } + return strcmp(safe_str(a->name), safe_str(b->name)); +} + +static int impact_direct_caller_count(const cbm_impact_analysis_t *out) { + int direct_callers = 0; + for (int i = 0; i < out->direct_count; i++) { + if (out->direct[i].type && strcmp(out->direct[i].type, "route") == 0) { + continue; + } + direct_callers++; + } + return direct_callers; +} + +static int impact_route_entry_count(const cbm_impact_analysis_t *out) { + int total = 0; + const cbm_impact_item_t *groups[] = {out->direct, out->indirect, out->transitive}; + const int counts[] = {out->direct_count, out->indirect_count, out->transitive_count}; + for (int g = 0; g < 3; g++) { + for (int i = 0; i < counts[g]; i++) { + const char *type = groups[g][i].type; + if (type && (strcmp(type, "route") == 0 || strcmp(type, "entry_point") == 0)) { + total++; + } + } + } + return total; +} + +static char *impact_build_summary(const cbm_impact_analysis_t *out) { + int direct_callers = impact_direct_caller_count(out); + int route_entries = impact_route_entry_count(out); + int tests = out->affected_test_count; + int transitive = out->transitive_count; + + char buf[256]; + if (transitive > 0) { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests, " + "%d transitive impacts", + direct_callers, route_entries, tests, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests", + direct_callers, route_entries, tests); + } + return 
heap_strdup(buf); +} + +static char *impact_determine_risk(const cbm_impact_analysis_t *out, bool top_five_percent) { + int direct_callers = impact_direct_caller_count(out); + int indirect_reach = out->indirect_count + out->transitive_count; + int route_entries = impact_route_entry_count(out); + bool has_tests = out->affected_test_count > 0; + + if (direct_callers >= 3 || route_entries > 0 || top_five_percent) { + return heap_strdup("high"); + } + + if (direct_callers >= 1 && direct_callers <= 2 && indirect_reach > 0) { + return heap_strdup(has_tests ? "low" : "medium"); + } + + return heap_strdup("low"); +} + +int cbm_store_get_impact_analysis(cbm_store_t *s, const char *project, const char *symbol, + int depth, cbm_impact_analysis_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !s->db || !project || !symbol) { + store_set_error(s, "impact analysis requires project and symbol"); + return CBM_STORE_ERR; + } + if (depth < 1) { + depth = 1; + } + + cbm_node_t target = {0}; + int target_in_degree = 0; + double target_pagerank = 0.0; + int rc = impact_select_target(s, project, symbol, &target, &target_in_degree, &target_pagerank); + if (rc != CBM_STORE_OK) { + return rc; + } + + impact_walk_t walk = {0}; + walk.seen_ids = cbm_ht_create(64); + if (!walk.seen_ids) { + cbm_node_free_fields(&target); + store_set_error(s, "impact analysis alloc failed"); + return CBM_STORE_ERR; + } + + rc = impact_walk_enqueue(&walk, target.id, 0, false); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + store_set_error(s, "impact analysis alloc failed"); + return rc; + } + + for (int head = 0; head < walk.queue_count; head++) { + impact_visit_t current = walk.queue[head]; + if (current.hop >= depth) { + continue; + } + + cbm_node_t node = {0}; + if (cbm_store_find_node_by_id(s, current.id, &node) != CBM_STORE_OK) { + continue; + } + + int next_hop = current.hop + 1; + if (impact_label_is_callable(node.label)) { + rc = 
impact_enqueue_neighbors(s, &walk, current.id, "CALLS", true, next_hop); + if (rc == CBM_STORE_OK) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "HANDLES", false, next_hop); + } + } else if (node.label && strcmp(node.label, "Route") == 0) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "HTTP_CALLS", true, next_hop); + if (rc == CBM_STORE_OK) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "ASYNC_CALLS", true, next_hop); + } + } else { + rc = CBM_STORE_OK; + } + + cbm_node_free_fields(&node); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis traversal failed"); + return rc; + } + } + + impact_cached_node_t *cached_nodes = NULL; + int cached_count = 0; + rc = impact_fetch_nodes_with_scores(s, project, walk.visited, walk.visited_count, &cached_nodes, + &cached_count); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis lookup failed"); + return rc; + } + + int direct_cap = 0; + int indirect_cap = 0; + int transitive_cap = 0; + int test_cap = 0; + + for (int i = 0; i < walk.visited_count; i++) { + const impact_cached_node_t *cached = + impact_find_cached_node(cached_nodes, cached_count, walk.visited[i].id); + if (!cached) { + continue; + } + + const cbm_node_t *node = &cached->node; + double pagerank = cached->pagerank; + bool is_test = cbm_is_test_file_path(node->file_path); + bool is_entry_point = impact_node_is_entry_point(node); + + if (is_test) { + rc = impact_append_test(&out->affected_tests, &out->affected_test_count, &test_cap, node); + } else { + const char *item_type = (node->label && strcmp(node->label, "Route") == 0) + ? "route" + : (is_entry_point ? 
"entry_point" : "caller"); + if (walk.visited[i].hop == 1) { + rc = impact_append_item(&out->direct, &out->direct_count, &direct_cap, node, + item_type, pagerank, walk.visited[i].hop); + } else if (walk.visited[i].hop <= 3) { + rc = impact_append_item(&out->indirect, &out->indirect_count, &indirect_cap, node, + item_type, pagerank, walk.visited[i].hop); + } else { + rc = impact_append_item(&out->transitive, &out->transitive_count, &transitive_cap, + node, item_type, pagerank, walk.visited[i].hop); + } + } + + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(cached_nodes, cached_count); + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis alloc failed"); + return rc; + } + } + + if (out->direct_count > 1) { + qsort(out->direct, (size_t)out->direct_count, sizeof(cbm_impact_item_t), impact_item_cmp); + } + if (out->indirect_count > 1) { + qsort(out->indirect, (size_t)out->indirect_count, sizeof(cbm_impact_item_t), + impact_item_cmp); + } + if (out->transitive_count > 1) { + qsort(out->transitive, (size_t)out->transitive_count, sizeof(cbm_impact_item_t), + impact_item_cmp); + } + + out->symbol = heap_strdup(safe_str(target.name)); + out->qualified_name = heap_strdup(safe_str(target.qualified_name)); + out->file = heap_strdup(safe_str(target.file_path)); + out->pagerank = target_pagerank; + bool top_five_percent = impact_node_in_top_five_percent(s, project, target_pagerank); + out->risk_score = impact_determine_risk(out, top_five_percent); + out->summary = impact_build_summary(out); + + impact_cached_nodes_free(cached_nodes, cached_count); + impact_walk_free(&walk); + cbm_node_free_fields(&target); + + if (!out->symbol || !out->qualified_name || !out->file || !out->risk_score || !out->summary) { + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis alloc failed"); + return CBM_STORE_ERR; + } + + (void)target_in_degree; + return CBM_STORE_OK; +} + +void 
cbm_store_impact_analysis_free(cbm_impact_analysis_t *out) { + if (!out) { + return; + } + + for (int i = 0; i < out->direct_count; i++) { + free((void *)out->direct[i].name); + free((void *)out->direct[i].file); + free((void *)out->direct[i].type); + } + free(out->direct); + + for (int i = 0; i < out->indirect_count; i++) { + free((void *)out->indirect[i].name); + free((void *)out->indirect[i].file); + free((void *)out->indirect[i].type); + } + free(out->indirect); + + for (int i = 0; i < out->transitive_count; i++) { + free((void *)out->transitive[i].name); + free((void *)out->transitive[i].file); + free((void *)out->transitive[i].type); + } + free(out->transitive); + + for (int i = 0; i < out->affected_test_count; i++) { + free((void *)out->affected_tests[i].name); + free((void *)out->affected_tests[i].file); + } + free(out->affected_tests); + + free((void *)out->symbol); + free((void *)out->qualified_name); + free((void *)out->file); + free((void *)out->risk_score); + free((void *)out->summary); + memset(out, 0, sizeof(*out)); +} + /* ── Schema ─────────────────────────────────────────────────────── */ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out) { diff --git a/tests/test_mcp.c b/tests/test_mcp.c index d140e84d..9131a4e6 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -140,6 +140,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); + ASSERT_NOT_NULL(strstr(json, "get_impact_analysis")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); @@ -675,6 +676,186 @@ static cbm_mcp_server_t *setup_truncation_server(void) { return srv; } +static cbm_mcp_server_t *setup_impact_server(void) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = 
cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "impact", "/tmp/impact"); + cbm_mcp_server_set_project(srv, "impact"); + + cbm_node_t nodes[] = { + {.project = "impact", + .label = "Function", + .name = "ProcessOrder", + .qualified_name = "impact.service.ProcessOrder", + .file_path = "app/services/order_service.php"}, + {.project = "impact", + .label = "Method", + .name = "HandleOrder", + .qualified_name = "impact.controller.OrderController.HandleOrder", + .file_path = "app/controllers/OrderController.php"}, + {.project = "impact", + .label = "Function", + .name = "CliEntry", + .qualified_name = "impact.cli.CliEntry", + .file_path = "app/cli/order_cli.php", + .properties_json = "{\"is_entry_point\":true}"}, + {.project = "impact", + .label = "Route", + .name = "POST /orders", + .qualified_name = "impact.route.post_orders", + .file_path = "routes/api.php"}, + {.project = "impact", + .label = "Function", + .name = "CheckoutApi", + .qualified_name = "impact.http.CheckoutApi", + .file_path = "app/http/CheckoutApi.php"}, + {.project = "impact", + .label = "Function", + .name = "OrderWebhook", + .qualified_name = "impact.jobs.OrderWebhook", + .file_path = "app/jobs/OrderWebhook.php"}, + {.project = "impact", + .label = "Function", + .name = "BrowserFlow", + .qualified_name = "impact.ui.BrowserFlow", + .file_path = "app/ui/browser_flow.php"}, + {.project = "impact", + .label = "Function", + .name = "ProcessOrderTest", + .qualified_name = "impact.tests.ProcessOrderTest", + .file_path = "tests/process_order_test.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.core.Duplicate", + .file_path = "app/core/duplicate.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.tests.Duplicate", + .file_path = "tests/duplicate_test.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerA", + 
.qualified_name = "impact.core.CoreCallerA", + .file_path = "app/core/core_caller_a.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerB", + .qualified_name = "impact.core.CoreCallerB", + .file_path = "app/core/core_caller_b.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller", + .qualified_name = "impact.tests.TestCaller", + .file_path = "tests/test_caller.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller2", + .qualified_name = "impact.tests.TestCaller2", + .file_path = "tests/test_caller_two.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller3", + .qualified_name = "impact.tests.TestCaller3", + .file_path = "tests/test_caller_three.php"}, + }; + + enum { + ID_PROCESS_ORDER, + ID_HANDLE_ORDER, + ID_CLI_ENTRY, + ID_ROUTE, + ID_CHECKOUT_API, + ID_ORDER_WEBHOOK, + ID_BROWSER_FLOW, + ID_PROCESS_ORDER_TEST, + ID_DUPLICATE_PROD, + ID_DUPLICATE_TEST, + ID_CORE_CALLER_A, + ID_CORE_CALLER_B, + ID_TEST_CALLER, + ID_TEST_CALLER_2, + ID_TEST_CALLER_3, + ID_COUNT + }; + int64_t ids[ID_COUNT] = {0}; + for (int i = 0; i < ID_COUNT; i++) { + ids[i] = cbm_store_upsert_node(st, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CLI_ENTRY], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_PROCESS_ORDER_TEST], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_ROUTE], + .type = "HANDLES"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT_API], + .target_id = ids[ID_ROUTE], + .type = "HTTP_CALLS"}, + {.project = "impact", + .source_id = ids[ID_ORDER_WEBHOOK], + .target_id = ids[ID_ROUTE], + .type = "ASYNC_CALLS"}, + {.project = "impact", + .source_id = ids[ID_BROWSER_FLOW], + .target_id 
= ids[ID_CHECKOUT_API], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_A], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_B], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_2], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_3], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(st, &edges[i]); + } + + if (cbm_store_compute_pagerank(st, "impact", 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1386,6 +1567,125 @@ TEST(tool_query_graph_max_tokens_truncates) { PASS(); } +TEST(tool_get_impact_analysis_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"symbol\":\"ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"qualified_name\":\"impact.service.ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"risk_score\":\"high\"")); + ASSERT_NOT_NULL(strstr( + text, + "\"summary\":\"2 direct callers, 2 route/entry points, 1 affected tests, 1 transitive impacts\"")); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + 
+TEST(tool_get_impact_analysis_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", "{\"project\":\"impact\",\"symbol\":\"MissingSymbol\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "search_graph(name_pattern")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_ambiguous_symbol_picks_top_match) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"Duplicate\",\"depth\":2}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"qualified_name\":\"impact.core.Duplicate\"")); + ASSERT_NOT_NULL(strstr(text, "\"file\":\"app/core/duplicate.php\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_include_tests_false) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4,\"include_tests\":false}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[]")); + ASSERT_NOT_NULL( + strstr(text, "\"summary\":\"2 direct callers, 2 route/entry points, 1 transitive impacts\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + 
ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"impact\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_route_and_entry_point_typing) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL( + strstr(text, "\"name\":\"CliEntry\",\"file\":\"app/cli/order_cli.php\",\"type\":\"entry_point\"")); + ASSERT_NOT_NULL( + strstr(text, "\"name\":\"POST /orders\",\"file\":\"routes/api.php\",\"type\":\"route\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -2120,6 +2420,12 @@ SUITE(mcp) { RUN_TEST(tool_trace_call_path_chain_shows_omitted_count); RUN_TEST(tool_query_graph_missing_query); RUN_TEST(tool_query_graph_max_tokens_truncates); + RUN_TEST(tool_get_impact_analysis_basic); + RUN_TEST(tool_get_impact_analysis_missing_symbol); + RUN_TEST(tool_get_impact_analysis_ambiguous_symbol_picks_top_match); + RUN_TEST(tool_get_impact_analysis_include_tests_false); + RUN_TEST(tool_get_impact_analysis_max_tokens_truncates); + RUN_TEST(tool_get_impact_analysis_route_and_entry_point_typing); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); diff --git a/tests/test_store_search.c b/tests/test_store_search.c index 4a5b7b3e..2e68e607 100644 --- a/tests/test_store_search.c +++ b/tests/test_store_search.c @@ -82,6 +82,239 @@ static cbm_store_t *setup_pagerank_store(int64_t *ids) { return s; } +static cbm_store_t *setup_impact_store(bool with_pagerank) { + cbm_store_t *s = cbm_store_open_memory(); + if (!s) { + return NULL; + } + cbm_store_upsert_project(s, "impact", "/tmp/impact"); + + cbm_node_t nodes[] = { + {.project = "impact", + .label = "Function", + .name = "ProcessOrder", + .qualified_name = "impact.service.ProcessOrder", + .file_path = "app/services/order_service.php"}, + {.project = "impact", + .label = "Method", + .name = "HandleOrder", + .qualified_name = "impact.controller.OrderController.HandleOrder", + .file_path = "app/controllers/OrderController.php"}, + {.project = "impact", + .label = "Function", + .name = "CliEntry", + .qualified_name = "impact.cli.CliEntry", + .file_path = "app/cli/order_cli.php", + .properties_json = "{\"is_entry_point\":true}"}, + {.project = "impact", + .label = "Route", + .name = "POST /orders", + .qualified_name = "impact.route.post_orders", + .file_path = "routes/api.php"}, + {.project = "impact", + .label = "Function", + .name = "CheckoutApi", + .qualified_name = 
"impact.http.CheckoutApi", + .file_path = "app/http/CheckoutApi.php"}, + {.project = "impact", + .label = "Function", + .name = "OrderWebhook", + .qualified_name = "impact.jobs.OrderWebhook", + .file_path = "app/jobs/OrderWebhook.php"}, + {.project = "impact", + .label = "Function", + .name = "BrowserFlow", + .qualified_name = "impact.ui.BrowserFlow", + .file_path = "app/ui/browser_flow.php"}, + {.project = "impact", + .label = "Function", + .name = "ProcessOrderTest", + .qualified_name = "impact.tests.ProcessOrderTest", + .file_path = "tests/process_order_test.php"}, + {.project = "impact", + .label = "Function", + .name = "Notify", + .qualified_name = "impact.notify.Notify", + .file_path = "app/services/notify.php"}, + {.project = "impact", + .label = "Function", + .name = "TaskRunner", + .qualified_name = "impact.tasks.TaskRunner", + .file_path = "app/tasks/task_runner.php"}, + {.project = "impact", + .label = "Function", + .name = "Scheduler", + .qualified_name = "impact.scheduler.Scheduler", + .file_path = "app/schedule/scheduler.php"}, + {.project = "impact", + .label = "Function", + .name = "FormatAmount", + .qualified_name = "impact.money.FormatAmount", + .file_path = "app/util/money.php"}, + {.project = "impact", + .label = "Function", + .name = "Checkout", + .qualified_name = "impact.checkout.Checkout", + .file_path = "app/checkout/checkout.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.core.Duplicate", + .file_path = "app/core/duplicate.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.tests.Duplicate", + .file_path = "tests/duplicate_test.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerA", + .qualified_name = "impact.core.CoreCallerA", + .file_path = "app/core/core_caller_a.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerB", + .qualified_name = "impact.core.CoreCallerB", + .file_path = 
"app/core/core_caller_b.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller", + .qualified_name = "impact.tests.TestCaller", + .file_path = "tests/test_caller.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller2", + .qualified_name = "impact.tests.TestCaller2", + .file_path = "tests/test_caller_two.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller3", + .qualified_name = "impact.tests.TestCaller3", + .file_path = "tests/test_caller_three.php"}, + }; + + enum { + ID_PROCESS_ORDER, + ID_HANDLE_ORDER, + ID_CLI_ENTRY, + ID_ROUTE, + ID_CHECKOUT_API, + ID_ORDER_WEBHOOK, + ID_BROWSER_FLOW, + ID_PROCESS_ORDER_TEST, + ID_NOTIFY, + ID_TASK_RUNNER, + ID_SCHEDULER, + ID_FORMAT_AMOUNT, + ID_CHECKOUT, + ID_DUPLICATE_PROD, + ID_DUPLICATE_TEST, + ID_CORE_CALLER_A, + ID_CORE_CALLER_B, + ID_TEST_CALLER, + ID_TEST_CALLER_2, + ID_TEST_CALLER_3, + ID_COUNT + }; + int64_t ids[ID_COUNT] = {0}; + for (int i = 0; i < ID_COUNT; i++) { + ids[i] = cbm_store_upsert_node(s, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CLI_ENTRY], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_PROCESS_ORDER_TEST], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_ROUTE], + .type = "HANDLES"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT_API], + .target_id = ids[ID_ROUTE], + .type = "HTTP_CALLS"}, + {.project = "impact", + .source_id = ids[ID_ORDER_WEBHOOK], + .target_id = ids[ID_ROUTE], + .type = "ASYNC_CALLS"}, + {.project = "impact", + .source_id = ids[ID_BROWSER_FLOW], + .target_id = ids[ID_CHECKOUT_API], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TASK_RUNNER], + .target_id = ids[ID_NOTIFY], + .type 
= "CALLS"}, + {.project = "impact", + .source_id = ids[ID_SCHEDULER], + .target_id = ids[ID_TASK_RUNNER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT], + .target_id = ids[ID_FORMAT_AMOUNT], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_A], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_B], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_2], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_3], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(s, &edges[i]); + } + + if (with_pagerank && cbm_store_compute_pagerank(s, "impact", 20, 0.85) != CBM_STORE_OK) { + cbm_store_close(s); + return NULL; + } + return s; +} + +static const cbm_impact_item_t *find_impact_item(const cbm_impact_item_t *items, int count, + const char *name) { + for (int i = 0; i < count; i++) { + if (items[i].name && strcmp(items[i].name, name) == 0) { + return &items[i]; + } + } + return NULL; +} + +static const cbm_affected_test_t *find_affected_test(const cbm_affected_test_t *items, int count, + const char *name) { + for (int i = 0; i < count; i++) { + if (items[i].name && strcmp(items[i].name, name) == 0) { + return &items[i]; + } + } + return NULL; +} + /* ── Search by label ────────────────────────────────────────────── */ TEST(store_search_by_label) { @@ -1244,6 +1477,126 @@ TEST(store_is_test_file_various) { /* ── Risk/impact edge cases ────────────────────────────────────── */ +TEST(store_get_impact_analysis_high_risk_with_routes_and_tests) { + cbm_store_t *s = 
setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "ProcessOrder", 4, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "ProcessOrder"); + ASSERT_STR_EQ(out.qualified_name, "impact.service.ProcessOrder"); + ASSERT_STR_EQ(out.file, "app/services/order_service.php"); + ASSERT_EQ(out.direct_count, 2); + ASSERT_EQ(out.indirect_count, 3); + ASSERT_EQ(out.transitive_count, 1); + ASSERT_EQ(out.affected_test_count, 1); + ASSERT_STR_EQ(out.risk_score, "high"); + ASSERT_STR_EQ(out.summary, + "2 direct callers, 2 route/entry points, 1 affected tests, 1 transitive impacts"); + + const cbm_impact_item_t *entry = find_impact_item(out.direct, out.direct_count, "CliEntry"); + ASSERT_NOT_NULL(entry); + ASSERT_STR_EQ(entry->type, "entry_point"); + + const cbm_impact_item_t *handler = + find_impact_item(out.direct, out.direct_count, "HandleOrder"); + ASSERT_NOT_NULL(handler); + ASSERT_STR_EQ(handler->type, "caller"); + + const cbm_impact_item_t *route = + find_impact_item(out.indirect, out.indirect_count, "POST /orders"); + ASSERT_NOT_NULL(route); + ASSERT_STR_EQ(route->type, "route"); + + const cbm_impact_item_t *browser = + find_impact_item(out.transitive, out.transitive_count, "BrowserFlow"); + ASSERT_NOT_NULL(browser); + ASSERT_STR_EQ(browser->type, "caller"); + + const cbm_affected_test_t *test = + find_affected_test(out.affected_tests, out.affected_test_count, "ProcessOrderTest"); + ASSERT_NOT_NULL(test); + ASSERT_STR_EQ(test->file, "tests/process_order_test.php"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_medium_risk) { + cbm_store_t *s = setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "Notify", 3, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "Notify"); + ASSERT_EQ(out.direct_count, 1); + ASSERT_EQ(out.indirect_count, 
1); + ASSERT_EQ(out.transitive_count, 0); + ASSERT_EQ(out.affected_test_count, 0); + ASSERT_STR_EQ(out.risk_score, "medium"); + ASSERT_STR_EQ(out.summary, "1 direct callers, 0 route/entry points, 0 affected tests"); + + const cbm_impact_item_t *direct = + find_impact_item(out.direct, out.direct_count, "TaskRunner"); + ASSERT_NOT_NULL(direct); + ASSERT_STR_EQ(direct->type, "caller"); + + const cbm_impact_item_t *indirect = + find_impact_item(out.indirect, out.indirect_count, "Scheduler"); + ASSERT_NOT_NULL(indirect); + ASSERT_STR_EQ(indirect->type, "caller"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_low_risk) { + cbm_store_t *s = setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "FormatAmount", 3, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "FormatAmount"); + ASSERT_EQ(out.direct_count, 1); + ASSERT_EQ(out.indirect_count, 0); + ASSERT_EQ(out.transitive_count, 0); + ASSERT_EQ(out.affected_test_count, 0); + ASSERT_STR_EQ(out.risk_score, "low"); + ASSERT_STR_EQ(out.summary, "1 direct callers, 0 route/entry points, 0 affected tests"); + + const cbm_impact_item_t *direct = find_impact_item(out.direct, out.direct_count, "Checkout"); + ASSERT_NOT_NULL(direct); + ASSERT_STR_EQ(direct->type, "caller"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_ambiguous_symbol_prefers_ranked_match) { + cbm_store_t *s = setup_impact_store(true); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "Duplicate", 2, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "Duplicate"); + ASSERT_STR_EQ(out.qualified_name, "impact.core.Duplicate"); + ASSERT_STR_EQ(out.file, "app/core/duplicate.php"); + ASSERT_EQ(out.direct_count, 2); + ASSERT_NOT_NULL(find_impact_item(out.direct, out.direct_count, 
"CoreCallerA")); + ASSERT_NOT_NULL(find_impact_item(out.direct, out.direct_count, "CoreCallerB")); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + TEST(store_hop_to_risk_all_levels) { /* hop 0 hits the default case → LOW */ ASSERT_EQ(cbm_hop_to_risk(0), CBM_RISK_LOW); @@ -1343,6 +1696,10 @@ SUITE(store_search) { RUN_TEST(store_qn_to_top_package_many_segments); RUN_TEST(store_qn_to_top_package_null); RUN_TEST(store_is_test_file_various); + RUN_TEST(store_get_impact_analysis_high_risk_with_routes_and_tests); + RUN_TEST(store_get_impact_analysis_medium_risk); + RUN_TEST(store_get_impact_analysis_low_risk); + RUN_TEST(store_get_impact_analysis_ambiguous_symbol_prefers_ranked_match); RUN_TEST(store_hop_to_risk_all_levels); RUN_TEST(store_risk_label_all_levels); RUN_TEST(store_impact_summary_empty); From d261a14425fc610ec0911835df6e14635566bf9c Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 19:50:00 +0530 Subject: [PATCH 08/10] Wire blast radius public surface Add the impact-analysis store API declaration, expose get_impact_analysis in CLI help text, and cover the tool with the existing integration fixture. 
--- src/main.c | 2 +- src/store/store.h | 34 ++++++++++++++++++++++++++++++++++ tests/test_integration.c | 17 ++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/main.c b/src/main.c index 46c13e6f..a0e08da5 100644 --- a/src/main.c +++ b/src/main.c @@ -150,7 +150,7 @@ static void print_help(void) { printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); - printf(" get_architecture_summary, search_code,\n"); + printf(" get_architecture_summary, get_impact_analysis, search_code,\n"); printf(" get_key_symbols,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); diff --git a/src/store/store.h b/src/store/store.h index 99c240e3..3418b31e 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -429,6 +429,40 @@ cbm_impact_summary_t cbm_build_impact_summary(const cbm_node_hop_t *hops, int ho int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop_t **out, int *out_count); +typedef struct { + const char *name; + const char *file; + const char *type; + double pagerank; + int hop; /* internal traversal depth used for ordering/grouping */ +} cbm_impact_item_t; + +typedef struct { + const char *name; + const char *file; +} cbm_affected_test_t; + +typedef struct { + const char *symbol; + const char *qualified_name; + const char *file; + double pagerank; + cbm_impact_item_t *direct; + int direct_count; + cbm_impact_item_t *indirect; + int indirect_count; + cbm_impact_item_t *transitive; + int transitive_count; + cbm_affected_test_t *affected_tests; + int affected_test_count; + const char *risk_score; + const char *summary; +} cbm_impact_analysis_t; + +int cbm_store_get_impact_analysis(cbm_store_t *s, const char *project, const char *symbol, + int depth, 
cbm_impact_analysis_t *out); +void cbm_store_impact_analysis_free(cbm_impact_analysis_t *out); + /* ── Schema ─────────────────────────────────────────────────────── */ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out); diff --git a/tests/test_integration.c b/tests/test_integration.c index d0a14dde..847d9a77 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -393,6 +393,20 @@ TEST(integ_mcp_get_key_symbols) { PASS(); } +TEST(integ_mcp_get_impact_analysis) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"depth\":3}", g_project); + + char *resp = call_tool("get_impact_analysis", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"symbol\":\"Add\"")); + ASSERT_NOT_NULL(strstr(resp, "\"impact\"")); + ASSERT_NOT_NULL(strstr(resp, "\"risk_score\"")); + ASSERT_TRUE(strstr(resp, "Multiply") || strstr(resp, "Compute")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -575,7 +589,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 25; /* skip all integration tests */ + tf_skip_count += 26; /* skip all integration tests */ integration_teardown(); return; } @@ -597,6 +611,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_architecture); RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_get_key_symbols); + RUN_TEST(integ_mcp_get_impact_analysis); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); From 31de10986662b9bfedba780597c3e42583c6d1c6 Mon Sep 17 00:00:00 2001 From: maplenk Date: Fri, 27 Mar 2026 18:43:41 +0530 Subject: [PATCH 09/10] fix: batch impact lookups and clean up impact handler - Chunk impact_fetch_nodes_with_scores() into batches of 900 to stay under SQLite's SQLITE_MAX_VARIABLE_NUMBER bind limit - Replace 
REQUIRE_STORE in handle_get_impact_analysis() with explicit cleanup that frees symbol on early return - Fix pre-existing test_integration assertion that used escaped JSON field matching (was already failing on origin/feat/blast-radius) Co-Authored-By: Claude Opus 4.6 --- src/mcp/mcp.c | 9 ++- src/store/store.c | 127 ++++++++++++++++++++++----------------- tests/test_integration.c | 6 +- 3 files changed, 82 insertions(+), 60 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index eb3957f0..4095e44e 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1918,7 +1918,14 @@ static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) free(project); return cbm_mcp_text_result("symbol is required", true); } - REQUIRE_STORE(store, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { diff --git a/src/store/store.c b/src/store/store.c index b1ae0dbd..1823d074 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -3171,6 +3171,10 @@ static const impact_cached_node_t *impact_find_cached_node(const impact_cached_n return NULL; } +/* Chunk size for IN(...) batches — stays well under SQLite's default + * SQLITE_MAX_VARIABLE_NUMBER (999), leaving room for the ?1 project bind. 
*/ +#define IMPACT_FETCH_CHUNK_SIZE 900 + static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, const impact_visit_t *visits, int visit_count, impact_cached_node_t **out_nodes, int *out_count) { @@ -3183,77 +3187,88 @@ static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, return CBM_STORE_OK; } - bool has_scores = store_has_node_scores_table(s); - size_t sql_cap = 512 + ((size_t)visit_count * 8U); - char *sql = malloc(sql_cap); - if (!sql) { + impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); + if (!nodes) { return CBM_STORE_ERR; } - int written = snprintf( - sql, sql_cap, - "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " - "n.start_line, n.end_line, n.properties, %s " - "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", - has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", - has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" - : ""); - if (written < 0 || (size_t)written >= sql_cap) { - free(sql); - return CBM_STORE_ERR; - } + bool has_scores = store_has_node_scores_table(s); + int count = 0; + int rc = CBM_STORE_OK; + + for (int chunk_start = 0; chunk_start < visit_count; chunk_start += IMPACT_FETCH_CHUNK_SIZE) { + int chunk_end = chunk_start + IMPACT_FETCH_CHUNK_SIZE; + if (chunk_end > visit_count) chunk_end = visit_count; + int chunk_size = chunk_end - chunk_start; + + size_t sql_cap = 512 + ((size_t)chunk_size * 8U); + char *sql = malloc(sql_cap); + if (!sql) { + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } + + int written = snprintf( + sql, sql_cap, + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, %s " + "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? 
"LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + if (written < 0 || (size_t)written >= sql_cap) { + free(sql); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } - size_t len = (size_t)written; - for (int i = 0; i < visit_count; i++) { - written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? "," : "", i + 2); + size_t len = (size_t)written; + for (int i = 0; i < chunk_size; i++) { + written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? "," : "", i + 2); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } + len += (size_t)written; + } + written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); if (written < 0 || (size_t)written >= sql_cap - len) { free(sql); + impact_cached_nodes_free(nodes, count); return CBM_STORE_ERR; } - len += (size_t)written; - } - written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); - if (written < 0 || (size_t)written >= sql_cap - len) { - free(sql); - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(sql); + store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } free(sql); - store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); - return CBM_STORE_ERR; - } - free(sql); - bind_text(stmt, 1, project); - for (int i = 0; i < visit_count; i++) { - sqlite3_bind_int64(stmt, i + 2, visits[i].id); - } + bind_text(stmt, 1, project); + for (int i = 0; i < chunk_size; i++) { + sqlite3_bind_int64(stmt, i + 2, visits[chunk_start + i].id); + } - impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); - if (!nodes) { + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= visit_count) { + rc = 
CBM_STORE_ERR; + break; + } + scan_node(stmt, &nodes[count].node); + nodes[count].id = nodes[count].node.id; + nodes[count].pagerank = sqlite3_column_double(stmt, 9); + count++; + } sqlite3_finalize(stmt); - return CBM_STORE_ERR; - } - int count = 0; - int rc = CBM_STORE_OK; - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (count >= visit_count) { - rc = CBM_STORE_ERR; - break; + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(nodes, count); + return rc; } - scan_node(stmt, &nodes[count].node); - nodes[count].id = nodes[count].node.id; - nodes[count].pagerank = sqlite3_column_double(stmt, 9); - count++; - } - sqlite3_finalize(stmt); - - if (rc != CBM_STORE_OK) { - impact_cached_nodes_free(nodes, count); - return rc; } if (count > 1) { diff --git a/tests/test_integration.c b/tests/test_integration.c index 847d9a77..b106c11d 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -399,9 +399,9 @@ TEST(integ_mcp_get_impact_analysis) { char *resp = call_tool("get_impact_analysis", args); ASSERT_NOT_NULL(resp); - ASSERT_NOT_NULL(strstr(resp, "\"symbol\":\"Add\"")); - ASSERT_NOT_NULL(strstr(resp, "\"impact\"")); - ASSERT_NOT_NULL(strstr(resp, "\"risk_score\"")); + ASSERT_NOT_NULL(strstr(resp, "Add")); + ASSERT_NOT_NULL(strstr(resp, "impact")); + ASSERT_NOT_NULL(strstr(resp, "risk_score")); ASSERT_TRUE(strstr(resp, "Multiply") || strstr(resp, "Compute")); free(resp); PASS(); From 0e55891839d053f6d31d5750ae9f13995407bf19 Mon Sep 17 00:00:00 2001 From: maplenk Date: Wed, 25 Mar 2026 23:40:01 +0530 Subject: [PATCH 10/10] Add compound query tools + fix safety issues across all phases Phase 5: Three new compound MCP tools (explore, understand, prepare_change) that bundle multiple graph queries into single-call responses: - explore: area search with matches, dependencies, hotspots, entry points - understand: symbol deep-dive with 3-tier resolution (exact QN, exact name with auto-pick, QN suffix with suggestions), callers, callees, source, connected 
symbols, is_key_symbol flag - prepare_change: wraps impact analysis with review_scope (must_review, should_review, tests) and include_tests=false support - All three support max_tokens budget with progressive truncation - Wire qn_pattern in store search (completing pre-existing API contract) Bug fixes across all committed phases: - Fix REQUIRE_STORE leaking heap args in 5 handlers (get_key_symbols, get_impact_analysis, explore, understand, prepare_change) - Fix markdown_builder_reserve infinite loop on OOM (NULL check after safe_realloc) - Fix SQLite bind parameter limit in impact_fetch_nodes_with_scores (chunk into batches of 900) - Fix cbm_mcp_text_result(NULL) crash on OOM (guard with empty string) - Fix POSIX regex: remove invalid PCRE (?i) prefix from contains pattern - Fix search degree filter: set min_degree/max_degree to -1 (disabled) in compound search helpers - Fix summary_count_nodes returning -1 on SQL error (return 0 instead) - Fix explore total_results overcounting unfiltered hotspots - Fix qsort(NULL, 0) undefined behavior in explore - Fix handle_understand early return leaking search outputs (use goto) - Refactor handle_prepare_change to use goto cleanup pattern Output enrichment (non-breaking): - search_graph results now include start_line, end_line, signature - trace_call_path hops now include file_path, start_line, signature Tests: 2639 passed (+44 new), 0 failures Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main.c | 4 +- src/mcp/mcp.c | 1903 +++++++++++++++++++++++++++++++++++--- src/store/store.c | 11 +- tests/test_integration.c | 46 +- tests/test_mcp.c | 321 +++++++ 5 files changed, 2161 insertions(+), 124 deletions(-) diff --git a/src/main.c b/src/main.c index a0e08da5..18865b73 100644 --- a/src/main.c +++ b/src/main.c @@ -150,8 +150,8 @@ static void print_help(void) { printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, 
trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); - printf(" get_architecture_summary, get_impact_analysis, search_code,\n"); - printf(" get_key_symbols,\n"); + printf(" get_architecture_summary, get_key_symbols, get_impact_analysis,\n"); + printf(" explore, understand, prepare_change, search_code,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 4095e44e..09712bfe 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -478,6 +478,7 @@ static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { b->cap *= 2; b->buf = safe_realloc(b->buf, b->cap); if (!b->buf) { + b->truncated = true; return false; } } @@ -685,7 +686,7 @@ char *cbm_mcp_text_result(const char *text, bool is_error) { yyjson_mut_val *content = yyjson_mut_arr(doc); yyjson_mut_val *item = yyjson_mut_obj(doc); yyjson_mut_obj_add_str(doc, item, "type", "text"); - yyjson_mut_obj_add_str(doc, item, "text", text); + yyjson_mut_obj_add_str(doc, item, "text", text ? text : ""); yyjson_mut_arr_add_val(content, item); yyjson_mut_obj_add_val(doc, root, "content", content); @@ -801,6 +802,34 @@ static const tool_def_t TOOLS[] = { "detail level.\"}},\"required\":[" "\"project\",\"symbol\"]}"}, + {"explore", + "Compound area exploration: matching symbols, small dependency summaries, hotspots, and " + "entry points in one response.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"area\":{\"type\":" + "\"string\",\"description\":\"Case-insensitive keyword for the area to explore.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. 
Truncates lower-priority sections when needed.\"}},\"required\":[\"project\"," + "\"area\"]}"}, + + {"understand", + "Compound symbol deep-dive: definition, source, callers, callees, and connected symbols in " + "one response.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Short symbol name or full qualified_name.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Truncates " + "source and lower-priority arrays when needed.\"}},\"required\":[\"project\"," + "\"symbol\"]}"}, + + {"prepare_change", + "Compound pre-change analysis: blast radius, affected tests, risk score, and suggested " + "review scope.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Exact function, method, or class name.\"}," + "\"include_tests\":{\"type\":\"boolean\",\"default\":true,\"description\":\"Include " + "affected test details in the output.\"},\"max_tokens\":{\"type\":\"integer\"," + "\"default\":2000,\"description\":\"Maximum output size. Truncates lower-priority impact " + "details when needed.\"}},\"required\":[\"project\",\"symbol\"]}"}, + {"search_code", "Graph-augmented code search. 
Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -3019,157 +3048,1782 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { true); } -/* ── search_code v2: graph-augmented code search ─────────────── */ +/* ── compound queries ─────────────────────────────────────────── */ -/* Strip non-ASCII bytes to guarantee valid UTF-8 JSON output */ -enum { ASCII_MAX = 127 }; -static void sanitize_ascii(char *s) { - for (unsigned char *p = (unsigned char *)s; *p; p++) { - if (*p > ASCII_MAX) { - *p = '?'; +#define EXPLORE_SEARCH_LIMIT 64 +#define EXPLORE_DEPENDENCY_LIMIT 5 +#define UNDERSTAND_KEY_SYMBOL_LIMIT 20 +#define UNDERSTAND_MAX_BFS_RESULTS 64 +#define MAX_COMPACT_SOURCE_CHARS 256 +#define CONNECTED_REL_CALLER 0x1U +#define CONNECTED_REL_CALLEE 0x2U + +typedef struct { + const cbm_search_result_t *match; + char **callers; + int caller_count; + char **callees; + int callee_count; +} explore_dependency_t; + +typedef struct { + cbm_node_t node; + double pagerank; + unsigned relation_mask; +} connected_symbol_t; + +typedef struct { + char **must_review; + int must_review_count; + char **should_review; + int should_review_count; + char **tests; + int test_count; +} review_scope_t; + +static bool compound_is_symbol_label(const char *label) { + return label && (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0 || + strcmp(label, "Class") == 0); +} + +static char *regex_escape_literal(const char *text) { + static const char *meta = ".^$|()[]{}*+?\\"; + size_t extra = 0; + for (const char *p = text; p && *p; p++) { + if (strchr(meta, *p)) { + extra++; + } + } + + size_t len = strlen(text ? 
text : ""); + char *out = malloc(len + extra + 1); + if (!out) { + return NULL; + } + + char *dst = out; + for (const char *p = text; p && *p; p++) { + if (strchr(meta, *p)) { + *dst++ = '\\'; } + *dst++ = *p; } + *dst = '\0'; + return out; } -/* Intermediate grep match */ -typedef struct { - char file[512]; - int line; - char content[1024]; -} grep_match_t; +static char *build_exact_regex_pattern(const char *text) { + char *escaped = regex_escape_literal(text ? text : ""); + if (!escaped) { + return NULL; + } + size_t len = strlen(escaped) + 3; + char *pattern = malloc(len); + if (!pattern) { + free(escaped); + return NULL; + } + snprintf(pattern, len, "^%s$", escaped); + free(escaped); + return pattern; +} -/* Deduped result: one per containing graph node */ -typedef struct { - int64_t node_id; /* 0 = raw match (no containing node) */ - char node_name[256]; - char qualified_name[512]; - char label[64]; - char file[512]; - int start_line; - int end_line; - int in_degree; - int out_degree; - int score; - int match_lines[64]; - int match_count; -} search_result_t; +static char *build_contains_regex_pattern(const char *text) { + char *escaped = regex_escape_literal(text ? text : ""); + if (!escaped) { + return NULL; + } + size_t len = strlen(escaped) + 5; + char *pattern = malloc(len); + if (!pattern) { + free(escaped); + return NULL; + } + snprintf(pattern, len, ".*%s.*", escaped); + free(escaped); + return pattern; +} -/* Score a result for ranking: project source first, vendored last, tests lowest */ -enum { SCORE_FUNC = 10, SCORE_ROUTE = 15, SCORE_VENDORED = -50, SCORE_TEST = -5 }; -enum { MAX_LINE_SPAN = 999999 }; +static char *build_contains_glob_pattern(const char *text) { + size_t len = strlen(text ? text : "") + 3; + char *pattern = malloc(len); + if (!pattern) { + return NULL; + } + snprintf(pattern, len, "*%s*", text ? 
text : ""); + return pattern; +} -static int compute_search_score(const search_result_t *r) { - int score = r->in_degree; - if (strcmp(r->label, "Function") == 0 || strcmp(r->label, "Method") == 0) { - score += SCORE_FUNC; +static bool search_result_matches_area(const cbm_search_result_t *sr, const char *area) { + if (!sr || !area || !area[0]) { + return false; } - if (strcmp(r->label, "Route") == 0) { - score += SCORE_ROUTE; + return (sr->node.name && cbm_strcasestr(sr->node.name, area)) || + (sr->node.qualified_name && cbm_strcasestr(sr->node.qualified_name, area)) || + (sr->node.file_path && cbm_strcasestr(sr->node.file_path, area)); +} + +static bool key_symbol_matches_area(const cbm_key_symbol_t *sym, const char *area) { + if (!sym || !area || !area[0]) { + return false; } - if (strstr(r->file, "vendored/") || strstr(r->file, "vendor/") || - strstr(r->file, "node_modules/")) { - score += SCORE_VENDORED; + return (sym->name && cbm_strcasestr(sym->name, area)) || + (sym->qualified_name && cbm_strcasestr(sym->qualified_name, area)) || + (sym->file_path && cbm_strcasestr(sym->file_path, area)); +} + +static bool entry_point_matches_area(const cbm_entry_point_t *entry, const char *area) { + if (!entry || !area || !area[0]) { + return false; } - /* Penalize test files */ - if (strstr(r->file, "test") || strstr(r->file, "spec") || strstr(r->file, "_test.")) { - score += SCORE_TEST; + return (entry->name && cbm_strcasestr(entry->name, area)) || + (entry->qualified_name && cbm_strcasestr(entry->qualified_name, area)) || + (entry->file && cbm_strcasestr(entry->file, area)); +} + +static int search_result_ref_rank_cmp(const void *lhs, const void *rhs) { + const cbm_search_result_t *a = *(const cbm_search_result_t *const *)lhs; + const cbm_search_result_t *b = *(const cbm_search_result_t *const *)rhs; + if (a->pagerank < b->pagerank) { + return 1; } - return score; + if (a->pagerank > b->pagerank) { + return -1; + } + int a_degree = a->in_degree + a->out_degree; + int 
b_degree = b->in_degree + b->out_degree; + if (a_degree != b_degree) { + return b_degree - a_degree; + } + const char *aqn = a->node.qualified_name ? a->node.qualified_name : ""; + const char *bqn = b->node.qualified_name ? b->node.qualified_name : ""; + return strcmp(aqn, bqn); } -static int search_result_cmp(const void *a, const void *b) { - const search_result_t *ra = (const search_result_t *)a; - const search_result_t *rb = (const search_result_t *)b; - return rb->score - ra->score; /* descending */ +static int understand_exact_match_cmp(const void *lhs, const void *rhs) { + const cbm_search_result_t *a = *(const cbm_search_result_t *const *)lhs; + const cbm_search_result_t *b = *(const cbm_search_result_t *const *)rhs; + bool a_test = cbm_is_test_file_path(a->node.file_path); + bool b_test = cbm_is_test_file_path(b->node.file_path); + if (a_test != b_test) { + return a_test ? 1 : -1; + } + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->in_degree != b->in_degree) { + return b->in_degree - a->in_degree; + } + const char *aqn = a->node.qualified_name ? a->node.qualified_name : ""; + const char *bqn = b->node.qualified_name ? b->node.qualified_name : ""; + return strcmp(aqn, bqn); } -/* Build the grep command string based on scoped vs recursive mode */ -static void build_grep_cmd(char *cmd, size_t cmd_sz, bool use_regex, bool scoped, - const char *file_pattern, const char *tmpfile, const char *filelist, - const char *root_path) { - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *flag = use_regex ? 
"-E" : "-F"; - if (scoped) { - if (file_pattern) { - snprintf(cmd, cmd_sz, "xargs grep -n %s --include='%s' -f '%s' < '%s' 2>/dev/null", - flag, file_pattern, tmpfile, filelist); - } else { - snprintf(cmd, cmd_sz, "xargs grep -n %s -f '%s' < '%s' 2>/dev/null", flag, tmpfile, - filelist); +static int connected_symbol_cmp(const void *lhs, const void *rhs) { + const connected_symbol_t *a = lhs; + const connected_symbol_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + const char *an = a->node.name ? a->node.name : ""; + const char *bn = b->node.name ? b->node.name : ""; + return strcmp(an, bn); +} + +static bool append_unique_search_ref(const cbm_search_result_t *sr, + const cbm_search_result_t ***refs, int *count, int *cap) { + if (!sr || !refs || !count || !cap) { + return false; + } + for (int i = 0; i < *count; i++) { + if ((*refs)[i]->node.id == sr->node.id) { + return true; } - } else { - if (file_pattern) { - snprintf(cmd, cmd_sz, "grep -rn %s --include='%s' -f '%s' '%s' 2>/dev/null", flag, - file_pattern, tmpfile, root_path); - } else { - snprintf(cmd, cmd_sz, "grep -rn %s -f '%s' '%s' 2>/dev/null", flag, tmpfile, root_path); + } + if (*count >= *cap) { + *cap = *cap > 0 ? 
(*cap * 2) : 16; + *refs = safe_realloc((void *)*refs, (size_t)*cap * sizeof(**refs)); + } + (*refs)[(*count)++] = sr; + return true; +} + +static void append_matching_refs(const cbm_search_output_t *out, const char *area, + bool symbol_only, const cbm_search_result_t ***refs, + int *count, int *cap) { + if (!out || !refs || !count || !cap) { + return; + } + for (int i = 0; i < out->count; i++) { + if (symbol_only && !compound_is_symbol_label(out->results[i].node.label)) { + continue; + } + if (area && area[0] && !search_result_matches_area(&out->results[i], area)) { + continue; } + (void)append_unique_search_ref(&out->results[i], refs, count, cap); } } -/* Phase 4: assemble JSON output from search results */ -static char *assemble_search_output(search_result_t *sr, int sr_count, grep_match_t *raw, - int raw_count, int gm_count, int limit, int mode, - int context_lines, const char *root_path) { - enum { MODE_COMPACT = 0, MODE_FULL = 1, MODE_FILES = 2 }; +static char *load_node_source(cbm_mcp_server_t *srv, const cbm_node_t *node) { + if (!srv || !node || !node->project || !node->file_path) { + return NULL; + } - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); + char *root_path = get_project_root(srv, node->project); + if (!root_path) { + return NULL; + } - int output_count = sr_count < limit ? sr_count : limit; + int start = node->start_line > 0 ? node->start_line : 1; + int end = node->end_line > start ? 
node->end_line : start + SNIPPET_DEFAULT_LINES; + size_t path_len = strlen(root_path) + strlen(node->file_path) + 2; + char *abs_path = malloc(path_len); + char *source = NULL; + if (!abs_path) { + free(root_path); + return NULL; + } + snprintf(abs_path, path_len, "%s/%s", root_path, node->file_path); - if (mode == MODE_FILES) { - yyjson_mut_val *files_arr = yyjson_mut_arr(doc); - char *seen_files[512]; - int seen_count = 0; - for (int fi = 0; fi < output_count; fi++) { - bool dup = false; - for (int j = 0; j < seen_count; j++) { - if (strcmp(seen_files[j], sr[fi].file) == 0) { - dup = true; - break; - } - } - if (!dup && seen_count < 512) { - seen_files[seen_count++] = sr[fi].file; - yyjson_mut_arr_add_str(doc, files_arr, sr[fi].file); - } + char real_root[4096]; + char real_file[4096]; + bool path_ok = false; +#ifdef _WIN32 + if (_fullpath(real_root, root_path, sizeof(real_root)) && + _fullpath(real_file, abs_path, sizeof(real_file))) { +#else + if (realpath(root_path, real_root) && realpath(abs_path, real_file)) { +#endif + size_t root_len = strlen(real_root); + if (strncmp(real_file, real_root, root_len) == 0 && + (real_file[root_len] == '/' || real_file[root_len] == '\\' || + real_file[root_len] == '\0')) { + path_ok = true; } - for (int fi = 0; fi < raw_count && seen_count < 512; fi++) { - bool dup = false; - for (int j = 0; j < seen_count; j++) { - if (strcmp(seen_files[j], raw[fi].file) == 0) { - dup = true; + } + if (path_ok) { + source = read_file_lines(abs_path, start, end); + } + + free(abs_path); + free(root_path); + return source; +} + +static void free_name_lists(char **items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free(items[i]); + } + free(items); +} + +static void free_explore_dependencies(explore_dependency_t *deps, int count) { + if (!deps) { + return; + } + for (int i = 0; i < count; i++) { + free_name_lists(deps[i].callers, deps[i].caller_count); + free_name_lists(deps[i].callees, 
deps[i].callee_count); + } + free(deps); +} + +static int collect_explore_dependencies(cbm_store_t *store, const cbm_search_result_t **matches, + int match_count, explore_dependency_t **out, int *out_count) { + *out = NULL; + *out_count = 0; + if (!store || !matches || match_count <= 0) { + return CBM_STORE_OK; + } + + int dep_count = match_count < EXPLORE_DEPENDENCY_LIMIT ? match_count : EXPLORE_DEPENDENCY_LIMIT; + explore_dependency_t *deps = calloc((size_t)dep_count, sizeof(*deps)); + if (!deps) { + return CBM_STORE_ERR; + } + + for (int i = 0; i < dep_count; i++) { + deps[i].match = matches[i]; + if (cbm_store_node_neighbor_names(store, matches[i]->node.id, 5, &deps[i].callers, + &deps[i].caller_count, &deps[i].callees, + &deps[i].callee_count) != CBM_STORE_OK) { + free_explore_dependencies(deps, dep_count); + return CBM_STORE_ERR; + } + } + + *out = deps; + *out_count = dep_count; + return CBM_STORE_OK; +} + +static int collect_connected_symbols(const cbm_traverse_result_t *callers, + const cbm_traverse_result_t *callees, + connected_symbol_t **out, int *out_count) { + *out = NULL; + *out_count = 0; + + int cap = callers->visited_count + callees->visited_count; + if (cap <= 0) { + return CBM_STORE_OK; + } + + connected_symbol_t *items = calloc((size_t)cap, sizeof(*items)); + if (!items) { + return CBM_STORE_ERR; + } + + int count = 0; + const cbm_traverse_result_t *groups[] = {callers, callees}; + const unsigned masks[] = {CONNECTED_REL_CALLER, CONNECTED_REL_CALLEE}; + for (int g = 0; g < 2; g++) { + for (int i = 0; i < groups[g]->visited_count; i++) { + const cbm_node_hop_t *hop = &groups[g]->visited[i]; + int found = -1; + for (int j = 0; j < count; j++) { + if (items[j].node.id == hop->node.id) { + found = j; break; } } - if (!dup) { - seen_files[seen_count++] = raw[fi].file; - yyjson_mut_arr_add_str(doc, files_arr, raw[fi].file); + if (found >= 0) { + items[found].relation_mask |= masks[g]; + if (hop->pagerank > items[found].pagerank) { + 
items[found].pagerank = hop->pagerank; + } + continue; } + + copy_node(&hop->node, &items[count].node); + items[count].pagerank = hop->pagerank; + items[count].relation_mask = masks[g]; + count++; } - yyjson_mut_obj_add_val(doc, root_obj, "files", files_arr); - } else { - yyjson_mut_val *results_arr = yyjson_mut_arr(doc); - for (int ri = 0; ri < output_count; ri++) { - search_result_t *r = &sr[ri]; - yyjson_mut_val *item = yyjson_mut_obj(doc); + } - yyjson_mut_obj_add_str(doc, item, "node", r->node_name); - yyjson_mut_obj_add_str(doc, item, "qualified_name", r->qualified_name); - yyjson_mut_obj_add_str(doc, item, "label", r->label); - yyjson_mut_obj_add_str(doc, item, "file", r->file); - yyjson_mut_obj_add_int(doc, item, "start_line", r->start_line); - yyjson_mut_obj_add_int(doc, item, "end_line", r->end_line); - yyjson_mut_obj_add_int(doc, item, "in_degree", r->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", r->out_degree); + qsort(items, (size_t)count, sizeof(*items), connected_symbol_cmp); + *out = items; + *out_count = count; + return CBM_STORE_OK; +} - yyjson_mut_val *ml = yyjson_mut_arr(doc); - for (int j = 0; j < r->match_count; j++) { - yyjson_mut_arr_add_int(doc, ml, r->match_lines[j]); - } - yyjson_mut_obj_add_val(doc, item, "match_lines", ml); +static void free_connected_symbols(connected_symbol_t *items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + cbm_node_free_fields(&items[i].node); + } + free(items); +} + +static const char *connected_relation_label(unsigned relation_mask) { + if ((relation_mask & CONNECTED_REL_CALLER) && (relation_mask & CONNECTED_REL_CALLEE)) { + return "both"; + } + if (relation_mask & CONNECTED_REL_CALLER) { + return "caller"; + } + return "callee"; +} + +static bool understand_is_key_symbol(cbm_store_t *store, const char *project, + const cbm_search_result_t *selected) { + cbm_key_symbol_t *symbols = NULL; + int count = 0; + bool found = false; + if 
(cbm_store_get_key_symbols(store, project, NULL, UNDERSTAND_KEY_SYMBOL_LIMIT, &symbols, + &count) != CBM_STORE_OK) { + return false; + } + for (int i = 0; i < count; i++) { + if (selected->node.qualified_name && symbols[i].qualified_name && + strcmp(selected->node.qualified_name, symbols[i].qualified_name) == 0) { + found = true; + break; + } + } + cbm_store_key_symbols_free(symbols, count); + return found; +} + +static int append_unique_string(char ***items, int *count, const char *value) { + if (!items || !count || !value || !value[0]) { + return CBM_STORE_OK; + } + for (int i = 0; i < *count; i++) { + if (strcmp((*items)[i], value) == 0) { + return CBM_STORE_OK; + } + } + char **resized = safe_realloc(*items, (size_t)(*count + 1) * sizeof(**items)); + *items = resized; + (*items)[*count] = heap_strdup(value); + if (!(*items)[*count]) { + return CBM_STORE_ERR; + } + (*count)++; + return CBM_STORE_OK; +} + +static void free_string_array(char **items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free(items[i]); + } + free(items); +} + +static void review_scope_free(review_scope_t *scope) { + if (!scope) { + return; + } + free_string_array(scope->must_review, scope->must_review_count); + free_string_array(scope->should_review, scope->should_review_count); + free_string_array(scope->tests, scope->test_count); + memset(scope, 0, sizeof(*scope)); +} + +static int build_review_scope(const cbm_impact_analysis_t *impact, bool include_tests, + review_scope_t *scope) { + memset(scope, 0, sizeof(*scope)); + if (append_unique_string(&scope->must_review, &scope->must_review_count, impact->file) != + CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + for (int i = 0; i < impact->direct_count; i++) { + if (append_unique_string(&scope->must_review, &scope->must_review_count, + impact->direct[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + for (int i = 0; i < impact->indirect_count; 
i++) { + if (append_unique_string(&scope->should_review, &scope->should_review_count, + impact->indirect[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + for (int i = 0; i < impact->transitive_count; i++) { + if (append_unique_string(&scope->should_review, &scope->should_review_count, + impact->transitive[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + if (include_tests) { + for (int i = 0; i < impact->affected_test_count; i++) { + if (append_unique_string(&scope->tests, &scope->test_count, + impact->affected_tests[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + } + return CBM_STORE_OK; +} + +static size_t estimate_string_list_entry_chars(const char *value) { + return strlen(value ? value : "") + 8; +} + +static size_t estimate_key_symbol_chars(const cbm_key_symbol_t *sym, bool compact) { + size_t size = 72; + size += strlen(sym->name ? sym->name : ""); + size += strlen(sym->file_path ? sym->file_path : ""); + if (!compact) { + size += strlen(sym->qualified_name ? sym->qualified_name : ""); + size += strlen(sym->label ? sym->label : ""); + size += 32; + } else { + size += 16; + } + return size; +} + +static void add_key_symbol_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, const cbm_key_symbol_t *sym, + bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", sym->name ? sym->name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sym->file_path ? sym->file_path : ""); + if (compact) { + yyjson_mut_obj_add_real(doc, item, "pagerank", sym->pagerank); + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sym->qualified_name ? sym->qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sym->label ? 
sym->label : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", sym->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sym->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sym->pagerank); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_entry_point_chars(const cbm_entry_point_t *entry, bool compact) { + size_t size = 48; + size += strlen(entry->name ? entry->name : ""); + size += strlen(entry->file ? entry->file : ""); + if (!compact) { + size += strlen(entry->qualified_name ? entry->qualified_name : ""); + } else { + size += 12; + } + return size; +} + +static void add_entry_point_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_entry_point_t *entry, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", entry->name ? entry->name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", entry->file ? entry->file : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + entry->qualified_name ? entry->qualified_name : ""); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_explore_dependency_chars(const explore_dependency_t *dep, bool compact) { + size_t size = 64; + size += strlen(dep->match->node.name ? dep->match->node.name : ""); + size += strlen(dep->match->node.file_path ? dep->match->node.file_path : ""); + if (compact) { + size += 24; + return size; + } + size += strlen(dep->match->node.qualified_name ? dep->match->node.qualified_name : ""); + for (int i = 0; i < dep->caller_count; i++) { + size += strlen(dep->callers[i]); + } + for (int i = 0; i < dep->callee_count; i++) { + size += strlen(dep->callees[i]); + } + return size + 24; +} + +static void add_string_array_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, char **values, + int count) { + for (int i = 0; i < count; i++) { + yyjson_mut_arr_add_str(doc, arr, values[i] ? 
values[i] : ""); + } +} + +static void add_explore_dependency_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const explore_dependency_t *dep, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", + dep->match->node.name ? dep->match->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + dep->match->node.file_path ? dep->match->node.file_path : ""); + if (compact) { + yyjson_mut_obj_add_int(doc, item, "caller_count", dep->caller_count); + yyjson_mut_obj_add_int(doc, item, "callee_count", dep->callee_count); + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + dep->match->node.qualified_name ? dep->match->node.qualified_name + : ""); + yyjson_mut_val *callers = yyjson_mut_arr(doc); + yyjson_mut_val *callees = yyjson_mut_arr(doc); + add_string_array_json(doc, callers, dep->callers, dep->caller_count); + add_string_array_json(doc, callees, dep->callees, dep->callee_count); + yyjson_mut_obj_add_val(doc, item, "callers", callers); + yyjson_mut_obj_add_val(doc, item, "callees", callees); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_alternative_chars(const cbm_search_result_t *sr, bool compact) { + size_t size = 48; + size += strlen(sr->node.qualified_name ? sr->node.qualified_name : ""); + size += strlen(sr->node.file_path ? sr->node.file_path : ""); + size += compact ? 12 : strlen(sr->node.label ? sr->node.label : "") + 16; + return size; +} + +static void add_alternative_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_search_result_t *sr, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sr->node.file_path ? 
sr->node.file_path : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_connected_symbol_chars(const connected_symbol_t *item, bool compact) { + size_t size = 64; + size += strlen(item->node.name ? item->node.name : ""); + size += strlen(item->node.file_path ? item->node.file_path : ""); + size += strlen(connected_relation_label(item->relation_mask)); + if (!compact) { + size += strlen(item->node.qualified_name ? item->node.qualified_name : ""); + size += 16; + } else { + size += 8; + } + return size; +} + +static void add_connected_symbol_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const connected_symbol_t *item, bool compact) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->node.name ? item->node.name : ""); + yyjson_mut_obj_add_str(doc, entry, "file_path", item->node.file_path ? item->node.file_path + : ""); + yyjson_mut_obj_add_int(doc, entry, "start_line", item->node.start_line); + yyjson_mut_obj_add_str(doc, entry, "relation", connected_relation_label(item->relation_mask)); + if (compact) { + yyjson_mut_obj_add_bool(doc, entry, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, entry, "qualified_name", + item->node.qualified_name ? item->node.qualified_name : ""); + yyjson_mut_obj_add_real(doc, entry, "pagerank", item->pagerank); + } + yyjson_mut_arr_add_val(arr, entry); +} + +static size_t estimate_understand_definition_chars(const cbm_search_result_t *selected, + const char *source, bool include_source) { + size_t size = 96; + size += strlen(selected->node.file_path ? 
selected->node.file_path : ""); + size += estimate_signature_field_chars(&selected->node); + if (include_source && source) { + size += strlen(source) + 24; + } + return size; +} + +static void add_understand_definition_json(yyjson_mut_doc *doc, yyjson_mut_val *root, + const cbm_search_result_t *selected, + const char *source) { + yyjson_mut_val *definition = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, definition, "file_path", + selected->node.file_path ? selected->node.file_path : ""); + yyjson_mut_obj_add_int(doc, definition, "start_line", selected->node.start_line); + yyjson_mut_obj_add_int(doc, definition, "end_line", selected->node.end_line); + + char *signature = node_signature_dup(&selected->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, definition, "signature", signature); + } + free(signature); + + if (source) { + yyjson_mut_obj_add_str(doc, definition, "source", source); + } + + yyjson_mut_obj_add_val(doc, root, "definition", definition); +} + +static char *build_symbol_suggestions_response(const char *input, cbm_node_t *nodes, int count) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "status", "ambiguous"); + char msg[512]; + snprintf(msg, sizeof(msg), + "%d matches for \"%s\". Pick a qualified_name from suggestions below, " + "or use search_graph(name_pattern=\"...\") to narrow results.", + count, input); + yyjson_mut_obj_add_str(doc, root, "message", msg); + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + nodes[i].qualified_name ? nodes[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "name", nodes[i].name ? nodes[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "label", nodes[i].label ? 
nodes[i].label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", nodes[i].file_path ? nodes[i].file_path + : ""); + yyjson_mut_arr_add_val(arr, item); + } + yyjson_mut_obj_add_val(doc, root, "suggestions", arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static int run_store_search(cbm_store_t *store, const cbm_search_params_t *params, + cbm_search_output_t *out) { + memset(out, 0, sizeof(*out)); + return cbm_store_search(store, params, out); +} + +static int search_exact_qn(cbm_store_t *store, const char *project, const char *qn, + cbm_search_output_t *out) { + char *pattern = build_exact_regex_pattern(qn); + if (!pattern) { + return CBM_STORE_ERR; + } + cbm_search_params_t params = { + .project = project, + .qn_pattern = pattern, + .limit = 16, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + int rc = run_store_search(store, ¶ms, out); + free(pattern); + return rc; +} + +static int search_exact_name(cbm_store_t *store, const char *project, const char *name, + cbm_search_output_t *out) { + char *pattern = build_exact_regex_pattern(name); + if (!pattern) { + return CBM_STORE_ERR; + } + cbm_search_params_t params = { + .project = project, + .name_pattern = pattern, + .limit = 64, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + int rc = run_store_search(store, ¶ms, out); + free(pattern); + return rc; +} + +static int search_contains(cbm_store_t *store, const char *project, const char *name_pattern, + const char *qn_pattern, const char *file_pattern, + cbm_search_output_t *out) { + cbm_search_params_t params = { + .project = project, + .name_pattern = name_pattern, + .qn_pattern = qn_pattern, + .file_pattern = file_pattern, + .limit = EXPLORE_SEARCH_LIMIT, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + return run_store_search(store, ¶ms, out); +} + +static char 
*handle_explore(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *area = cbm_mcp_get_string_arg(args, "area"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!area || !area[0]) { + free(project); + free(area); + return cbm_mcp_text_result("area is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(area); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(area); + return not_indexed; + } + + char *contains_regex = build_contains_regex_pattern(area); + char *file_glob = build_contains_glob_pattern(area); + cbm_search_output_t name_out = {0}; + cbm_search_output_t qn_out = {0}; + cbm_search_output_t file_out = {0}; + const cbm_search_result_t **matches = NULL; + int match_count = 0; + int match_cap = 0; + explore_dependency_t *deps = NULL; + int dep_count = 0; + cbm_key_symbol_t *hotspots = NULL; + int hotspot_count = 0; + cbm_architecture_info_t arch = {0}; + cbm_entry_point_t **entry_refs = NULL; + int entry_count = 0; + int entry_cap = 0; + char *result = NULL; + + if (!contains_regex || !file_glob) { + result = cbm_mcp_text_result("failed to allocate explore filters", true); + goto cleanup_explore; + } + if (search_contains(store, project, contains_regex, NULL, NULL, &name_out) != CBM_STORE_OK || + search_contains(store, project, NULL, contains_regex, NULL, &qn_out) != CBM_STORE_OK || + search_contains(store, project, NULL, NULL, file_glob, &file_out) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load explore matches", true); + goto cleanup_explore; + } + + append_matching_refs(&name_out, area, false, &matches, 
&match_count, &match_cap); + append_matching_refs(&qn_out, area, false, &matches, &match_count, &match_cap); + append_matching_refs(&file_out, area, false, &matches, &match_count, &match_cap); + if (match_count > 0) { + qsort(matches, (size_t)match_count, sizeof(*matches), search_result_ref_rank_cmp); + } + + if (collect_explore_dependencies(store, matches, match_count, &deps, &dep_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load explore dependencies", true); + goto cleanup_explore; + } + + if (cbm_store_get_key_symbols(store, project, area, 10, &hotspots, &hotspot_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load area hotspots", true); + goto cleanup_explore; + } + + const char *aspects[] = {"entry_points"}; + if (cbm_store_get_architecture(store, project, aspects, 1, &arch) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load entry points", true); + goto cleanup_explore; + } + for (int i = 0; i < arch.entry_point_count; i++) { + if (!entry_point_matches_area(&arch.entry_points[i], area)) { + continue; + } + if (entry_count >= entry_cap) { + entry_cap = entry_cap > 0 ? 
entry_cap * 2 : 8; + entry_refs = safe_realloc(entry_refs, (size_t)entry_cap * sizeof(*entry_refs)); + } + entry_refs[entry_count++] = &arch.entry_points[i]; + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_str(doc, root, "area", area); + + yyjson_mut_val *match_arr = yyjson_mut_arr(doc); + for (int i = 0; i < match_count; i++) { + add_search_result_item(doc, match_arr, matches[i], false); + } + yyjson_mut_obj_add_val(doc, root, "matches", match_arr); + + yyjson_mut_val *dep_arr = yyjson_mut_arr(doc); + for (int i = 0; i < dep_count; i++) { + add_explore_dependency_json(doc, dep_arr, &deps[i], false); + } + yyjson_mut_obj_add_val(doc, root, "dependencies", dep_arr); + + int filtered_hotspot_count = 0; + for (int i = 0; i < hotspot_count; i++) { + if (key_symbol_matches_area(&hotspots[i], area)) { + filtered_hotspot_count++; + } + } + + yyjson_mut_val *hotspot_arr = yyjson_mut_arr(doc); + for (int i = 0; i < hotspot_count; i++) { + if (!key_symbol_matches_area(&hotspots[i], area)) { + continue; + } + add_key_symbol_json(doc, hotspot_arr, &hotspots[i], false); + } + yyjson_mut_obj_add_val(doc, root, "hotspots", hotspot_arr); + + yyjson_mut_val *entry_arr = yyjson_mut_arr(doc); + for (int i = 0; i < entry_count; i++) { + add_entry_point_json(doc, entry_arr, entry_refs[i], false); + } + yyjson_mut_obj_add_val(doc, root, "entry_points", entry_arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_str(doc, root, "area", area); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + match_count + dep_count + filtered_hotspot_count + entry_count); + + size_t used = 64 + strlen(area); + int shown = 0; + int 
full_items = 0; + bool stop = false; + + match_arr = yyjson_mut_arr(doc); + for (int i = 0; i < match_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_search_result_chars(matches[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_search_result_chars(matches[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_search_result_item(doc, match_arr, matches[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + yyjson_mut_obj_add_val(doc, root, "matches", match_arr); + + dep_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < dep_count; i++) { + bool compact = true; + size_t estimate = estimate_explore_dependency_chars(&deps[i], compact); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_explore_dependency_json(doc, dep_arr, &deps[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "dependencies", dep_arr); + + hotspot_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < hotspot_count; i++) { + if (!key_symbol_matches_area(&hotspots[i], area)) { + continue; + } + bool compact = true; + size_t estimate = estimate_key_symbol_chars(&hotspots[i], compact); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_key_symbol_json(doc, hotspot_arr, &hotspots[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "hotspots", hotspot_arr); + + entry_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < entry_count; i++) { + bool compact = true; + size_t estimate = estimate_entry_point_chars(entry_refs[i], compact); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_entry_point_json(doc, entry_arr, entry_refs[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "entry_points", 
entry_arr); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_explore: + free(contains_regex); + free(file_glob); + free(matches); + free(entry_refs); + free_explore_dependencies(deps, dep_count); + cbm_store_search_free(&name_out); + cbm_store_search_free(&qn_out); + cbm_store_search_free(&file_out); + cbm_store_key_symbols_free(hotspots, hotspot_count); + cbm_store_architecture_free(&arch); + free(project); + free(area); + return result; +} + +/* handle_understand: reads the project, symbol and max_tokens arguments, resolves the symbol (exact qualified name first, then exact name, then qualified-name suffix) and returns a cbm_mcp_text_result JSON payload holding the definition, callers and callees over CALLS edges, and connected symbols. Several exact-name hits pick the top-ranked one and list the rest as alternatives; several suffix hits return a suggestions response instead. A payload longer than the character budget derived from max_tokens is rebuilt with the truncated flag set. Every path releases project and symbol before returning. */ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!symbol || !symbol[0]) { + free(project); + free(symbol); + return cbm_mcp_text_result("symbol is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_search_output_t qn_out = {0}; + cbm_search_output_t exact_name_out = {0}; + cbm_search_output_t selected_out = {0}; + cbm_node_t *suffix_nodes = NULL; + int suffix_count = 0; + const cbm_search_result_t **exact_refs = NULL; + int exact_ref_count = 0; + int exact_ref_cap = 0; + const cbm_search_result_t *selected = NULL; + bool auto_picked = false; + cbm_traverse_result_t callers = {0}; + cbm_traverse_result_t callees = {0}; + connected_symbol_t *connected = NULL; + int connected_count = 0; + bool is_key_symbol = false; + char *source 
= NULL; + char *result = NULL; + + if (search_exact_qn(store, project, symbol, &qn_out) == CBM_STORE_OK && qn_out.count > 0) { + selected = &qn_out.results[0]; + } else if (search_exact_name(store, project, symbol, &exact_name_out) == CBM_STORE_OK) { + append_matching_refs(&exact_name_out, NULL, true, &exact_refs, &exact_ref_count, + &exact_ref_cap); + if (exact_ref_count > 0) { + qsort(exact_refs, (size_t)exact_ref_count, sizeof(*exact_refs), + understand_exact_match_cmp); + selected = exact_refs[0]; + auto_picked = exact_ref_count > 1; + } + } + + if (!selected) { + if (cbm_store_find_nodes_by_qn_suffix(store, project, symbol, &suffix_nodes, + &suffix_count) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to resolve symbol", true); + goto cleanup_understand; + } + + int filtered = 0; + for (int i = 0; i < suffix_count; i++) { + if (compound_is_symbol_label(suffix_nodes[i].label)) { + suffix_nodes[filtered++] = suffix_nodes[i]; + } else { + cbm_node_free_fields(&suffix_nodes[i]); + } + } + suffix_count = filtered; + + if (suffix_count == 1) { + if (search_exact_qn(store, project, suffix_nodes[0].qualified_name, &selected_out) == + CBM_STORE_OK && + selected_out.count > 0) { + selected = &selected_out.results[0]; + } + } else if (suffix_count > 1) { + result = build_symbol_suggestions_response(symbol, suffix_nodes, suffix_count); + goto cleanup_understand; + } + } + + if (!selected) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. 
Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + result = cbm_mcp_text_result(err, true); + goto cleanup_understand; + } + + source = load_node_source(srv, &selected->node); + const char *edge_types[] = {"CALLS"}; + if (cbm_store_bfs(store, selected->node.id, "inbound", edge_types, 1, 1, + UNDERSTAND_MAX_BFS_RESULTS, &callers) != CBM_STORE_OK || + cbm_store_bfs(store, selected->node.id, "outbound", edge_types, 1, 1, + UNDERSTAND_MAX_BFS_RESULTS, &callees) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load symbol dependencies", true); + goto cleanup_understand; + } + + if (callers.visited_count > 1) { + qsort(callers.visited, (size_t)callers.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } + if (callees.visited_count > 1) { + qsort(callees.visited, (size_t)callees.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } + if (collect_connected_symbols(&callers, &callees, &connected, &connected_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build connected symbol summary", true); + goto cleanup_understand; + } + is_key_symbol = understand_is_key_symbol(store, project, selected); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", symbol); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + selected->node.qualified_name ? selected->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "label", selected->node.label ? 
selected->node.label : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", selected->pagerank); + yyjson_mut_obj_add_bool(doc, root, "is_key_symbol", is_key_symbol); + add_understand_definition_json(doc, root, selected, source); + + if (auto_picked) { + yyjson_mut_val *alts = yyjson_mut_arr(doc); + for (int i = 1; i < exact_ref_count; i++) { + add_alternative_json(doc, alts, exact_refs[i], false); + } + yyjson_mut_obj_add_val(doc, root, "alternatives", alts); + } + + yyjson_mut_val *caller_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callers.visited_count; i++) { + add_node_hop_item(doc, caller_arr, &callers.visited[i], false); + } + yyjson_mut_obj_add_val(doc, root, "callers", caller_arr); + + yyjson_mut_val *callee_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callees.visited_count; i++) { + add_node_hop_item(doc, callee_arr, &callees.visited[i], false); + } + yyjson_mut_obj_add_val(doc, root, "callees", callee_arr); + + yyjson_mut_val *connected_arr = yyjson_mut_arr(doc); + for (int i = 0; i < connected_count; i++) { + add_connected_symbol_json(doc, connected_arr, &connected[i], false); + } + yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", symbol); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + selected->node.qualified_name ? selected->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "label", + selected->node.label ? 
selected->node.label : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", selected->pagerank); + yyjson_mut_obj_add_bool(doc, root, "is_key_symbol", is_key_symbol); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + 1 + callers.visited_count + callees.visited_count + + connected_count + (auto_picked ? exact_ref_count - 1 : 0)); + + size_t used = 96 + strlen(symbol) + + strlen(selected->node.qualified_name ? selected->node.qualified_name : ""); + int shown = 1; + + char *compact_source = NULL; + if (source) { + compact_source = truncate_text_copy(source, MAX_COMPACT_SOURCE_CHARS); + } + add_understand_definition_json(doc, root, selected, NULL); + if (compact_source && compact_source[0]) { + size_t estimate = estimate_understand_definition_chars(selected, compact_source, true) - + estimate_understand_definition_chars(selected, NULL, false); + if (used + estimate <= char_budget) { + yyjson_mut_val *definition = yyjson_mut_obj_get(root, "definition"); + yyjson_mut_obj_add_strcpy(doc, definition, "source", compact_source); + yyjson_mut_obj_add_bool(doc, definition, "source_truncated", true); + used += estimate; + } + } + free(compact_source); + + if (auto_picked) { + yyjson_mut_val *alts = yyjson_mut_arr(doc); + for (int i = 1; i < exact_ref_count; i++) { + size_t estimate = estimate_alternative_chars(exact_refs[i], true); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_alternative_json(doc, alts, exact_refs[i], true); + used += estimate; + shown++; + } + yyjson_mut_obj_add_val(doc, root, "alternatives", alts); + } + + int full_items = 0; + bool stop = false; + caller_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callers.visited_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&callers.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = 
estimate_node_hop_chars(&callers.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_node_hop_item(doc, caller_arr, &callers.visited[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + yyjson_mut_obj_add_val(doc, root, "callers", caller_arr); + + callee_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < callees.visited_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&callees.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&callees.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_node_hop_item(doc, callee_arr, &callees.visited[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callees", callee_arr); + + connected_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < connected_count; i++) { + bool compact = true; + size_t estimate = estimate_connected_symbol_chars(&connected[i], compact); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_connected_symbol_json(doc, connected_arr, &connected[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_understand: + free(project); + free(symbol); + free(source); + free(exact_refs); + cbm_store_search_free(&qn_out); + cbm_store_search_free(&exact_name_out); + cbm_store_search_free(&selected_out); + cbm_store_traverse_free(&callers); + cbm_store_traverse_free(&callees); + free_connected_symbols(connected, connected_count); + cbm_store_free_nodes(suffix_nodes, 
suffix_count); + return result; +} + +/* Estimate how many characters the review scope will occupy: starts from a base of 48 and adds estimate_string_list_entry_chars() for every must_review and should_review entry, plus every tests entry when include_tests is set. */ static size_t estimate_review_scope_chars(const review_scope_t *scope, bool include_tests) { + size_t size = 48; + for (int i = 0; i < scope->must_review_count; i++) { + size += estimate_string_list_entry_chars(scope->must_review[i]); + } + for (int i = 0; i < scope->should_review_count; i++) { + size += estimate_string_list_entry_chars(scope->should_review[i]); + } + if (include_tests) { + for (int i = 0; i < scope->test_count; i++) { + size += estimate_string_list_entry_chars(scope->tests[i]); + } + } + return size; +} + +/* Attach a review_scope object to root containing the must_review and should_review string arrays, plus a tests array when include_tests is set. NULL entries are written as empty strings. yyjson_mut_arr_add_str stores the pointer without copying, so the strings in scope must stay alive until doc has been serialized or freed. */ static void add_review_scope_json(yyjson_mut_doc *doc, yyjson_mut_val *root, + const review_scope_t *scope, bool include_tests) { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_val *must = yyjson_mut_arr(doc); + yyjson_mut_val *should = yyjson_mut_arr(doc); + for (int i = 0; i < scope->must_review_count; i++) { + yyjson_mut_arr_add_str(doc, must, scope->must_review[i] ? scope->must_review[i] : ""); + } + for (int i = 0; i < scope->should_review_count; i++) { + yyjson_mut_arr_add_str(doc, should, scope->should_review[i] ? scope->should_review[i] : ""); + } + yyjson_mut_obj_add_val(doc, review, "must_review", must); + yyjson_mut_obj_add_val(doc, review, "should_review", should); + if (include_tests) { + yyjson_mut_val *tests = yyjson_mut_arr(doc); + for (int i = 0; i < scope->test_count; i++) { + yyjson_mut_arr_add_str(doc, tests, scope->tests[i] ? 
scope->tests[i] : ""); + } + yyjson_mut_obj_add_val(doc, review, "tests", tests); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); +} + +/* Item count reported as total_results when the prepare_change payload is truncated: impact_output_total_results() plus the must_review and should_review counts, plus test_count when include_tests is set. */ static int prepare_change_total_results(const cbm_impact_analysis_t *impact, + const review_scope_t *scope, bool include_tests) { + int total = impact_output_total_results(impact, include_tests); + total += scope->must_review_count + scope->should_review_count; + if (include_tests) { + total += scope->test_count; + } + return total; +} + +/* handle_prepare_change: reads the project, symbol, include_tests (default true) and max_tokens arguments, runs cbm_store_get_impact_analysis for the symbol and returns a cbm_mcp_text_result JSON payload with the direct, indirect and transitive impact groups, affected tests, review scope, risk score and summary. A payload longer than the character budget derived from max_tokens is rebuilt item by item with the truncated flag set. A missing symbol, an unknown project or a store failure produces an error result. Every path releases project and symbol before returning. */ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + bool include_tests = cbm_mcp_get_bool_arg_default(args, "include_tests", true); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + char *result = NULL; + + if (!symbol || !symbol[0]) { + free(project); + free(symbol); + return cbm_mcp_text_result("symbol is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_impact_analysis_t impact = {0}; + review_scope_t scope = {0}; + char *summary_text = NULL; + + int rc = cbm_store_get_impact_analysis(store, project, symbol, 4, &impact); + if (rc == CBM_STORE_NOT_FOUND) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. 
Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + result = cbm_mcp_text_result(err, true); + goto cleanup_prepare_change; + } + if (rc != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build pre-change analysis", true); + goto cleanup_prepare_change; + } + + if (build_review_scope(&impact, include_tests, &scope) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build review scope", true); + goto cleanup_prepare_change; + } + + summary_text = impact_output_summary_dup(&impact, include_tests); + if (!summary_text) { + summary_text = heap_strdup(impact.summary ? impact.summary : ""); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? 
impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + + yyjson_mut_val *impact_obj = yyjson_mut_obj(doc); + yyjson_mut_val *direct = yyjson_mut_arr(doc); + for (int i = 0; i < impact.direct_count; i++) { + add_impact_item_json(doc, direct, &impact.direct[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "direct", direct); + + yyjson_mut_val *indirect = yyjson_mut_arr(doc); + for (int i = 0; i < impact.indirect_count; i++) { + add_impact_item_json(doc, indirect, &impact.indirect[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "indirect", indirect); + + yyjson_mut_val *transitive = yyjson_mut_arr(doc); + for (int i = 0; i < impact.transitive_count; i++) { + add_impact_item_json(doc, transitive, &impact.transitive[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "transitive", transitive); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + yyjson_mut_val *tests = yyjson_mut_arr(doc); + if (include_tests) { + for (int i = 0; i < impact.affected_test_count; i++) { + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + add_review_scope_json(doc, root, &scope, include_tests); + yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? summary_text : ""); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? 
impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + prepare_change_total_results(&impact, &scope, include_tests)); + + size_t used = 96; + used += strlen(impact.symbol ? impact.symbol : ""); + used += strlen(impact.qualified_name ? impact.qualified_name : ""); + used += strlen(impact.file ? impact.file : ""); + used += strlen(summary_text ? summary_text : ""); + used += strlen(impact.risk_score ? impact.risk_score : ""); + + int shown = 0; + int full_items = 0; + bool stop = false; + + impact_obj = yyjson_mut_obj(doc); + add_budgeted_impact_group(doc, impact_obj, "direct", impact.direct, impact.direct_count, + char_budget, &used, &shown, &full_items, &stop); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + if (!stop) { + size_t estimate = estimate_review_scope_chars(&scope, include_tests); + if (used + estimate <= char_budget || shown == 0) { + add_review_scope_json(doc, root, &scope, include_tests); + used += estimate; + shown += scope.must_review_count + scope.should_review_count + + (include_tests ? 
scope.test_count : 0); + } else { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_val *must = yyjson_mut_arr(doc); + yyjson_mut_val *should = yyjson_mut_arr(doc); + for (int i = 0; i < scope.must_review_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.must_review[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, must, scope.must_review[i]); + used += item_estimate; + shown++; + } + if (!stop) { + for (int i = 0; i < scope.should_review_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.should_review[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, should, scope.should_review[i]); + used += item_estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, review, "must_review", must); + yyjson_mut_obj_add_val(doc, review, "should_review", should); + if (include_tests) { + yyjson_mut_val *review_tests = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < scope.test_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.tests[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, review_tests, scope.tests[i]); + used += item_estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, review, "tests", review_tests); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); + } + } else { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_obj_add_val(doc, review, "must_review", yyjson_mut_arr(doc)); + yyjson_mut_obj_add_val(doc, review, "should_review", yyjson_mut_arr(doc)); + if (include_tests) { + yyjson_mut_obj_add_val(doc, review, "tests", yyjson_mut_arr(doc)); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); + } + + tests = yyjson_mut_arr(doc); + if (include_tests && !stop) { + for (int i = 0; i < impact.affected_test_count; i++) { + 
size_t estimate = estimate_affected_test_chars(&impact.affected_tests[i]); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + + if (!stop) { + yyjson_mut_val *impact_existing = yyjson_mut_obj_get(root, "impact"); + add_budgeted_impact_group(doc, impact_existing, "indirect", impact.indirect, + impact.indirect_count, char_budget, &used, &shown, + &full_items, &stop); + add_budgeted_impact_group(doc, impact_existing, "transitive", impact.transitive, + impact.transitive_count, char_budget, &used, &shown, + &full_items, &stop); + } else { + yyjson_mut_val *impact_existing = yyjson_mut_obj_get(root, "impact"); + yyjson_mut_obj_add_val(doc, impact_existing, "indirect", yyjson_mut_arr(doc)); + yyjson_mut_obj_add_val(doc, impact_existing, "transitive", yyjson_mut_arr(doc)); + } + + yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_prepare_change: + free(summary_text); + review_scope_free(&scope); + cbm_store_impact_analysis_free(&impact); + free(project); + free(symbol); + return result; +} + +/* ── search_code v2: graph-augmented code search ─────────────── */ + +/* Replace every byte above ASCII_MAX (127) with a question mark, in place, so the string is pure ASCII and therefore valid UTF-8 in JSON output. The length is unchanged; s must be a writable NUL-terminated string. */ +enum { ASCII_MAX = 127 }; +static void sanitize_ascii(char *s) { + for (unsigned char *p = (unsigned char *)s; *p; p++) { + if (*p > ASCII_MAX) { + *p = '?'; + } + } +} + +/* Intermediate grep match: file, line and content held in fixed-size buffers */ +typedef struct { + char file[512]; + int line; + char content[1024]; +} grep_match_t; + +/* Deduped result: one per containing graph node, with room for 64 match lines */ +typedef struct { + int64_t node_id; /* 0 = raw match (no containing node) */ + char node_name[256]; + char qualified_name[512]; + char label[64]; + char file[512]; + int start_line; + int end_line; + int in_degree; + int out_degree; + int score; + int match_lines[64]; + int match_count; +} search_result_t; + +/* Ranking weights added to a result's in_degree: Function or Method +10, Route +15, vendored paths -50 (the largest penalty), test or spec paths -5 (a mild penalty). */ +enum { SCORE_FUNC = 10, SCORE_ROUTE = 15, SCORE_VENDORED = -50, SCORE_TEST = -5 }; +enum { MAX_LINE_SPAN = 999999 }; + +/* Compute the ranking score of r: in_degree plus the label bonus, plus the vendored penalty when the path contains vendored/, vendor/ or node_modules/, plus the test penalty. The adjustments are cumulative and r is not modified. */ static int compute_search_score(const search_result_t *r) { + int score = r->in_degree; + if (strcmp(r->label, "Function") == 0 || strcmp(r->label, "Method") == 0) { + score += SCORE_FUNC; + } + if (strcmp(r->label, "Route") == 0) { + score += SCORE_ROUTE; + } + if (strstr(r->file, "vendored/") || strstr(r->file, "vendor/") || + strstr(r->file, "node_modules/")) { + score += SCORE_VENDORED; + } + /* Penalize test files: plain substring match, so any path containing `test` or `spec` qualifies; the `_test.` check is already covered by `test` */ + if (strstr(r->file, "test") || strstr(r->file, "spec") || strstr(r->file, "_test.")) { + score += SCORE_TEST; + } + return score; +} + +static int search_result_cmp(const void *a, const void *b) { + 
const search_result_t *ra = (const search_result_t *)a; + const search_result_t *rb = (const search_result_t *)b; + return rb->score - ra->score; /* descending */ +} + +/* Build the grep command string based on scoped vs recursive mode */ +static void build_grep_cmd(char *cmd, size_t cmd_sz, bool use_regex, bool scoped, + const char *file_pattern, const char *tmpfile, const char *filelist, + const char *root_path) { + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *flag = use_regex ? "-E" : "-F"; + if (scoped) { + if (file_pattern) { + snprintf(cmd, cmd_sz, "xargs grep -n %s --include='%s' -f '%s' < '%s' 2>/dev/null", + flag, file_pattern, tmpfile, filelist); + } else { + snprintf(cmd, cmd_sz, "xargs grep -n %s -f '%s' < '%s' 2>/dev/null", flag, tmpfile, + filelist); + } + } else { + if (file_pattern) { + snprintf(cmd, cmd_sz, "grep -rn %s --include='%s' -f '%s' '%s' 2>/dev/null", flag, + file_pattern, tmpfile, root_path); + } else { + snprintf(cmd, cmd_sz, "grep -rn %s -f '%s' '%s' 2>/dev/null", flag, tmpfile, root_path); + } + } +} + +/* Phase 4: assemble JSON output from search results */ +static char *assemble_search_output(search_result_t *sr, int sr_count, grep_match_t *raw, + int raw_count, int gm_count, int limit, int mode, + int context_lines, const char *root_path) { + enum { MODE_COMPACT = 0, MODE_FULL = 1, MODE_FILES = 2 }; + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + int output_count = sr_count < limit ? 
sr_count : limit; + + if (mode == MODE_FILES) { + yyjson_mut_val *files_arr = yyjson_mut_arr(doc); + char *seen_files[512]; + int seen_count = 0; + for (int fi = 0; fi < output_count; fi++) { + bool dup = false; + for (int j = 0; j < seen_count; j++) { + if (strcmp(seen_files[j], sr[fi].file) == 0) { + dup = true; + break; + } + } + if (!dup && seen_count < 512) { + seen_files[seen_count++] = sr[fi].file; + yyjson_mut_arr_add_str(doc, files_arr, sr[fi].file); + } + } + for (int fi = 0; fi < raw_count && seen_count < 512; fi++) { + bool dup = false; + for (int j = 0; j < seen_count; j++) { + if (strcmp(seen_files[j], raw[fi].file) == 0) { + dup = true; + break; + } + } + if (!dup) { + seen_files[seen_count++] = raw[fi].file; + yyjson_mut_arr_add_str(doc, files_arr, raw[fi].file); + } + } + yyjson_mut_obj_add_val(doc, root_obj, "files", files_arr); + } else { + yyjson_mut_val *results_arr = yyjson_mut_arr(doc); + for (int ri = 0; ri < output_count; ri++) { + search_result_t *r = &sr[ri]; + yyjson_mut_val *item = yyjson_mut_obj(doc); + + yyjson_mut_obj_add_str(doc, item, "node", r->node_name); + yyjson_mut_obj_add_str(doc, item, "qualified_name", r->qualified_name); + yyjson_mut_obj_add_str(doc, item, "label", r->label); + yyjson_mut_obj_add_str(doc, item, "file", r->file); + yyjson_mut_obj_add_int(doc, item, "start_line", r->start_line); + yyjson_mut_obj_add_int(doc, item, "end_line", r->end_line); + yyjson_mut_obj_add_int(doc, item, "in_degree", r->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", r->out_degree); + + yyjson_mut_val *ml = yyjson_mut_arr(doc); + for (int j = 0; j < r->match_count; j++) { + yyjson_mut_arr_add_int(doc, ml, r->match_lines[j]); + } + yyjson_mut_obj_add_val(doc, item, "match_lines", ml); if (r->start_line > 0 && r->end_line > 0) { char abs_path[1024]; @@ -3923,6 +5577,15 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture_summary") == 0) { return 
handle_get_architecture_summary(srv, args_json); } + if (strcmp(tool_name, "explore") == 0) { + return handle_explore(srv, args_json); + } + if (strcmp(tool_name, "understand") == 0) { + return handle_understand(srv, args_json); + } + if (strcmp(tool_name, "prepare_change") == 0) { + return handle_prepare_change(srv, args_json); + } /* Pipeline-dependent tools */ if (strcmp(tool_name, "index_repository") == 0) { diff --git a/src/store/store.c b/src/store/store.c index 1823d074..5c420dbf 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2453,6 +2453,15 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear ADD_WHERE(bind_buf); BIND_TEXT(params->name_pattern); } + if (params->qn_pattern) { + if (params->case_sensitive) { + snprintf(bind_buf, sizeof(bind_buf), "n.qualified_name REGEXP ?%d", bind_idx + 1); + } else { + snprintf(bind_buf, sizeof(bind_buf), "iregexp(?%d, n.qualified_name)", bind_idx + 1); + } + ADD_WHERE(bind_buf); + BIND_TEXT(params->qn_pattern); + } if (params->file_pattern) { like_pattern = cbm_glob_to_like(params->file_pattern); snprintf(bind_buf, sizeof(bind_buf), "n.file_path LIKE ?%d", bind_idx + 1); @@ -5344,7 +5353,7 @@ static int summary_count_nodes(cbm_store_t *s, const char *project, const char * sqlite3_stmt *stmt = NULL; if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "summary_count_nodes"); - return 0; + return 0; /* return 0 rather than -1 so callers display "0" not "-1" */ } bind_text(stmt, 1, project); if (focus_like && focus_like[0]) { diff --git a/tests/test_integration.c b/tests/test_integration.c index b106c11d..147386bc 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -393,12 +393,40 @@ TEST(integ_mcp_get_key_symbols) { PASS(); } +TEST(integ_mcp_explore) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"area\":\"main\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("explore", args); + 
ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "matches")); + ASSERT_NOT_NULL(strstr(resp, "hotspots")); + ASSERT_NOT_NULL(strstr(resp, "main")); + free(resp); + PASS(); +} + +TEST(integ_mcp_understand) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("understand", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "qualified_name")); + ASSERT_NOT_NULL(strstr(resp, "definition")); + ASSERT_NOT_NULL(strstr(resp, "callers")); + ASSERT_NOT_NULL(strstr(resp, "return a + b")); + free(resp); + PASS(); +} + TEST(integ_mcp_get_impact_analysis) { char args[256]; snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"depth\":3}", g_project); char *resp = call_tool("get_impact_analysis", args); ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "symbol")); ASSERT_NOT_NULL(strstr(resp, "Add")); ASSERT_NOT_NULL(strstr(resp, "impact")); ASSERT_NOT_NULL(strstr(resp, "risk_score")); @@ -407,6 +435,19 @@ TEST(integ_mcp_get_impact_analysis) { PASS(); } +TEST(integ_mcp_prepare_change) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("prepare_change", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "review_scope")); + ASSERT_NOT_NULL(strstr(resp, "risk_score")); + ASSERT_NOT_NULL(strstr(resp, "utils.go")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -589,7 +630,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 26; /* skip all integration tests */ + tf_skip_count += 29; /* skip all integration tests */ integration_teardown(); return; } @@ -611,7 +652,10 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_architecture); 
RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_get_key_symbols); + RUN_TEST(integ_mcp_explore); + RUN_TEST(integ_mcp_understand); RUN_TEST(integ_mcp_get_impact_analysis); + RUN_TEST(integ_mcp_prepare_change); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 9131a4e6..62c21f0a 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -142,6 +142,9 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); ASSERT_NOT_NULL(strstr(json, "get_impact_analysis")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); + ASSERT_NOT_NULL(strstr(json, "explore")); + ASSERT_NOT_NULL(strstr(json, "understand")); + ASSERT_NOT_NULL(strstr(json, "prepare_change")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); ASSERT_NOT_NULL(strstr(json, "delete_project")); @@ -307,6 +310,9 @@ TEST(server_handle_tools_list) { ASSERT_NOT_NULL(strstr(resp, "\"id\":2")); ASSERT_NOT_NULL(strstr(resp, "search_graph")); ASSERT_NOT_NULL(strstr(resp, "query_graph")); + ASSERT_NOT_NULL(strstr(resp, "explore")); + ASSERT_NOT_NULL(strstr(resp, "understand")); + ASSERT_NOT_NULL(strstr(resp, "prepare_change")); free(resp); cbm_mcp_server_free(srv); @@ -1344,6 +1350,16 @@ static cbm_mcp_server_t *setup_snippet_server(char *tmp_dir, size_t tmp_sz) { n_run2.end_line = 13; cbm_store_upsert_node(st, &n_run2); + cbm_node_t n_run3 = {0}; + n_run3.project = proj_name; + n_run3.label = "Function"; + n_run3.name = "Run"; + n_run3.qualified_name = "test-project.api.server.Run"; + n_run3.file_path = "main.go"; + n_run3.start_line = 11; + n_run3.end_line = 13; + cbm_store_upsert_node(st, &n_run3); + /* Create edges: HandleRequest -> ProcessOrder, HandleRequest -> Run1 */ cbm_edge_t e1 = {.project = proj_name, .source_id = id_hr, .target_id = id_po, .type = "CALLS"}; cbm_store_insert_edge(st, &e1); @@ -1353,6 +1369,11 @@ static cbm_mcp_server_t 
*setup_snippet_server(char *tmp_dir, size_t tmp_sz) { cbm_store_insert_edge(st, &e2); (void)id_run1; /* run1 used for edge above */ + if (cbm_store_compute_pagerank(st, proj_name, 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + return srv; } @@ -1686,6 +1707,291 @@ TEST(tool_get_impact_analysis_route_and_entry_point_typing) { PASS(); } +TEST(tool_explore_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"matches\"")); + ASSERT_NOT_NULL(strstr(text, "\"dependencies\"")); + ASSERT_NOT_NULL(strstr(text, "\"hotspots\"")); + ASSERT_NOT_NULL(strstr(text, "\"entry_points\"")); + ASSERT_NOT_NULL(strstr(text, "ProcessOrder")); + ASSERT_NOT_NULL(strstr(text, "CliEntry")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_explore_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\",\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_exact_short_name_autopicks_best_non_test) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "understand", "{\"project\":\"impact\",\"symbol\":\"Duplicate\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, 
"\"qualified_name\":\"impact.core.Duplicate\"")); + ASSERT_NOT_NULL(strstr(text, "\"alternatives\"")); + ASSERT_NOT_NULL(strstr(text, "impact.tests.Duplicate")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_qualified_name_resolution) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "understand", + "{\"project\":\"test-project\",\"symbol\":\"test-project.cmd.server.main.ProcessOrder\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL( + strstr(text, "\"qualified_name\":\"test-project.cmd.server.main.ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"definition\"")); + ASSERT_NOT_NULL(strstr(text, "\"source\"")); + ASSERT_NOT_NULL(strstr(text, "func ProcessOrder(id int)")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_understand_suffix_ambiguity_returns_suggestions) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "understand", "{\"project\":\"test-project\",\"symbol\":\"server.Run\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"status\":\"ambiguous\"")); + ASSERT_NOT_NULL(strstr(text, "test-project.cmd.server.Run")); + ASSERT_NOT_NULL(strstr(text, "test-project.api.server.Run")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_understand_max_tokens_truncates) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "understand", + "{\"project\":\"test-project\",\"symbol\":\"test-project.cmd.server.main.HandleRequest\"," + "\"max_tokens\":1}"); + 
ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"definition\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_prepare_change_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"review_scope\"")); + ASSERT_NOT_NULL(strstr(text, "\"risk_score\":\"high\"")); + ASSERT_NOT_NULL(strstr(text, "\"must_review\"")); + ASSERT_NOT_NULL(strstr(text, "app/services/order_service.php")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_include_tests_false) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"include_tests\":false}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[]")); + ASSERT_NOT_NULL(strstr(text, "\"summary\":\"2 direct callers, 2 route/entry points, 1 transitive impacts\"")); + ASSERT_NULL(strstr(text, "\"review_scope\":{\"must_review\":[\"app/services/order_service.php\"],\"should_review\":[\"app/ui/browser_flow.php\"],\"tests\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"max_tokens\":1}"); + 
ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"review_scope\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +/* ── Error-path tests for compound tools ──────────────────────── */ + +TEST(tool_explore_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"nonexistent\",\"area\":\"foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_explore_no_matches) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"zzzznonexistent\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + /* Should return valid JSON with empty arrays, not an error */ + ASSERT_NOT_NULL(strstr(text, "\"matches\"")); + ASSERT_NOT_NULL(strstr(text, "\"hotspots\"")); + ASSERT_NULL(strstr(text, "isError")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"nonexistent\",\"symbol\":\"Foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ZZZNoSuchSymbol\"}"); + 
ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "prepare_change", + "{\"project\":\"nonexistent\",\"symbol\":\"Foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ZZZNoSuchSymbol\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -2426,6 +2732,21 @@ SUITE(mcp) { RUN_TEST(tool_get_impact_analysis_include_tests_false); RUN_TEST(tool_get_impact_analysis_max_tokens_truncates); RUN_TEST(tool_get_impact_analysis_route_and_entry_point_typing); + RUN_TEST(tool_explore_basic); + RUN_TEST(tool_explore_max_tokens_truncates); + RUN_TEST(tool_understand_exact_short_name_autopicks_best_non_test); + RUN_TEST(tool_understand_qualified_name_resolution); + RUN_TEST(tool_understand_suffix_ambiguity_returns_suggestions); + RUN_TEST(tool_understand_max_tokens_truncates); + RUN_TEST(tool_prepare_change_basic); + RUN_TEST(tool_prepare_change_include_tests_false); + RUN_TEST(tool_prepare_change_max_tokens_truncates); + RUN_TEST(tool_explore_missing_project); + RUN_TEST(tool_explore_no_matches); + RUN_TEST(tool_understand_missing_project); + RUN_TEST(tool_understand_missing_symbol); + RUN_TEST(tool_prepare_change_missing_project); + RUN_TEST(tool_prepare_change_missing_symbol); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path);