From ac9ce2183dfc42a05027a602372bc64a2d940b38 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 16:58:45 +0530 Subject: [PATCH 01/14] Add architecture summary MCP tool --- src/main.c | 3 +- src/mcp/mcp.c | 361 +++++++++++- src/store/store.c | 1136 +++++++++++++++++++++++++++++++++++++- src/store/store.h | 63 +++ tests/test_integration.c | 15 + tests/test_mcp.c | 144 ++++- tests/test_store_arch.c | 214 +++++++ tests/test_store_nodes.c | 30 + 8 files changed, 1960 insertions(+), 6 deletions(-) diff --git a/src/main.c b/src/main.c index 70eadcdc..f442aa0c 100644 --- a/src/main.c +++ b/src/main.c @@ -149,7 +149,8 @@ static void print_help(void) { printf("\nSupported agents (auto-detected):\n"); printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); - printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n"); + printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); + printf(" get_architecture_summary, search_code,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 3530acc3..47329a1c 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 14 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. * * Uses yyjson for fast JSON parsing/building. * Single-threaded event loop: read line → parse → dispatch → respond. 
@@ -34,6 +34,7 @@ #endif #include #include // int64_t +#include #include #include #include @@ -79,6 +80,111 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) { return s; } +typedef struct { + char *buf; + size_t len; + size_t cap; + size_t limit; + bool truncated; +} markdown_builder_t; + +static void markdown_builder_init(markdown_builder_t *b, size_t limit) { + b->cap = 512; + b->buf = malloc(b->cap); + b->len = 0; + b->limit = limit; + b->truncated = false; + if (b->buf) { + b->buf[0] = '\0'; + } +} + +static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { + if (!b->buf) { + return false; + } + while (b->len + need + 1 > b->cap) { + b->cap *= 2; + b->buf = safe_realloc(b->buf, b->cap); + } + return true; +} + +static bool markdown_builder_append_raw(markdown_builder_t *b, const char *text) { + if (!b || !b->buf || !text || b->truncated) { + return false; + } + size_t add = strlen(text); + if (b->len + add > b->limit) { + b->truncated = true; + return false; + } + if (!markdown_builder_reserve(b, add)) { + return false; + } + memcpy(b->buf + b->len, text, add); + b->len += add; + b->buf[b->len] = '\0'; + return true; +} + +static bool markdown_builder_appendf(markdown_builder_t *b, const char *fmt, ...) 
{ + if (!b || !b->buf || !fmt || b->truncated) { + return false; + } + + va_list ap; + va_start(ap, fmt); + va_list ap_copy; + va_copy(ap_copy, ap); + int needed = vsnprintf(NULL, 0, fmt, ap_copy); + va_end(ap_copy); + if (needed < 0) { + va_end(ap); + return false; + } + if (b->len + (size_t)needed > b->limit) { + b->truncated = true; + va_end(ap); + return false; + } + if (!markdown_builder_reserve(b, (size_t)needed)) { + va_end(ap); + return false; + } + vsnprintf(b->buf + b->len, b->cap - b->len, fmt, ap); + va_end(ap); + b->len += (size_t)needed; + return true; +} + +static char *markdown_builder_finish(markdown_builder_t *b) { + const char *note = "\n_Truncated at max_tokens._\n"; + if (!b || !b->buf) { + return NULL; + } + if (b->truncated) { + size_t note_len = strlen(note); + if (note_len <= b->limit) { + size_t keep_len = b->len; + size_t final_len = 0; + if (keep_len + note_len > b->limit) { + keep_len = b->limit - note_len; + } + final_len = keep_len + note_len; + if (final_len > b->len && + !markdown_builder_reserve(b, final_len - b->len)) { + return b->buf; + } + b->len = keep_len; + memcpy(b->buf + b->len, note, note_len); + b->len = final_len; + b->buf[b->len] = '\0'; + } + } + return b->buf; +} + /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING * ══════════════════════════════════════════════════════════════════ */ @@ -281,6 +387,17 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":" "\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"}, + {"get_architecture_summary", + "Generate a structured markdown architecture summary from the existing SQLite graph, with " + "optional focus filtering and output size control.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\",\"description\":" + "\"Indexed project name (from list_projects).\"},\"project_path\":{\"type\":\"string\"," + 
"\"description\":\"Deprecated alias: path to the indexed project.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Controls " + "detail level.\"},\"focus\":{\"type\":\"string\",\"description\":\"Optional domain keyword " + "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" + "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -1218,6 +1335,245 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static bool same_project_path(const char *lhs, const char *rhs) { + if (!lhs || !rhs) { + return false; + } + + char lhs_real[4096]; + char rhs_real[4096]; +#ifdef _WIN32 + if (_fullpath(lhs_real, lhs, sizeof(lhs_real)) && _fullpath(rhs_real, rhs, sizeof(rhs_real))) { + return strcmp(lhs_real, rhs_real) == 0; + } +#else + if (realpath(lhs, lhs_real) && realpath(rhs, rhs_real)) { + return strcmp(lhs_real, rhs_real) == 0; + } +#endif + return strcmp(lhs, rhs) == 0; +} + +static char *handle_get_architecture_summary(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *project_path = cbm_mcp_get_string_arg(args, "project_path"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", 2000); + char *display_path = NULL; + + if (!project && !project_path) { + free(focus); + return cbm_mcp_text_result("project is required", true); + } + if (max_tokens <= 0) { + max_tokens = 2000; + } + + if (!project) { + project = cbm_project_name_from_path(project_path); + if (!project) { + free(project_path); + free(focus); + return cbm_mcp_text_result("unable to derive project name from project_path", true); + } + } + + cbm_store_t *store = 
resolve_store(srv, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(project_path); + free(focus); + return _res; + } + + cbm_project_t proj_info = {0}; + if (cbm_store_get_project(store, project, &proj_info) != CBM_STORE_OK) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project is not indexed", true); + } + if (project_path && proj_info.root_path && proj_info.root_path[0] && + !same_project_path(project_path, proj_info.root_path)) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project_path does not match project", true); + } + if (proj_info.root_path && proj_info.root_path[0]) { + display_path = heap_strdup(proj_info.root_path); + } else if (project_path && project_path[0]) { + display_path = heap_strdup(project_path); + } + cbm_project_free_fields(&proj_info); + + cbm_architecture_summary_t summary = {0}; + if (cbm_store_get_architecture_summary(store, project, focus, &summary) != CBM_STORE_OK) { + free(project); + free(project_path); + free(display_path); + free(focus); + return cbm_mcp_text_result("failed to build architecture summary", true); + } + + size_t char_budget = (size_t)max_tokens * 4U; + if (char_budget < 512) { + char_budget = 512; + } + markdown_builder_t md; + markdown_builder_init(&md, char_budget); + + const char *display_name = display_path ? cbm_path_base(display_path) : project; + (void)markdown_builder_appendf(&md, "## Project: %s\n", display_name ? 
display_name : project); + if (focus && focus[0]) { + (void)markdown_builder_appendf(&md, "Focus: %s\n", focus); + } + (void)markdown_builder_appendf(&md, "Files: %d | Functions: %d | Classes: %d | Routes: %d\n\n", + summary.total_files, summary.total_functions, + summary.total_classes, summary.total_routes); + + (void)markdown_builder_append_raw(&md, "## Key Files (by connectivity)\n"); + if (summary.file_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching files.\n\n"); + } else { + for (int i = 0; i < summary.file_count; i++) { + if (!markdown_builder_appendf(&md, "%d. %s - %d inbound calls, %d outbound\n", i + 1, + summary.files[i].file ? summary.files[i].file : "", + summary.files[i].inbound_calls, + summary.files[i].outbound_calls)) { + break; + } + if (summary.files[i].symbol_count > 0) { + (void)markdown_builder_append_raw(&md, " Key methods: "); + for (int j = 0; j < summary.files[i].symbol_count; j++) { + if (j > 0 && !markdown_builder_append_raw(&md, ", ")) { + break; + } + if (summary.files[i].symbols[j].span_lines > 0) { + (void)markdown_builder_appendf( + &md, "%s (%d lines)", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : "", + summary.files[i].symbols[j].span_lines); + } else { + (void)markdown_builder_appendf( + &md, "%s", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : ""); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Route Map\n"); + if (summary.route_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching routes.\n\n"); + } else { + for (int i = 0; i < summary.route_count; i++) { + (void)markdown_builder_appendf( + &md, "%s %s", summary.routes[i].method ? summary.routes[i].method : "", + summary.routes[i].path ? 
summary.routes[i].path : ""); + if (summary.routes[i].handler && summary.routes[i].handler[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].handler); + } + if (summary.routes[i].service && summary.routes[i].service[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].service); + } + if (summary.routes[i].next && summary.routes[i].next[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].next); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Module Clusters (Louvain communities)\n"); + if (summary.cluster_count == 0) { + (void)markdown_builder_append_raw(&md, "No multi-file clusters found.\n\n"); + } else { + for (int i = 0; i < summary.cluster_count; i++) { + (void)markdown_builder_appendf(&md, "Cluster %d (%d files)\n", summary.clusters[i].id, + summary.clusters[i].file_count); + if (summary.clusters[i].core_file_count > 0) { + (void)markdown_builder_append_raw(&md, "Core: "); + for (int j = 0; j < summary.clusters[i].core_file_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].core_files[j] + ? cbm_path_base(summary.clusters[i].core_files[j]) + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + if (summary.clusters[i].entry_point_count > 0) { + (void)markdown_builder_append_raw(&md, "Entry: "); + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].entry_points[j] + ? 
summary.clusters[i].entry_points[j] + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + + (void)markdown_builder_append_raw(&md, "## High-Connectivity Functions (in_degree >= 5)\n"); + if (summary.function_count == 0) { + (void)markdown_builder_append_raw(&md, "None above threshold.\n\n"); + } else { + for (int i = 0; i < summary.function_count; i++) { + (void)markdown_builder_appendf( + &md, "%s - called by %d functions", + summary.functions[i].name ? summary.functions[i].name : "", + summary.functions[i].in_degree); + if (summary.functions[i].file && summary.functions[i].file[0]) { + (void)markdown_builder_appendf(&md, " [%s]", summary.functions[i].file); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Entry Points\n"); + if (summary.entry_point_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching entry points.\n"); + } else { + for (int i = 0; i < summary.entry_point_count; i++) { + (void)markdown_builder_appendf(&md, "%s: %d\n", + summary.entry_points[i].kind + ? summary.entry_points[i].kind + : "Other", + summary.entry_points[i].count); + } + } + + char *markdown = markdown_builder_finish(&md); + char *result = cbm_mcp_text_result(markdown ? 
markdown : "", false); + + free(markdown); + cbm_store_architecture_summary_free(&summary); + free(project); + free(project_path); + free(display_path); + free(focus); + return result; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2704,6 +3060,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_architecture_summary") == 0) { + return handle_get_architecture_summary(srv, args_json); + } /* Pipeline-dependent tools */ if (strcmp(tool_name, "index_repository") == 0) { diff --git a/src/store/store.c b/src/store/store.c index 88aa7078..ca196255 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -9,11 +9,14 @@ // for ISO timestamp #include "store/store.h" +#include "foundation/hash_table.h" #include "foundation/platform.h" #include "foundation/compat.h" #include "foundation/compat_regex.h" #include +#include +#include #include #include #include @@ -246,6 +249,27 @@ static int configure_pragmas(cbm_store_t *s, bool in_memory) { return rc; } +static int configure_query_pragmas(cbm_store_t *s) { + int rc; + rc = exec_sql(s, "PRAGMA foreign_keys = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA temp_store = MEMORY;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA busy_timeout = 10000;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA query_only = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + return exec_sql(s, "PRAGMA mmap_size = 67108864;"); /* 64 MB */ +} + /* ── REGEXP function for SQLite ──────────────────────────────────── */ static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) { @@ -375,8 +399,8 @@ cbm_store_t 
*cbm_store_open_path_query(const char *db_path) { return NULL; } - /* Open read-write but do NOT create — returns SQLITE_CANTOPEN if absent. */ - int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READWRITE, NULL); + /* Open read-only and do NOT create — query tools should never need write access. */ + int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL); if (rc != SQLITE_OK) { /* sqlite3_open_v2 allocates a handle even on failure — must close it. */ sqlite3_close(s->db); @@ -395,7 +419,7 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, sqlite_iregexp, NULL, NULL); - if (configure_pragmas(s, false) != CBM_STORE_OK) { + if (configure_query_pragmas(s) != CBM_STORE_OK) { sqlite3_close(s->db); free((void *)s->db_path); free(s); @@ -4085,6 +4109,1112 @@ void cbm_store_architecture_free(cbm_architecture_info_t *out) { memset(out, 0, sizeof(*out)); } +typedef struct { + int64_t node_id; + char *path; + int inbound_calls; + int outbound_calls; +} arch_summary_file_row_t; + +typedef struct { + int community; + int *member_indices; + int member_count; + int member_cap; +} arch_summary_cluster_row_t; + +static char *summary_focus_term(const char *focus) { + if (!focus || !focus[0]) { + return NULL; + } + size_t len = strlen(focus); + char *term = malloc(len + 1); + if (!term) { + return NULL; + } + for (size_t i = 0; i < len; i++) { + term[i] = (char)tolower((unsigned char)focus[i]); + } + term[len] = '\0'; + return term; +} + +static char *summary_focus_like(const char *focus) { + char *term = summary_focus_term(focus); + if (!term) { + return NULL; + } + size_t len = strlen(term); + char *like = malloc(len + 3); + if (!like) { + free(term); + return NULL; + } + like[0] = '%'; + memcpy(like + 1, term, len); + like[len + 1] = '%'; + like[len + 2] = '\0'; + free(term); + return like; +} + +static bool summary_text_matches(const char *focus_term, const char 
*text) { + if (!focus_term || !focus_term[0]) { + return true; + } + if (!text || !text[0]) { + return false; + } + return cbm_strcasestr(text, focus_term) != NULL; +} + +static int summary_count_nodes(cbm_store_t *s, const char *project, const char *label_sql, + const char *focus_like) { + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM nodes " + "WHERE project=?1 AND %s " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(name, '')) LIKE ?2 " + "OR lower(COALESCE(qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(file_path, '')) LIKE ?2 " + "OR lower(COALESCE(properties, '')) LIKE ?2);", + label_sql); + } else { + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM nodes " + "WHERE project=?1 AND %s " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%';", + label_sql); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_count_nodes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + int count = 0; + if (sqlite3_step(stmt) == SQLITE_ROW) { + count = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return count; +} + +static int summary_file_row_cmp(const void *lhs, const void *rhs) { + const arch_summary_file_row_t *a = lhs; + const arch_summary_file_row_t *b = rhs; + int64_t a_score = (int64_t)a->inbound_calls + (int64_t)a->outbound_calls; + int64_t b_score = (int64_t)b->inbound_calls + (int64_t)b->outbound_calls; + if (a_score != b_score) { + return b_score > a_score ? 
1 : -1; + } + if (a->inbound_calls != b->inbound_calls) { + return b->inbound_calls > a->inbound_calls ? 1 : -1; + } + if (!a->path) { + return 1; + } + if (!b->path) { + return -1; + } + return strcmp(a->path, b->path); +} + +static void summary_copy_json_scalar(yyjson_val *val, char *out, size_t out_sz) { + if (out_sz == 0) { + return; + } + out[0] = '\0'; + if (!val) { + return; + } + + if (yyjson_is_str(val)) { + snprintf(out, out_sz, "%s", yyjson_get_str(val)); + return; + } + if (yyjson_is_bool(val)) { + snprintf(out, out_sz, "%s", yyjson_get_bool(val) ? "true" : "false"); + return; + } + if (yyjson_is_int(val) || yyjson_is_sint(val)) { + snprintf(out, out_sz, "%lld", (long long)yyjson_get_sint(val)); + return; + } + if (yyjson_is_uint(val)) { + snprintf(out, out_sz, "%llu", (unsigned long long)yyjson_get_uint(val)); + return; + } + if (yyjson_is_real(val)) { + snprintf(out, out_sz, "%.17g", yyjson_get_real(val)); + } +} + +static void summary_extract_route_fields(const char *props, char *method, size_t method_sz, + char *path, size_t path_sz, char *handler, + size_t handler_sz) { + method[0] = '\0'; + path[0] = '\0'; + handler[0] = '\0'; + if (!props) { + return; + } + + yyjson_doc *doc = yyjson_read(props, strlen(props), 0); + if (!doc) { + return; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + if (yyjson_is_obj(root)) { + summary_copy_json_scalar(yyjson_obj_get(root, "method"), method, method_sz); + summary_copy_json_scalar(yyjson_obj_get(root, "path"), path, path_sz); + summary_copy_json_scalar(yyjson_obj_get(root, "handler"), handler, handler_sz); + } + yyjson_doc_free(doc); +} + +static int summary_collect_file_rows(cbm_store_t *s, const char *project, const char *focus_like, + arch_summary_file_row_t **out_rows, int *out_count, + cbm_louvain_edge_t **out_edges, int *out_edge_count) { + *out_rows = NULL; + *out_count = 0; + *out_edges = NULL; + *out_edge_count = 0; + + const char *files_sql_no_focus = + "SELECT id, COALESCE(NULLIF(file_path, 
''), name) " + "FROM nodes " + "WHERE project=?1 AND label='File' " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%test%' " + "ORDER BY 2;"; + const char *files_sql_focus = + "SELECT n.id, COALESCE(NULLIF(n.file_path, ''), n.name) " + "FROM nodes n " + "WHERE n.project=?1 AND n.label='File' " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%test%' " + "AND (lower(COALESCE(n.file_path, '')) LIKE ?2 " + "OR lower(COALESCE(n.name, '')) LIKE ?2 " + "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 " + "OR EXISTS (SELECT 1 FROM nodes m " + " WHERE m.project = n.project " + " AND m.file_path = n.file_path " + " AND lower(COALESCE(m.file_path, '')) NOT LIKE '%test%' " + " AND (lower(COALESCE(m.name, '')) LIKE ?2 " + " OR lower(COALESCE(m.qualified_name, '')) LIKE ?2 " + " OR lower(COALESCE(m.file_path, '')) LIKE ?2 " + " OR lower(COALESCE(m.properties, '')) LIKE ?2))) " + "ORDER BY 2;"; + + sqlite3_stmt *stmt = NULL; + const char *files_sql = (focus_like && focus_like[0]) ? files_sql_focus : files_sql_no_focus; + if (sqlite3_prepare_v2(s->db, files_sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_files"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + int cap = 16; + int count = 0; + arch_summary_file_row_t *rows = calloc(cap, sizeof(arch_summary_file_row_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + int old_cap = cap; + cap *= 2; + rows = safe_realloc(rows, (size_t)cap * sizeof(arch_summary_file_row_t)); + memset(rows + old_cap, 0, (size_t)(cap - old_cap) * sizeof(arch_summary_file_row_t)); + } + rows[count].node_id = sqlite3_column_int64(stmt, 0); + rows[count].path = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + count++; + } + sqlite3_finalize(stmt); + + CBMHashTable *file_rows = cbm_ht_create(count > 0 ? 
(uint32_t)count * 2U : 32U); + if (!file_rows) { + for (int i = 0; i < count; i++) { + free(rows[i].path); + } + free(rows); + store_set_error(s, "summary_collect_file_rows: file lookup alloc failed"); + return CBM_STORE_ERR; + } + for (int i = 0; i < count; i++) { + if (rows[i].path && rows[i].path[0]) { + cbm_ht_set(file_rows, rows[i].path, (void *)((intptr_t)i + 1)); + } + } + + const char *edges_sql = + "SELECT src.file_path, dst.file_path " + "FROM edges e " + "JOIN nodes src ON src.id = e.source_id " + "JOIN nodes dst ON dst.id = e.target_id " + "WHERE e.project=?1 AND e.type='CALLS' " + "AND src.file_path <> '' AND dst.file_path <> '' " + "AND src.file_path <> dst.file_path " + "AND lower(src.file_path) NOT LIKE '%test%' " + "AND lower(dst.file_path) NOT LIKE '%test%';"; + + stmt = NULL; + if (sqlite3_prepare_v2(s->db, edges_sql, -1, &stmt, NULL) != SQLITE_OK) { + cbm_ht_free(file_rows); + for (int i = 0; i < count; i++) { + free(rows[i].path); + } + free(rows); + store_set_error_sqlite(s, "summary_collect_edges"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int edge_cap = 64; + int edge_count = 0; + cbm_louvain_edge_t *edges = malloc((size_t)edge_cap * sizeof(cbm_louvain_edge_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *src_file = (const char *)sqlite3_column_text(stmt, 0); + const char *dst_file = (const char *)sqlite3_column_text(stmt, 1); + intptr_t src_val = (intptr_t)(src_file ? cbm_ht_get(file_rows, src_file) : NULL); + intptr_t dst_val = (intptr_t)(dst_file ? cbm_ht_get(file_rows, dst_file) : NULL); + int src_idx = src_val ? (int)(src_val - 1) : -1; + int dst_idx = dst_val ? 
(int)(dst_val - 1) : -1; + if (src_idx < 0 || dst_idx < 0 || src_idx == dst_idx) { + continue; + } + + rows[src_idx].outbound_calls++; + rows[dst_idx].inbound_calls++; + + if (edge_count >= edge_cap) { + edge_cap *= 2; + edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_louvain_edge_t)); + } + edges[edge_count].src = rows[src_idx].node_id; + edges[edge_count].dst = rows[dst_idx].node_id; + edge_count++; + } + sqlite3_finalize(stmt); + cbm_ht_free(file_rows); + + *out_rows = rows; + *out_count = count; + *out_edges = edges; + *out_edge_count = edge_count; + return CBM_STORE_OK; +} + +static int summary_fill_key_symbols(cbm_store_t *s, const char *project, cbm_arch_summary_file_t *file) { + const char *sql = + "SELECT name, " + "CASE WHEN end_line >= start_line AND start_line > 0 " + "THEN end_line - start_line + 1 ELSE 0 END AS span " + "FROM nodes " + "WHERE project=?1 AND file_path=?2 " + "AND label IN ('Function','Method') " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "ORDER BY span DESC, name " + "LIMIT 3;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_fill_key_symbols"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + bind_text(stmt, 2, file->file); + + int cap = 4; + int count = 0; + cbm_arch_summary_symbol_t *symbols = calloc(cap, sizeof(cbm_arch_summary_symbol_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + cap *= 2; + symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_arch_summary_symbol_t)); + } + symbols[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + symbols[count].span_lines = sqlite3_column_int(stmt, 1); + count++; + } + sqlite3_finalize(stmt); + + file->symbols = symbols; + file->symbol_count = count; + return CBM_STORE_OK; +} + +static int summary_find_handler_node(cbm_store_t *s, const char *project, const char 
*handler, + int64_t *out_id, char **out_file_path) { + *out_id = 0; + *out_file_path = NULL; + if (!handler || !handler[0]) { + return CBM_STORE_NOT_FOUND; + } + + char owner[256] = ""; + char member[256] = ""; + const char *sep = strchr(handler, '@'); + int sep_len = 1; + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + sep = scope; + sep_len = 2; + } + + if (sep) { + size_t owner_len = (size_t)(sep - handler); + if (owner_len >= sizeof(owner)) { + owner_len = sizeof(owner) - 1; + } + memcpy(owner, handler, owner_len); + owner[owner_len] = '\0'; + snprintf(member, sizeof(member), "%s", sep + sep_len); + } else { + snprintf(member, sizeof(member), "%s", handler); + } + + cbm_node_t *nodes = NULL; + int count = 0; + if (cbm_store_find_nodes_by_name(s, project, member, &nodes, &count) != CBM_STORE_OK || + count == 0) { + cbm_store_free_nodes(nodes, count); + return CBM_STORE_NOT_FOUND; + } + + int pick = 0; + for (int i = 0; i < count; i++) { + if (owner[0] && + ((nodes[i].qualified_name && cbm_strcasestr(nodes[i].qualified_name, owner)) || + (nodes[i].file_path && cbm_strcasestr(nodes[i].file_path, owner)))) { + pick = i; + break; + } + if (!owner[0] && nodes[i].file_path && !cbm_is_test_file_path(nodes[i].file_path)) { + pick = i; + break; + } + } + + *out_id = nodes[pick].id; + if (nodes[pick].file_path && nodes[pick].file_path[0]) { + *out_file_path = heap_strdup(nodes[pick].file_path); + } + cbm_store_free_nodes(nodes, count); + return CBM_STORE_OK; +} + +static int summary_query_primary_callee(cbm_store_t *s, int64_t source_id, const char *source_file, + char **out_name, int64_t *out_id, char **out_file_path) { + *out_name = NULL; + if (out_id) { + *out_id = 0; + } + if (out_file_path) { + *out_file_path = NULL; + } + + const char *sql = + "SELECT n.id, n.name, COALESCE(n.file_path, '') " + "FROM edges e " + "JOIN nodes n ON n.id = e.target_id " + "WHERE e.source_id=?1 AND e.type='CALLS' " + "AND (json_extract(n.properties, 
'$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "ORDER BY CASE " + " WHEN ?2 <> '' AND COALESCE(n.file_path, '') <> '' " + " AND COALESCE(n.file_path, '') <> ?2 THEN 0 " + " ELSE 1 " + " END, " + " CASE WHEN n.label IN ('Method','Function') THEN 0 ELSE 1 END, " + " n.name " + "LIMIT 1;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_query_primary_callee"); + return CBM_STORE_ERR; + } + sqlite3_bind_int64(stmt, 1, source_id); + bind_text(stmt, 2, source_file ? source_file : ""); + + if (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 1); + const char *file_path = (const char *)sqlite3_column_text(stmt, 2); + if (out_id) { + *out_id = sqlite3_column_int64(stmt, 0); + } + *out_name = heap_strdup(name); + if (out_file_path && file_path && file_path[0]) { + *out_file_path = heap_strdup(file_path); + } + } + sqlite3_finalize(stmt); + return CBM_STORE_OK; +} + +static int summary_collect_routes(cbm_store_t *s, const char *project, const char *focus_term, + cbm_arch_summary_route_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + const char *sql = + "SELECT name, properties, COALESCE(file_path, '') " + "FROM nodes " + "WHERE project=?1 AND label='Route' " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "ORDER BY name;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_routes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int count = 0; + cbm_arch_summary_route_t *routes = calloc(cap, sizeof(cbm_arch_summary_route_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 0); + const char *props = (const char *)sqlite3_column_text(stmt, 1); + 
const char *route_file = (const char *)sqlite3_column_text(stmt, 2); + if (cbm_is_test_file_path(route_file)) { + continue; + } + + char method[64]; + char path[512]; + char handler[256]; + summary_extract_route_fields(props, method, sizeof(method), path, sizeof(path), handler, + sizeof(handler)); + + if (!path[0] && name) { + snprintf(path, sizeof(path), "%s", name); + } + + int64_t handler_id = 0; + char *handler_file = NULL; + if (handler[0]) { + (void)summary_find_handler_node(s, project, handler, &handler_id, &handler_file); + } + + char *service = NULL; + char *next = NULL; + char *service_file = NULL; + int64_t service_id = 0; + if (handler_id > 0) { + (void)summary_query_primary_callee(s, handler_id, handler_file, &service, &service_id, + &service_file); + if (service_id > 0) { + (void)summary_query_primary_callee(s, service_id, + service_file ? service_file : handler_file, &next, + NULL, NULL); + } + } + + if (focus_term && focus_term[0] && + !summary_text_matches(focus_term, method) && + !summary_text_matches(focus_term, path) && + !summary_text_matches(focus_term, handler) && + !summary_text_matches(focus_term, service) && + !summary_text_matches(focus_term, next) && + !summary_text_matches(focus_term, handler_file)) { + free(service_file); + free(handler_file); + free(service); + free(next); + continue; + } + + if (count >= cap) { + cap *= 2; + routes = safe_realloc(routes, (size_t)cap * sizeof(cbm_arch_summary_route_t)); + } + + routes[count].method = heap_strdup(method[0] ? method : ""); + routes[count].path = heap_strdup(path[0] ? path : ""); + routes[count].handler = heap_strdup(handler[0] ? 
handler : ""); + routes[count].service = service; + routes[count].next = next; + routes[count].handler_file = handler_file; + free(service_file); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = routes; + *out_count = count; + return CBM_STORE_OK; +} + +static void summary_cluster_add_member(arch_summary_cluster_row_t *cluster, int member_index) { + if (cluster->member_count >= cluster->member_cap) { + cluster->member_cap = cluster->member_cap ? cluster->member_cap * 2 : 4; + cluster->member_indices = + safe_realloc(cluster->member_indices, (size_t)cluster->member_cap * sizeof(int)); + } + cluster->member_indices[cluster->member_count++] = member_index; +} + +static const char *summary_handler_owner_token(const char *handler, char *buf, size_t buf_size) { + if (!handler || !handler[0] || !buf || buf_size == 0) { + return NULL; + } + + const char *sep = strchr(handler, '@'); + size_t len = sep ? (size_t)(sep - handler) : strlen(handler); + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + len = (size_t)(scope - handler); + } + if (len == 0) { + return NULL; + } + if (len >= buf_size) { + len = buf_size - 1; + } + memcpy(buf, handler, len); + buf[len] = '\0'; + return buf; +} + +static int summary_cluster_cmp(const void *lhs, const void *rhs) { + const arch_summary_cluster_row_t *a = lhs; + const arch_summary_cluster_row_t *b = rhs; + return b->member_count - a->member_count; +} + +static bool summary_cluster_has_file(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, const char *path) { + if (!cluster || !rows || !path) { + return false; + } + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && strcmp(candidate, path) == 0) { + return true; + } + } + return false; +} + +static bool summary_cluster_matches_route(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, + const 
cbm_arch_summary_route_t *route) { + char owner_buf[256]; + const char *owner = NULL; + + if (!cluster || !rows || !route) { + return false; + } + if (summary_cluster_has_file(cluster, rows, route->handler_file)) { + return true; + } + + owner = summary_handler_owner_token(route->handler, owner_buf, sizeof(owner_buf)); + if (!owner || !owner[0]) { + return false; + } + + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && cbm_strcasestr(candidate, owner)) { + return true; + } + } + return false; +} + +static int summary_build_clusters(const arch_summary_file_row_t *rows, int row_count, + const cbm_louvain_edge_t *edges, int edge_count, + const cbm_arch_summary_route_t *routes, int route_count, + cbm_arch_summary_cluster_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + if (row_count == 0 || edge_count == 0) { + return CBM_STORE_OK; + } + + int64_t *node_ids = malloc((size_t)row_count * sizeof(int64_t)); + for (int i = 0; i < row_count; i++) { + node_ids[i] = rows[i].node_id; + } + + cbm_louvain_result_t *results = NULL; + int result_count = 0; + int rc = cbm_louvain(node_ids, row_count, edges, edge_count, &results, &result_count); + free(node_ids); + if (rc != CBM_STORE_OK) { + return rc; + } + + CBMHashTable *row_by_id = cbm_ht_create(row_count > 0 ? 
(uint32_t)row_count * 2U : 32U); + char(*row_id_keys)[32] = NULL; + if (!row_by_id) { + free(results); + return CBM_STORE_ERR; + } + row_id_keys = calloc((size_t)row_count, sizeof(*row_id_keys)); + if (!row_id_keys) { + cbm_ht_free(row_by_id); + free(results); + return CBM_STORE_ERR; + } + for (int i = 0; i < row_count; i++) { + snprintf(row_id_keys[i], sizeof(row_id_keys[i]), "%lld", (long long)rows[i].node_id); + cbm_ht_set(row_by_id, row_id_keys[i], (void *)((intptr_t)i + 1)); + } + + int cap = 8; + int count = 0; + arch_summary_cluster_row_t *clusters = calloc(cap, sizeof(arch_summary_cluster_row_t)); + for (int i = 0; i < result_count; i++) { + char result_key[32]; + snprintf(result_key, sizeof(result_key), "%lld", (long long)results[i].node_id); + intptr_t row_val = (intptr_t)cbm_ht_get(row_by_id, result_key); + int member_index = row_val ? (int)(row_val - 1) : -1; + if (member_index < 0) { + continue; + } + + int slot = -1; + for (int j = 0; j < count; j++) { + if (clusters[j].community == results[i].community) { + slot = j; + break; + } + } + if (slot < 0) { + if (count >= cap) { + int old_cap = cap; + cap *= 2; + clusters = safe_realloc(clusters, (size_t)cap * sizeof(arch_summary_cluster_row_t)); + memset(clusters + old_cap, 0, + (size_t)(cap - old_cap) * sizeof(arch_summary_cluster_row_t)); + } + slot = count++; + clusters[slot].community = results[i].community; + } + summary_cluster_add_member(&clusters[slot], member_index); + } + free(row_id_keys); + cbm_ht_free(row_by_id); + free(results); + + int write_idx = 0; + for (int i = 0; i < count; i++) { + if (clusters[i].member_count < 2) { + free(clusters[i].member_indices); + continue; + } + if (write_idx != i) { + clusters[write_idx] = clusters[i]; + } + write_idx++; + } + count = write_idx; + if (count == 0) { + free(clusters); + return CBM_STORE_OK; + } + + qsort(clusters, (size_t)count, sizeof(arch_summary_cluster_row_t), summary_cluster_cmp); + if (count > 6) { + for (int i = 6; i < count; i++) { + 
free(clusters[i].member_indices); + } + count = 6; + } + + cbm_arch_summary_cluster_t *out = calloc((size_t)count, sizeof(cbm_arch_summary_cluster_t)); + for (int i = 0; i < count; i++) { + out[i].id = i + 1; + out[i].file_count = clusters[i].member_count; + + int core_count = clusters[i].member_count < 3 ? clusters[i].member_count : 3; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + out[i].core_files = calloc((size_t)core_count, sizeof(char *)); + out[i].core_file_count = core_count; + for (int core = 0; core < core_count; core++) { + int best_idx = -1; + int best_score = -1; + for (int m = 0; m < clusters[i].member_count; m++) { + int row_idx = clusters[i].member_indices[m]; + int score = rows[row_idx].inbound_calls + rows[row_idx].outbound_calls; + bool already_used = false; + for (int prev = 0; prev < core; prev++) { + const char *used = out[i].core_files[prev]; + if (used && rows[row_idx].path && strcmp(used, rows[row_idx].path) == 0) { + already_used = true; + break; + } + } + if (!already_used && score > best_score) { + best_score = score; + best_idx = row_idx; + } + } + if (best_idx >= 0) { + ((char **)out[i].core_files)[core] = heap_strdup(rows[best_idx].path); + } + } + + int entry_cap = 4; + int entry_count = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **entries = calloc((size_t)entry_cap, sizeof(char *)); + for (int r = 0; r < route_count; r++) { + if (!summary_cluster_matches_route(&clusters[i], rows, &routes[r])) { + continue; + } + char label[768]; + if (routes[r].method && routes[r].method[0]) { + snprintf(label, sizeof(label), "%s %s", routes[r].method, + routes[r].path ? routes[r].path : ""); + } else { + snprintf(label, sizeof(label), "%s", routes[r].path ? 
routes[r].path : ""); + } + bool duplicate = false; + for (int e = 0; e < entry_count; e++) { + if (strcmp(entries[e], label) == 0) { + duplicate = true; + break; + } + } + if (duplicate) { + continue; + } + if (entry_count >= entry_cap) { + entry_cap *= 2; + entries = safe_realloc(entries, (size_t)entry_cap * sizeof(char *)); + } + entries[entry_count++] = heap_strdup(label); + } + out[i].entry_points = (const char **)entries; + out[i].entry_point_count = entry_count; + free(clusters[i].member_indices); + } + free(clusters); + + *out_arr = out; + *out_count = count; + return CBM_STORE_OK; +} + +static int summary_collect_hot_functions(cbm_store_t *s, const char *project, const char *focus_like, + cbm_arch_summary_function_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(n.name, '')) LIKE ?2 " + "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(n.file_path, '')) LIKE ?2 " + "OR lower(COALESCE(n.properties, '')) LIKE ?2) " + "GROUP BY n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY 
n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_hot_functions"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + int cap = 8; + int count = 0; + cbm_arch_summary_function_t *arr = calloc(cap, sizeof(cbm_arch_summary_function_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + cap *= 2; + arr = safe_realloc(arr, (size_t)cap * sizeof(cbm_arch_summary_function_t)); + } + arr[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[count].file = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[count].in_degree = sqlite3_column_int(stmt, 2); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = arr; + *out_count = count; + return CBM_STORE_OK; +} + +static const char *summary_entry_group_name(const char *label) { + if (!label || !label[0]) { + return "Other"; + } + if (strcmp(label, "Route") == 0) { + return "Routes"; + } + if (cbm_strcasestr(label, "command")) { + return "Commands"; + } + if (cbm_strcasestr(label, "job")) { + return "Jobs"; + } + if (cbm_strcasestr(label, "cron") || cbm_strcasestr(label, "schedule")) { + return "Crons"; + } + if (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0) { + return "Functions"; + } + return "Other"; +} + +static int summary_add_entry_group(cbm_arch_summary_entry_group_t **groups, int *count, int *cap, + const char *kind, int add_count) { + if (!kind || add_count <= 0) { + return CBM_STORE_OK; + } + for (int i = 0; i < *count; i++) { + if (strcmp((*groups)[i].kind, kind) == 0) { + (*groups)[i].count += add_count; + return CBM_STORE_OK; + } + } + if (*count >= *cap) { + *cap = *cap ? 
*cap * 2 : 4; + *groups = safe_realloc(*groups, (size_t)*cap * sizeof(cbm_arch_summary_entry_group_t)); + } + (*groups)[*count].kind = heap_strdup(kind); + (*groups)[*count].count = add_count; + (*count)++; + return CBM_STORE_OK; +} + +static int summary_collect_entry_points(cbm_store_t *s, const char *project, const char *focus_like, + int route_count, cbm_arch_summary_entry_group_t **out_arr, + int *out_count) { + *out_arr = NULL; + *out_count = 0; + + int cap = 4; + int count = 0; + cbm_arch_summary_entry_group_t *groups = + calloc((size_t)cap, sizeof(cbm_arch_summary_entry_group_t)); + if (route_count > 0) { + (void)summary_add_entry_group(&groups, &count, &cap, "Routes", route_count); + } + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(name, '')) LIKE ?2 " + "OR lower(COALESCE(qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(file_path, '')) LIKE ?2 " + "OR lower(COALESCE(properties, '')) LIKE ?2) " + "GROUP BY label;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY label;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(groups); + store_set_error_sqlite(s, "summary_collect_entry_points"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == 
SQLITE_ROW) { + const char *label = (const char *)sqlite3_column_text(stmt, 0); + int label_count = sqlite3_column_int(stmt, 1); + const char *kind = summary_entry_group_name(label); + if (strcmp(kind, "Routes") == 0) { + continue; + } + (void)summary_add_entry_group(&groups, &count, &cap, kind, label_count); + } + sqlite3_finalize(stmt); + + *out_arr = groups; + *out_count = count; + return CBM_STORE_OK; +} + +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !project) { + return CBM_STORE_ERR; + } + + char *focus_like = summary_focus_like(focus); + char *focus_term = summary_focus_term(focus); + arch_summary_file_row_t *rows = NULL; + cbm_louvain_edge_t *edges = NULL; + int row_count = 0; + int edge_count = 0; + int rc = summary_collect_file_rows(s, project, focus_like, &rows, &row_count, &edges, &edge_count); + if (rc != CBM_STORE_OK) { + free(focus_like); + free(focus_term); + return rc; + } + out->total_files = summary_count_nodes(s, project, "label='File'", NULL); + out->total_functions = summary_count_nodes(s, project, "label IN ('Function','Method')", NULL); + out->total_classes = summary_count_nodes(s, project, "label IN ('Class','Interface')", NULL); + out->total_routes = summary_count_nodes(s, project, "label='Route'", NULL); + + qsort(rows, (size_t)row_count, sizeof(arch_summary_file_row_t), summary_file_row_cmp); + int top_file_count = row_count < 15 ? 
row_count : 15; + if (top_file_count > 0) { + out->files = calloc((size_t)top_file_count, sizeof(cbm_arch_summary_file_t)); + out->file_count = top_file_count; + for (int i = 0; i < top_file_count; i++) { + out->files[i].file = heap_strdup(rows[i].path); + out->files[i].inbound_calls = rows[i].inbound_calls; + out->files[i].outbound_calls = rows[i].outbound_calls; + rc = summary_fill_key_symbols(s, project, &out->files[i]); + if (rc != CBM_STORE_OK) { + break; + } + } + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_routes(s, project, focus_term, &out->routes, &out->route_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_build_clusters(rows, row_count, edges, edge_count, out->routes, out->route_count, + &out->clusters, &out->cluster_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_hot_functions(s, project, focus_like, &out->functions, + &out->function_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_entry_points(s, project, focus_like, out->total_routes, + &out->entry_points, &out->entry_point_count); + } + + for (int i = 0; i < row_count; i++) { + free(rows[i].path); + } + free(rows); + free(edges); + free(focus_like); + free(focus_term); + + if (rc != CBM_STORE_OK) { + cbm_store_architecture_summary_free(out); + } + return rc; +} + +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->file_count; i++) { + free((void *)out->files[i].file); + for (int j = 0; j < out->files[i].symbol_count; j++) { + free((void *)out->files[i].symbols[j].name); + } + free(out->files[i].symbols); + } + free(out->files); + + for (int i = 0; i < out->route_count; i++) { + free((void *)out->routes[i].method); + free((void *)out->routes[i].path); + free((void *)out->routes[i].handler); + free((void *)out->routes[i].service); + free((void *)out->routes[i].next); + free((void *)out->routes[i].handler_file); + } + free(out->routes); + + for (int i = 0; i < out->cluster_count; 
i++) { + for (int j = 0; j < out->clusters[i].core_file_count; j++) { + free((void *)out->clusters[i].core_files[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].core_files); + for (int j = 0; j < out->clusters[i].entry_point_count; j++) { + free((void *)out->clusters[i].entry_points[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].entry_points); + } + free(out->clusters); + + for (int i = 0; i < out->function_count; i++) { + free((void *)out->functions[i].name); + free((void *)out->functions[i].file); + } + free(out->functions); + + for (int i = 0; i < out->entry_point_count; i++) { + free((void *)out->entry_points[i].kind); + } + free(out->entry_points); + memset(out, 0, sizeof(*out)); +} + /* ── ADR (Architecture Decision Record) ────────────────────────── */ static const char *canonical_sections[] = {"PURPOSE", "STACK", "ARCHITECTURE", diff --git a/src/store/store.h b/src/store/store.h index 17b0df11..d02fec77 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -514,6 +514,69 @@ int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char * int aspect_count, cbm_architecture_info_t *out); void cbm_store_architecture_free(cbm_architecture_info_t *out); +typedef struct { + const char *name; + int span_lines; +} cbm_arch_summary_symbol_t; + +typedef struct { + const char *file; + int inbound_calls; + int outbound_calls; + cbm_arch_summary_symbol_t *symbols; + int symbol_count; +} cbm_arch_summary_file_t; + +typedef struct { + const char *method; + const char *path; + const char *handler; + const char *service; + const char *next; + const char *handler_file; +} cbm_arch_summary_route_t; + +typedef struct { + int id; + int file_count; + const char **core_files; + int core_file_count; + const char **entry_points; + int entry_point_count; +} cbm_arch_summary_cluster_t; + +typedef struct { + const char *name; + const char *file; + int 
in_degree; +} cbm_arch_summary_function_t; + +typedef struct { + const char *kind; + int count; +} cbm_arch_summary_entry_group_t; + +typedef struct { + cbm_arch_summary_file_t *files; + cbm_arch_summary_route_t *routes; + cbm_arch_summary_cluster_t *clusters; + cbm_arch_summary_function_t *functions; + cbm_arch_summary_entry_group_t *entry_points; + int total_files; + int total_functions; + int total_classes; + int total_routes; + int file_count; + int route_count; + int cluster_count; + int function_count; + int entry_point_count; +} cbm_architecture_summary_t; + +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out); +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out); + /* ── ADR (Architecture Decision Record) ────────────────────────── */ #define CBM_ADR_MAX_LENGTH 8000 diff --git a/tests/test_integration.c b/tests/test_integration.c index 046cb856..318bce3e 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -367,6 +367,20 @@ TEST(integ_mcp_get_architecture) { PASS(); } +TEST(integ_mcp_get_architecture_summary) { + char args[512]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"max_tokens\":1200,\"focus\":\"main\"}", g_project); + + char *resp = call_tool("get_architecture_summary", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "## Key Files")); + ASSERT_NOT_NULL(strstr(resp, "main.py")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -554,6 +568,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_query_graph_calls); RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); + RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 
77ec9c99..dd3c0bb2 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -6,10 +6,12 @@ #include "../src/foundation/compat.h" #include "test_framework.h" #include +#include #include #include #include #include +#include /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING @@ -129,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 14 tools */ + /* Should contain all 15 tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -137,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); ASSERT_NOT_NULL(strstr(json, "delete_project")); @@ -490,6 +493,142 @@ TEST(tool_get_architecture_empty) { PASS(); } +TEST(tool_get_architecture_summary_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + + char *resp = + cbm_mcp_server_handle(srv, "{\"jsonrpc\":\"2.0\",\"id\":25,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{}}}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "project is required")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + +static cbm_mcp_server_t *setup_arch_summary_server(char *tmp_dir, size_t tmp_sz) { + snprintf(tmp_dir, tmp_sz, "/tmp/cbm_mcp_arch_XXXXXX"); + if (!cbm_mkdtemp(tmp_dir)) { + return NULL; + } + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + rmdir(tmp_dir); + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + char 
*proj_name = cbm_project_name_from_path(tmp_dir); + if (!proj_name) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj_name); + cbm_store_upsert_project(st, proj_name, tmp_dir); + + int64_t prev_fn_id = 0; + for (int i = 0; i < 24; i++) { + char file_name[64]; + char file_qn[128]; + char fn_name[32]; + char fn_qn[160]; + + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "%s.pkg.file%02d", proj_name, i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "%s.pkg.file%02d.%s", proj_name, i, fn_name); + + cbm_node_t file = {.project = proj_name, + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(st, &file); + + cbm_node_t fn = {.project = proj_name, + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name, + .start_line = 1, + .end_line = 40 + i}; + int64_t fn_id = cbm_store_upsert_node(st, &fn); + if (prev_fn_id > 0) { + cbm_edge_t edge = { + .project = proj_name, .source_id = prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + prev_fn_id = fn_id; + } + + free(proj_name); + return srv; +} + +static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { + cbm_mcp_server_free(srv); + if (tmp_dir && tmp_dir[0]) { + rmdir(tmp_dir); + } +} + +TEST(tool_get_architecture_summary_truncated) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":26,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project\":\"%s\",\"max_tokens\":1}}}", + proj_name); + + char *resp = 
cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "_Truncated at max_tokens._")); + free(resp); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + +TEST(tool_get_architecture_summary_project_path_alias) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":27,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project_path\":\"%s\",\"max_tokens\":64}}}", + tmp_dir); + + char *resp = cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + free(resp); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + TEST(tool_query_graph_missing_query) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -1703,6 +1842,9 @@ SUITE(mcp) { RUN_TEST(tool_trace_missing_function_name); RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); + RUN_TEST(tool_get_architecture_summary_missing_project); + RUN_TEST(tool_get_architecture_summary_truncated); + RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_store_arch.c b/tests/test_store_arch.c index 32663f3a..1427042a 100644 --- a/tests/test_store_arch.c +++ b/tests/test_store_arch.c @@ -379,6 +379,215 @@ TEST(arch_clusters) { PASS(); } +TEST(arch_summary_basic) { + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_TRUE(summary.file_count > 
0); + ASSERT_NOT_NULL(summary.files[0].file); + ASSERT_EQ(summary.route_count, 1); + ASSERT_STR_EQ(summary.routes[0].handler, "HandleRequest"); + ASSERT_STR_EQ(summary.routes[0].service, "ProcessOrder"); + ASSERT_STR_EQ(summary.routes[0].next, "ValidateOrder"); + ASSERT_TRUE(summary.entry_point_count > 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_focus) { + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", "service", &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_TRUE(summary.file_count >= 1); + ASSERT_NOT_NULL(strstr(summary.files[0].file, "service.go")); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_EQ(summary.route_count, 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_many_files) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + int64_t prev_fn_id = 0; + for (int i = 0; i < 20; i++) { + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "test.pkg.file%02d", i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "test.pkg.file%02d.%s", i, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + int64_t fn_id = cbm_store_upsert_node(s, &fn); + if (prev_fn_id > 0) { + cbm_edge_t e = { + .project = "test", .source_id = 
prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &e); + } + prev_fn_id = fn_id; + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 20); + ASSERT_TRUE(summary.file_count > 0); + ASSERT_TRUE(summary.total_functions >= 20); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_cluster_growth) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + for (int pair = 0; pair < 9; pair++) { + int64_t pair_ids[2] = {0}; + for (int idx = 0; idx < 2; idx++) { + int file_no = pair * 2 + idx; + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "cluster/file%02d.go", file_no); + snprintf(file_qn, sizeof(file_qn), "test.cluster.file%02d", file_no); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", file_no); + snprintf(fn_qn, sizeof(fn_qn), "test.cluster.file%02d.%s", file_no, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + pair_ids[idx] = cbm_store_upsert_node(s, &fn); + } + + cbm_edge_t edge = { + .project = "test", .source_id = pair_ids[0], .target_id = pair_ids[1], .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 18); + ASSERT_TRUE(summary.cluster_count > 0); + ASSERT_TRUE(summary.clusters[0].file_count >= 2); + 
+ cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_cluster_entry_fallback) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + cbm_node_t controller_file = {.project = "test", + .label = "File", + .name = "app/Http/Controllers/OrderController.php", + .qualified_name = "test.app.controllers.OrderController", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_file = {.project = "test", + .label = "File", + .name = "app/Services/OrderService.php", + .qualified_name = "test.app.services.OrderService", + .file_path = "app/Services/OrderService.php"}; + cbm_store_upsert_node(s, &controller_file); + cbm_store_upsert_node(s, &service_file); + + cbm_node_t controller_fn = {.project = "test", + .label = "Method", + .name = "handle", + .qualified_name = "test.app.controllers.OrderController.handle", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_fn = {.project = "test", + .label = "Method", + .name = "processOrder", + .qualified_name = "test.app.services.OrderService.processOrder", + .file_path = "app/Services/OrderService.php"}; + int64_t controller_id = cbm_store_upsert_node(s, &controller_fn); + int64_t service_id = cbm_store_upsert_node(s, &service_fn); + + cbm_edge_t edge = { + .project = "test", .source_id = controller_id, .target_id = service_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + + cbm_node_t route = {.project = "test", + .label = "Route", + .name = "/orders", + .qualified_name = "test.routes.orders", + .properties_json = + "{\"method\":\"POST\",\"path\":\"/orders\",\"handler\":" + "\"OrderController@store\"}"}; + cbm_store_upsert_node(s, &route); + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + 
ASSERT_EQ(summary.route_count, 1); + ASSERT_TRUE(summary.routes[0].handler_file == NULL); + ASSERT_TRUE(summary.cluster_count > 0); + + bool found_entry = false; + for (int i = 0; i < summary.cluster_count; i++) { + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (strcmp(summary.clusters[i].entry_points[j], "POST /orders") == 0) { + found_entry = true; + break; + } + } + if (found_entry) { + break; + } + } + ASSERT_TRUE(found_entry); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* ── ADR tests ──────────────────────────────────────────────────── */ TEST(adr_store_and_retrieve) { @@ -978,6 +1187,11 @@ SUITE(store_arch) { RUN_TEST(arch_layers); RUN_TEST(arch_file_tree); RUN_TEST(arch_clusters); + RUN_TEST(arch_summary_basic); + RUN_TEST(arch_summary_focus); + RUN_TEST(arch_summary_many_files); + RUN_TEST(arch_summary_cluster_growth); + RUN_TEST(arch_summary_cluster_entry_fallback); /* ADR */ RUN_TEST(adr_store_and_retrieve); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index b433ff2a..6cfc93f3 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include /* ── Schema / Open / Close ──────────────────────────────────────── */ @@ -111,6 +113,33 @@ TEST(store_project_delete) { PASS(); } +TEST(store_open_path_query_readonly_db) { + char path[] = "/tmp/cbm_store_query_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_project(writer, "readonly-proj", "/tmp/readonly-proj"), CBM_STORE_OK); + cbm_store_close(writer); + + ASSERT_EQ(chmod(path, 0444), 0); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "readonly-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, 
"/tmp/readonly-proj"); + cbm_project_free_fields(&proj); + cbm_store_close(reader); + + chmod(path, 0644); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1511,6 +1540,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_crud); RUN_TEST(store_project_update); RUN_TEST(store_project_delete); + RUN_TEST(store_open_path_query_readonly_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From b6f16cf472ba80a2539f167ac8cb4957e25619ce Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:27:28 +0530 Subject: [PATCH 02/14] Add PageRank ranking to graph tools --- src/main.c | 1 + src/mcp/mcp.c | 127 ++++++++- src/mcp/mcp.h | 2 +- src/pipeline/pipeline.c | 9 + src/pipeline/pipeline_incremental.c | 63 +++-- src/store/store.c | 419 +++++++++++++++++++++++++++- src/store/store.h | 23 +- tests/test_integration.c | 31 +- tests/test_mcp.c | 113 +++++++- tests/test_pipeline.c | 40 +++ tests/test_store_search.c | 77 +++++ 11 files changed, 865 insertions(+), 40 deletions(-) diff --git a/src/main.c b/src/main.c index f442aa0c..46c13e6f 100644 --- a/src/main.c +++ b/src/main.c @@ -151,6 +151,7 @@ static void print_help(void) { printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); printf(" get_architecture_summary, search_code,\n"); + printf(" get_key_symbols,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 47329a1c..8b313dbb 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 16 graph tools. * * Uses yyjson for fast JSON parsing/building. 
* Single-threaded event loop: read line → parse → dispatch → respond. @@ -348,7 +348,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"integer\"},\"max_degree\":{\"type\":\"integer\"},\"exclude_entry_points\":{" "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " - "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0}},\"required\":[\"project\"]}"}, + "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"required\":[\"project\"]}"}, {"query_graph", "Execute a Cypher query against the knowledge graph for complex multi-hop patterns, " @@ -365,8 +367,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{" "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" - "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_" - "name\",\"project\"]}"}, + "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", "Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the " @@ -398,6 +401,14 @@ static const tool_def_t TOOLS[] = { "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"get_key_symbols", + "Human-readable ranked symbol list: top functions/classes by PageRank importance. 
Use this " + "for fast first-session orientation and central entry-point discovery.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"limit\":{\"type\":" + "\"integer\",\"default\":20},\"focus\":{\"type\":\"string\",\"description\":\"Optional " + "keyword to narrow symbols by name, qualified name, or file path.\"}},\"required\":[" + "\"project\"]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -611,6 +622,21 @@ bool cbm_mcp_get_bool_arg(const char *args_json, const char *key) { return result; } +static bool cbm_mcp_get_bool_arg_default(const char *args_json, const char *key, bool default_val) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return default_val; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + bool result = default_val; + if (val && yyjson_is_bool(val)) { + result = yyjson_get_bool(val); + } + yyjson_doc_free(doc); + return result; +} + /* ══════════════════════════════════════════════════════════════════ * MCP SERVER * ══════════════════════════════════════════════════════════════════ */ @@ -1061,6 +1087,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); cbm_search_params_t params = { .project = project, @@ -1071,6 +1098,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .offset = offset, .min_degree = min_degree, .max_degree = max_degree, + .sort_by = ranked ? 
"relevance" : "name", }; cbm_search_output_t out = {0}; @@ -1094,6 +1122,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { sr->node.file_path ? sr->node.file_path : ""); yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); yyjson_mut_arr_add_val(results, item); } yyjson_mut_obj_add_val(doc, root, "results", results); @@ -1335,6 +1364,82 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int limit = cbm_mcp_get_int_arg(args, "limit", 20); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(focus); + return not_indexed; + } + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + if (cbm_store_get_key_symbols(store, project, focus, limit, &symbols, &count) != + CBM_STORE_OK) { + free(project); + free(focus); + return cbm_mcp_text_result("failed to load key symbols", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "project", project ? project : ""); + yyjson_mut_obj_add_int(doc, root, "count", count); + yyjson_mut_val *results = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", symbols[i].name ? symbols[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + symbols[i].qualified_name ? 
symbols[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", symbols[i].label ? symbols[i].label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + symbols[i].file_path ? symbols[i].file_path : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", symbols[i].in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", symbols[i].out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", symbols[i].pagerank); + yyjson_mut_arr_add_val(results, item); + } + yyjson_mut_obj_add_val(doc, root, "results", results); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_key_symbols_free(symbols, count); + free(project); + free(focus); + + { + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; + } +} + +static int node_hop_rank_cmp(const void *lhs, const void *rhs) { + const cbm_node_hop_t *a = lhs; + const cbm_node_hop_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->hop != b->hop) { + return a->hop - b->hop; + } + if (!a->node.name || !b->node.name) { + return 0; + } + return strcmp(a->node.name, b->node.name); +} + static bool same_project_path(const char *lhs, const char *rhs) { if (!lhs || !rhs) { return false; @@ -1580,6 +1685,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); if (!func_name) { free(project); @@ -1645,6 +1751,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_outbound) { cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, 100, &tr_out); + if (ranked && tr_out.visited_count > 1) { + qsort(tr_out.visited, (size_t)tr_out.visited_count, sizeof(cbm_node_hop_t), + 
node_hop_rank_cmp); + } yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { @@ -1655,6 +1765,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_out.visited[i].node.qualified_name ? tr_out.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); yyjson_mut_arr_add_val(callees, item); } yyjson_mut_obj_add_val(doc, root, "callees", callees); @@ -1663,6 +1774,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_inbound) { cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, 100, &tr_in); + if (ranked && tr_in.visited_count > 1) { + qsort(tr_in.visited, (size_t)tr_in.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { @@ -1673,6 +1788,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_in.visited[i].node.qualified_name ? 
tr_in.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); yyjson_mut_arr_add_val(callers, item); } yyjson_mut_obj_add_val(doc, root, "callers", callers); @@ -3060,6 +3176,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_key_symbols") == 0) { + return handle_get_key_symbols(srv, args_json); + } if (strcmp(tool_name, "get_architecture_summary") == 0) { return handle_get_architecture_summary(srv, args_json); } diff --git a/src/mcp/mcp.h b/src/mcp/mcp.h index 7e65912e..1f24dd8c 100644 --- a/src/mcp/mcp.h +++ b/src/mcp/mcp.h @@ -2,7 +2,7 @@ * mcp.h — MCP (Model Context Protocol) server for codebase-memory-mcp. * * Implements JSON-RPC 2.0 over stdio with the MCP tool calling protocol. - * Provides 14 graph analysis tools (search, trace, query, index, etc.) + * Provides 16 graph analysis tools (search, trace, query, index, etc.) 
*/ #ifndef CBM_MCP_H #define CBM_MCP_H diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 66f47eac..41671775 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -816,8 +816,17 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { mtime_ns, fst.st_size); } } + if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != + CBM_STORE_OK) { + cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); + cbm_store_close(hash_store); + rc = -1; + goto cleanup; + } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); + cbm_log_info("pass.timing", "pass", "pagerank", "project", p->project_name); } } diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 1799f838..795f32d2 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -159,10 +159,11 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf /* ── Incremental pipeline entry point ────────────────────────────── */ -int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, - int file_count) { - struct timespec t0; - cbm_clock_gettime(CLOCK_MONOTONIC, &t0); +int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, + int file_count) { + struct timespec t0; + struct timespec t; + cbm_clock_gettime(CLOCK_MONOTONIC, &t0); const char *project = cbm_pipeline_project_name(p); @@ -191,14 +192,25 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.classify", "changed", itoa_buf(n_changed), "unchanged", itoa_buf(n_unchanged), "deleted", itoa_buf(deleted_count)); - /* Fast path: nothing changed → skip */ - if (n_changed == 0 && deleted_count == 0) { - cbm_log_info("incremental.noop", "reason", "no_changes"); - free(is_changed); - 
free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return 0; + /* Fast path: nothing changed → skip */ + if (n_changed == 0 && deleted_count == 0) { + cbm_log_info("incremental.noop", "reason", "no_changes"); + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return 0; } cbm_store_free_file_hashes(stored, stored_count); @@ -245,9 +257,8 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil .cancelled = cbm_pipeline_cancelled_ptr(p), }; - /* Run passes on changed files only */ - struct timespec t; - cbm_clock_gettime(CLOCK_MONOTONIC, &t); + /* Run passes on changed files only */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); cbm_pipeline_pass_definitions(&ctx, changed_files, ci); cbm_log_info("pass.timing", "pass", "incr_definitions", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); @@ -284,11 +295,23 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.merged", "nodes", itoa_buf(new_nodes), "edges", itoa_buf(new_edges)); - /* Persist updated file hashes for ALL files */ - persist_hashes(store, project, files, file_count); - - /* Cleanup */ - cbm_gbuf_free(gbuf); + /* Persist updated file hashes for ALL files */ + persist_hashes(store, project, files, file_count); + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", 
"msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + cbm_gbuf_free(gbuf); + cbm_registry_free(registry); + free(changed_files); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + + /* Cleanup */ + cbm_gbuf_free(gbuf); cbm_registry_free(registry); free(changed_files); cbm_store_close(store); diff --git a/src/store/store.c b/src/store/store.c index ca196255..90ebc7d0 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -41,6 +41,8 @@ struct cbm_store { sqlite3 *db; const char *db_path; /* heap-allocated, or NULL for :memory: */ char errbuf[512]; + bool node_scores_checked; + bool node_scores_exists; /* Prepared statements (lazily initialized, cached for lifetime) */ sqlite3_stmt *stmt_upsert_node; @@ -125,6 +127,31 @@ static char *heap_strdup(const char *s) { return d; } +static bool store_has_node_scores_table(cbm_store_t *s) { + if (!s || !s->db) { + return false; + } + if (s->node_scores_checked) { + return s->node_scores_exists; + } + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2( + s->db, + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='node_scores' LIMIT 1;", -1, + &stmt, NULL); + if (rc != SQLITE_OK) { + s->node_scores_checked = true; + s->node_scores_exists = false; + return false; + } + + s->node_scores_exists = (sqlite3_step(stmt) == SQLITE_ROW); + s->node_scores_checked = true; + sqlite3_finalize(stmt); + return s->node_scores_exists; +} + /* Prepare a statement (cached). If already prepared, reset+clear. 
*/ static sqlite3_stmt *prepare_cached(cbm_store_t *s, sqlite3_stmt **slot, const char *sql) { if (!s || !s->db) { @@ -200,6 +227,12 @@ static int init_schema(cbm_store_t *s) { " source_hash TEXT NOT NULL," " created_at TEXT NOT NULL," " updated_at TEXT NOT NULL" + ");" + "CREATE TABLE IF NOT EXISTS node_scores (" + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," + " pagerank REAL NOT NULL," + " PRIMARY KEY (project, node_id)" ");"; return exec_sql(s, ddl); @@ -214,7 +247,8 @@ static int create_user_indexes(cbm_store_t *s) { "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id, type);" "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);" "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);"; + "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);" + "CREATE INDEX IF NOT EXISTS idx_node_scores_rank ON node_scores(project, pagerank DESC);"; return exec_sql(s, sql); } @@ -375,6 +409,9 @@ static cbm_store_t *store_open_internal(const char *path, bool in_memory) { return NULL; } + s->node_scores_checked = true; + s->node_scores_exists = true; + return s; } @@ -409,6 +446,8 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { } s->db_path = heap_strdup(db_path); + s->node_scores_checked = false; + s->node_scores_exists = false; /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. 
*/ sqlite3_set_authorizer(s->db, store_authorizer, NULL); @@ -599,7 +638,8 @@ int cbm_store_drop_indexes(cbm_store_t *s) { "DROP INDEX IF EXISTS idx_edges_target;" "DROP INDEX IF EXISTS idx_edges_type;" "DROP INDEX IF EXISTS idx_edges_target_type;" - "DROP INDEX IF EXISTS idx_edges_source_type;"); + "DROP INDEX IF EXISTS idx_edges_source_type;" + "DROP INDEX IF EXISTS idx_node_scores_rank;"); } int cbm_store_create_indexes(cbm_store_t *s) { @@ -1834,6 +1874,335 @@ int cbm_store_restore_from(cbm_store_t *dst, cbm_store_t *src) { return CBM_STORE_OK; } +/* ── PageRank ───────────────────────────────────────────────────── */ + +typedef struct { + int src_idx; + int dst_idx; +} cbm_pagerank_edge_ref_t; + +static int pagerank_find_node_index(const int64_t *node_ids, int count, int64_t node_id) { + int lo = 0; + int hi = count - 1; + while (lo <= hi) { + int mid = lo + ((hi - lo) / 2); + if (node_ids[mid] == node_id) { + return mid; + } + if (node_ids[mid] < node_id) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return -1; +} + +int cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping) { + int rc = CBM_STORE_OK; + sqlite3_stmt *stmt = NULL; + sqlite3_stmt *insert_stmt = NULL; + int64_t *node_ids = NULL; + int node_cap = 0; + int node_count = 0; + cbm_pagerank_edge_ref_t *edges = NULL; + int edge_cap = 0; + int edge_count = 0; + int *out_degree = NULL; + double *scores = NULL; + double *next_scores = NULL; + + if (!s || !s->db || !project) { + return CBM_STORE_ERR; + } + if (!store_has_node_scores_table(s)) { + store_set_error(s, "node_scores table is unavailable"); + return CBM_STORE_ERR; + } + if (iterations <= 0) { + iterations = 20; + } + if (damping <= 0.0 || damping >= 1.0) { + damping = 0.85; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT id FROM nodes " + "WHERE project = ?1 AND label IN ('Function','Method','Class') " + "ORDER BY id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, 
"pagerank.nodes"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (node_count >= node_cap) { + node_cap = node_cap > 0 ? node_cap * 2 : 128; + node_ids = safe_realloc(node_ids, (size_t)node_cap * sizeof(int64_t)); + } + node_ids[node_count++] = sqlite3_column_int64(stmt, 0); + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count > 0) { + out_degree = calloc((size_t)node_count, sizeof(int)); + scores = malloc((size_t)node_count * sizeof(double)); + next_scores = malloc((size_t)node_count * sizeof(double)); + if (!out_degree || !scores || !next_scores) { + store_set_error(s, "pagerank allocation failed"); + rc = CBM_STORE_ERR; + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT source_id, target_id FROM edges WHERE project = ?1 AND type = 'CALLS' " + "ORDER BY source_id, target_id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.edges"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + int src_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 0)); + int dst_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 1)); + if (src_idx < 0 || dst_idx < 0) { + continue; + } + if (edge_count >= edge_cap) { + edge_cap = edge_cap > 0 ? 
edge_cap * 2 : 256; + edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_pagerank_edge_ref_t)); + } + edges[edge_count].src_idx = src_idx; + edges[edge_count].dst_idx = dst_idx; + out_degree[src_idx]++; + edge_count++; + } + sqlite3_finalize(stmt); + stmt = NULL; + + for (int i = 0; i < node_count; i++) { + scores[i] = 1.0 / (double)node_count; + } + + for (int iter = 0; iter < iterations; iter++) { + double dangling_mass = 0.0; + double base = 0.0; + + for (int i = 0; i < node_count; i++) { + if (out_degree[i] == 0) { + dangling_mass += scores[i]; + } + } + + base = ((1.0 - damping) + (damping * dangling_mass)) / (double)node_count; + for (int i = 0; i < node_count; i++) { + next_scores[i] = base; + } + + for (int i = 0; i < edge_count; i++) { + int src_idx = edges[i].src_idx; + int dst_idx = edges[i].dst_idx; + if (out_degree[src_idx] > 0) { + next_scores[dst_idx] += + damping * (scores[src_idx] / (double)out_degree[src_idx]); + } + } + + { + double *tmp = scores; + scores = next_scores; + next_scores = tmp; + } + } + } + + rc = cbm_store_begin(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = sqlite3_prepare_v2(s->db, "DELETE FROM node_scores WHERE project = ?1;", -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.delete"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.delete"); + sqlite3_finalize(stmt); + stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count == 0) { + rc = cbm_store_commit(s); + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, "INSERT INTO node_scores (project, node_id, pagerank) VALUES (?1, ?2, ?3);", -1, + &insert_stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.insert"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + + for 
(int i = 0; i < node_count; i++) { + sqlite3_reset(insert_stmt); + sqlite3_clear_bindings(insert_stmt); + bind_text(insert_stmt, 1, project); + sqlite3_bind_int64(insert_stmt, 2, node_ids[i]); + sqlite3_bind_double(insert_stmt, 3, scores[i]); + if (sqlite3_step(insert_stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.insert"); + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + } + + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + rc = cbm_store_commit(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = CBM_STORE_OK; + +cleanup: + if (stmt) { + sqlite3_finalize(stmt); + } + if (insert_stmt) { + sqlite3_finalize(insert_stmt); + } + free(node_ids); + free(edges); + free(out_degree); + free(scores); + free(next_scores); + return rc; +} + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count) { + sqlite3_stmt *stmt = NULL; + cbm_key_symbol_t *symbols = NULL; + int cap = 0; + int n = 0; + char *focus_like = NULL; + bool has_scores = false; + char sql[2048]; + + if (out) { + *out = NULL; + } + if (count) { + *count = 0; + } + if (!s || !s->db || !project || !out || !count) { + return CBM_STORE_ERR; + } + + if (limit <= 0) { + limit = 20; + } + has_scores = store_has_node_scores_table(s); + if (focus && focus[0]) { + size_t len = strlen(focus); + focus_like = malloc(len + 3); + if (!focus_like) { + return CBM_STORE_ERR; + } + focus_like[0] = '%'; + memcpy(focus_like + 1, focus, len); + focus_like[len + 1] = '%'; + focus_like[len + 2] = '\0'; + } + + snprintf( + sql, sizeof(sql), + "SELECT n.name, n.qualified_name, n.label, n.file_path, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, " + "%s " + "FROM nodes n %s " + "WHERE n.project = ?1 AND n.label IN 
('Function','Method','Class') %s" + "ORDER BY pagerank DESC, in_deg DESC, out_deg DESC, n.name " + "LIMIT %d;", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + focus_like ? "AND (n.name LIKE ?2 OR n.qualified_name LIKE ?2 OR n.file_path LIKE ?2) " + : "", + limit); + + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "key_symbols.prepare"); + free(focus_like); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap = cap > 0 ? cap * 2 : 16; + symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_key_symbol_t)); + } + memset(&symbols[n], 0, sizeof(cbm_key_symbol_t)); + symbols[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + symbols[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + symbols[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + symbols[n].file_path = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); + symbols[n].in_degree = sqlite3_column_int(stmt, 4); + symbols[n].out_degree = sqlite3_column_int(stmt, 5); + symbols[n].pagerank = sqlite3_column_double(stmt, 6); + n++; + } + + sqlite3_finalize(stmt); + free(focus_like); + *out = symbols; + *count = n; + return CBM_STORE_OK; +} + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count) { + if (!symbols) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)symbols[i].name); + free((void *)symbols[i].qualified_name); + free((void *)symbols[i].label); + free((void *)symbols[i].file_path); + } + free(symbols); +} + /* ── Search ─────────────────────────────────────────────────────── */ /* Convert a glob pattern to SQL LIKE pattern. 
*/ @@ -1978,13 +2347,14 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear char sql[4096]; char count_sql[4096]; int bind_idx = 0; + bool has_scores = store_has_node_scores_table(s); /* We build a query that selects nodes with optional degree subqueries */ const char *select_cols = "SELECT n.id, n.project, n.label, n.name, n.qualified_name, " "n.file_path, n.start_line, n.end_line, n.properties, " "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " - "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg "; + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, "; /* Start building WHERE */ char where[2048] = ""; @@ -2067,9 +2437,18 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear /* Build full SQL */ if (nparams > 0) { - snprintf(sql, sizeof(sql), "%s FROM nodes n WHERE %s", select_cols, where); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s WHERE %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + where); } else { - snprintf(sql, sizeof(sql), "%s FROM nodes n", select_cols); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); } /* Degree filters: -1 = no filter, 0+ = active filter. @@ -2100,12 +2479,20 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear * When degree filter wraps in subquery, column refs lose the "n." prefix. */ int limit = params->limit > 0 ? 
params->limit : 500000; int offset = params->offset; - bool has_degree_wrap = has_degree_filter; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *name_col = has_degree_wrap ? "name" : "n.name"; + const char *sort_by = (params->sort_by && params->sort_by[0]) ? params->sort_by : "name"; char order_limit[128]; - snprintf(order_limit, sizeof(order_limit), " ORDER BY %s LIMIT %d OFFSET %d", name_col, limit, - offset); + if (strcmp(sort_by, "degree") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY (in_deg + out_deg) DESC, pagerank DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else if (strcmp(sort_by, "relevance") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY pagerank DESC, (in_deg + out_deg) DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else { + snprintf(order_limit, sizeof(order_limit), " ORDER BY name LIMIT %d OFFSET %d", limit, + offset); + } strncat(sql, order_limit, sizeof(sql) - strlen(sql) - 1); /* Execute count query */ @@ -2147,6 +2534,7 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear scan_node(main_stmt, &results[n].node); results[n].in_degree = sqlite3_column_int(main_stmt, 9); results[n].out_degree = sqlite3_column_int(main_stmt, 10); + results[n].pagerank = sqlite3_column_double(main_stmt, 11); n++; } @@ -2219,6 +2607,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const char sql[4096]; const char *join_cond; const char *next_id; + bool has_scores = store_has_node_scores_table(s); // NOLINTNEXTLINE(readability-implicit-bool-conversion) bool is_inbound = direction && strcmp(direction, "inbound") == 0; @@ -2240,13 +2629,18 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const " WHERE e.type IN (%s) AND bfs.hop < %d" ")" "SELECT DISTINCT n.id, n.project, n.label, n.name, n.qualified_name, " - "n.file_path, n.start_line, n.end_line, n.properties, bfs.hop " + "n.file_path, n.start_line, 
n.end_line, n.properties, bfs.hop, %s " "FROM bfs " "JOIN nodes n ON n.id = bfs.node_id " + "%s " "WHERE bfs.hop > 0 " /* exclude root */ "ORDER BY bfs.hop " "LIMIT %d;", - (long long)start_id, next_id, join_cond, types_clause, max_depth, max_results); + (long long)start_id, next_id, join_cond, types_clause, max_depth, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + max_results); sqlite3_stmt *stmt = NULL; rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); @@ -2275,6 +2669,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const } scan_node(stmt, &visited[n].node); visited[n].hop = sqlite3_column_int(stmt, 9); + visited[n].pagerank = sqlite3_column_double(stmt, 10); n++; } diff --git a/src/store/store.h b/src/store/store.h index d02fec77..99c240e3 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -116,7 +116,7 @@ typedef struct { int offset; bool exclude_entry_points; bool include_connected; - const char *sort_by; /* "relevance" / "name" / "degree", NULL = relevance */ + const char *sort_by; /* "relevance" / "name" / "degree", NULL = name */ bool case_sensitive; const char **exclude_labels; /* NULL-terminated array, or NULL */ } cbm_search_params_t; @@ -125,6 +125,7 @@ typedef struct { cbm_node_t node; int in_degree; int out_degree; + double pagerank; /* connected_names: allocated array of strings, count in connected_count */ const char **connected_names; int connected_count; @@ -141,6 +142,7 @@ typedef struct { typedef struct { cbm_node_t node; int hop; /* BFS depth from root */ + double pagerank; } cbm_node_hop_t; typedef struct { @@ -360,6 +362,25 @@ int cbm_store_delete_file_hash(cbm_store_t *s, const char *project, const char * int cbm_store_delete_file_hashes(cbm_store_t *s, const char *project); +/* ── PageRank ───────────────────────────────────────────────────── */ + +int 
cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping); + +typedef struct { + const char *name; + const char *qualified_name; + const char *label; + const char *file_path; + int in_degree; + int out_degree; + double pagerank; +} cbm_key_symbol_t; + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count); + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count); + /* ── Search ─────────────────────────────────────────────────────── */ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_search_output_t *out); diff --git a/tests/test_integration.c b/tests/test_integration.c index 318bce3e..d0a14dde 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -381,6 +381,18 @@ TEST(integ_mcp_get_architecture_summary) { PASS(); } +TEST(integ_mcp_get_key_symbols) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"limit\":5}", g_project); + + char *resp = call_tool("get_key_symbols", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "pagerank")); + ASSERT_TRUE(strstr(resp, "Add") || strstr(resp, "greet") || strstr(resp, "Multiply")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -539,6 +551,21 @@ TEST(integ_store_bfs_traversal) { PASS(); } +TEST(integ_store_key_symbols_ranked) { + cbm_store_t *store = cbm_store_open_path(g_dbpath); + ASSERT_NOT_NULL(store); + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + ASSERT_EQ(cbm_store_get_key_symbols(store, g_project, NULL, 5, &symbols, &count), CBM_STORE_OK); + ASSERT_GT(count, 0); + ASSERT_TRUE(symbols[0].pagerank > 0.0); + + cbm_store_key_symbols_free(symbols, count); + cbm_store_close(store); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * 
══════════════════════════════════════════════════════════════════ */ @@ -548,7 +575,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 16; /* skip all integration tests */ + tf_skip_count += 25; /* skip all integration tests */ integration_teardown(); return; } @@ -569,6 +596,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); RUN_TEST(integ_mcp_get_architecture_summary); + RUN_TEST(integ_mcp_get_key_symbols); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); @@ -576,6 +604,7 @@ SUITE(integration) { RUN_TEST(integ_store_search_by_degree); RUN_TEST(integ_store_find_by_file); RUN_TEST(integ_store_bfs_traversal); + RUN_TEST(integ_store_key_symbols_ranked); /* Pipeline API tests (no db needed) */ RUN_TEST(integ_pipeline_fqn_compute); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index dd3c0bb2..8aeeb096 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -131,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 15 tools */ + /* Should contain all public tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -139,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); @@ -583,6 +584,51 @@ static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { } } +static cbm_mcp_server_t *setup_pagerank_server(void) { + cbm_mcp_server_t *srv = 
cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-rank", "/tmp/test-rank"); + cbm_mcp_server_set_project(srv, "test-rank"); + + cbm_node_t nodes[] = { + {.project = "test-rank", .label = "Function", .name = "Root", .qualified_name = "test-rank.Root"}, + {.project = "test-rank", .label = "Function", .name = "Small", .qualified_name = "test-rank.Small"}, + {.project = "test-rank", .label = "Function", .name = "Hub", .qualified_name = "test-rank.Hub"}, + {.project = "test-rank", .label = "Function", .name = "Leaf", .qualified_name = "test-rank.Leaf"}, + {.project = "test-rank", .label = "Function", .name = "CallerB", .qualified_name = "test-rank.CallerB"}, + {.project = "test-rank", .label = "Function", .name = "CallerC", .qualified_name = "test-rank.CallerC"}, + }; + int64_t ids[6]; + for (int i = 0; i < 6; i++) { + ids[i] = cbm_store_upsert_node(st, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "test-rank", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + for (int i = 0; i < 4; i++) { + cbm_store_insert_edge(st, &edges[i]); + } + + if (cbm_store_compute_pagerank(st, "test-rank", 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1120,6 +1166,68 @@ static char *extract_text_content(const char *mcp_result) { return result; } +TEST(tool_search_graph_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); 
+ + char *raw = cbm_mcp_handle_tool(srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_key_symbols_ranked) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "get_key_symbols", "{\"project\":\"test-rank\",\"limit\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"results\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\",\"depth\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1834,6 +1942,7 @@ SUITE(mcp) { RUN_TEST(tool_get_graph_schema_empty); RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); + RUN_TEST(tool_search_graph_ranked_pagerank); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1845,6 +1954,8 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_missing_project); RUN_TEST(tool_get_architecture_summary_truncated); RUN_TEST(tool_get_architecture_summary_project_path_alias); + RUN_TEST(tool_get_key_symbols_ranked); + RUN_TEST(tool_trace_call_path_ranked_pagerank); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 569fa04e..8578bdc5 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -19,6 +19,7 @@ #include #include "graph_buffer/graph_buffer.h" #include "yyjson/yyjson.h" +#include /* ── Helper: create temp test repo with known layout ───────────── */ @@ -4971,6 +4972,44 @@ TEST(incremental_full_then_noop) { PASS(); } +TEST(incremental_noop_backfills_pagerank) { + if (setup_incremental_repo() != 0) { SKIP("setup failed"); } + + cbm_pipeline_t *p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + cbm_store_t *s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + ASSERT_EQ(sqlite3_exec(cbm_store_get_db(s), "DELETE FROM node_scores;", NULL, NULL, NULL), + SQLITE_OK); + cbm_store_close(s); + + p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + sqlite3_stmt *stmt = NULL; + ASSERT_EQ(sqlite3_prepare_v2(cbm_store_get_db(s), + "SELECT COUNT(*) FROM node_scores WHERE project = 
?1;", -1, + &stmt, NULL), + SQLITE_OK); + ASSERT_EQ(sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC), SQLITE_OK); + ASSERT_EQ(sqlite3_step(stmt), SQLITE_ROW); + ASSERT_GT(sqlite3_column_int(stmt, 0), 0); + sqlite3_finalize(stmt); + cbm_store_close(s); + free(project); + + cleanup_incremental_repo(); + PASS(); +} + TEST(incremental_detects_changed_file) { /* Full index, modify one file, re-index → changed file re-parsed */ if (setup_incremental_repo() != 0) { SKIP("setup failed"); } @@ -5914,6 +5953,7 @@ SUITE(pipeline) { RUN_TEST(pipeline_fastapi_depends_edges); /* Incremental */ RUN_TEST(incremental_full_then_noop); + RUN_TEST(incremental_noop_backfills_pagerank); RUN_TEST(incremental_detects_changed_file); RUN_TEST(incremental_detects_deleted_file); RUN_TEST(incremental_new_file_added); diff --git a/tests/test_store_search.c b/tests/test_store_search.c index 05df8680..4a5b7b3e 100644 --- a/tests/test_store_search.c +++ b/tests/test_store_search.c @@ -47,6 +47,41 @@ static cbm_store_t *setup_search_store(int64_t *ids) { return s; } +static cbm_store_t *setup_pagerank_store(int64_t *ids) { + cbm_store_t *s = cbm_store_open_memory(); + cbm_store_upsert_project(s, "test", "/tmp/test"); + + cbm_node_t nodes[] = { + {.project = "test", .label = "Function", .name = "Root", .qualified_name = "test.Root"}, + {.project = "test", .label = "Function", .name = "Small", .qualified_name = "test.Small"}, + {.project = "test", .label = "Function", .name = "Hub", .qualified_name = "test.Hub"}, + {.project = "test", .label = "Function", .name = "Leaf", .qualified_name = "test.Leaf"}, + {.project = "test", .label = "Function", .name = "CallerB", .qualified_name = "test.CallerB"}, + {.project = "test", .label = "Function", .name = "CallerC", .qualified_name = "test.CallerC"}, + }; + const int node_count = (int)(sizeof(nodes) / sizeof(nodes[0])); + for (int i = 0; i < node_count; i++) { + ids[i] = cbm_store_upsert_node(s, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = 
"test", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(s, &edges[i]); + } + + if (cbm_store_compute_pagerank(s, "test", 20, 0.85) != CBM_STORE_OK) { + cbm_store_close(s); + return NULL; + } + return s; +} + /* ── Search by label ────────────────────────────────────────────── */ TEST(store_search_by_label) { @@ -595,6 +630,46 @@ TEST(store_search_case_insensitive) { PASS(); } +TEST(store_search_ranked_by_pagerank) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + + cbm_search_params_t params = {.project = "test", + .label = "Function", + .limit = 10, + .min_degree = -1, + .max_degree = -1, + .sort_by = "relevance"}; + cbm_search_output_t out = {0}; + int rc = cbm_store_search(s, &params, &out); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_GTE(out.count, 4); + ASSERT_STR_EQ(out.results[0].node.name, "Hub"); + ASSERT_TRUE(out.results[0].pagerank > out.results[1].pagerank); + cbm_store_search_free(&out); + + cbm_store_close(s); + PASS(); +} + +TEST(store_get_key_symbols_ranked) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + cbm_key_symbol_t *symbols = NULL; + int count = 0; + + ASSERT_EQ(cbm_store_get_key_symbols(s, "test", NULL, 3, &symbols, &count), CBM_STORE_OK); + ASSERT_EQ(count, 3); + ASSERT_STR_EQ(symbols[0].name, "Hub"); + ASSERT_TRUE(symbols[0].pagerank > symbols[1].pagerank); + cbm_store_key_symbols_free(symbols, count); + + cbm_store_close(s); + PASS(); +} + /* ── Impact: HopToRisk ─────────────────────────────────────────── */ TEST(store_hop_to_risk) { @@ -1217,6 +1292,8 @@ SUITE(store_search) { 
RUN_TEST(store_search_all); RUN_TEST(store_search_exclude_labels); RUN_TEST(store_search_case_insensitive); + RUN_TEST(store_search_ranked_by_pagerank); + RUN_TEST(store_get_key_symbols_ranked); RUN_TEST(store_bfs_outbound); RUN_TEST(store_bfs_inbound); RUN_TEST(store_bfs_cross_service); From 0af23ec9d2f3f0c9cc4e00faa0bd024c788dd71d Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:35:23 +0530 Subject: [PATCH 03/14] Make PageRank failures non-fatal during indexing --- src/pipeline/pipeline.c | 9 ++++----- src/pipeline/pipeline_incremental.c | 27 +++++++++++---------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 41671775..5cddbcb1 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -818,11 +818,10 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { } if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, - "error", cbm_store_error(hash_store)); - cbm_store_close(hash_store); - rc = -1; - goto cleanup; + /* PageRank is a ranking enhancement; keep a successful index even if + * score computation fails on this pass. 
*/ + cbm_log_warn("pipeline.warn", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 795f32d2..fa802ba9 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -197,15 +197,13 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.noop", "reason", "no_changes"); cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - free(is_changed); - free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return -1; + /* Preserve the successful no-op index result even if ranking refresh fails. 
*/ + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, + "error", cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); free(is_changed); free(deleted); cbm_store_free_file_hashes(stored, stored_count); @@ -300,15 +298,12 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - cbm_gbuf_free(gbuf); - cbm_registry_free(registry); - free(changed_files); - cbm_store_close(store); - return -1; + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); /* Cleanup */ cbm_gbuf_free(gbuf); From 408be5154a0041a1e8b84694225854bb6867d367 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:53:03 +0530 Subject: [PATCH 04/14] Fix read-only query opens for snapshot DBs --- src/store/store.c | 51 ++++++++++++++++++++++++++++++++++++++-- tests/test_store_nodes.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/store/store.c b/src/store/store.c index 90ebc7d0..00b857ac 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -127,6 +127,43 @@ static char *heap_strdup(const char *s) { return d; } +static bool sqlite_uri_path_safe_char(unsigned char c) { + return isalnum(c) || c == '/' || c == '.' 
|| c == '_' || c == '-' || c == '~' || c == ':'; +} + +static char *sqlite_readonly_immutable_uri(const char *db_path) { + if (!db_path) { + return NULL; + } + + static const char suffix[] = "?mode=ro&immutable=1"; + size_t path_len = strlen(db_path); + size_t cap = strlen("file:") + (path_len * 3) + sizeof(suffix); + char *uri = malloc(cap); + if (!uri) { + return NULL; + } + + char *dst = uri; + memcpy(dst, "file:", strlen("file:")); + dst += strlen("file:"); + + static const char hex[] = "0123456789ABCDEF"; + for (size_t i = 0; i < path_len; i++) { + unsigned char c = (unsigned char)db_path[i]; + if (sqlite_uri_path_safe_char(c)) { + *dst++ = (char)c; + } else { + *dst++ = '%'; + *dst++ = hex[(c >> 4) & 0x0F]; + *dst++ = hex[c & 0x0F]; + } + } + + memcpy(dst, suffix, sizeof(suffix)); + return uri; +} + static bool store_has_node_scores_table(cbm_store_t *s) { if (!s || !s->db) { return false; @@ -436,8 +473,18 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { return NULL; } - /* Open read-only and do NOT create — query tools should never need write access. */ - int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL); + /* Query tools read atomically-written snapshot DBs. Open them via an + * immutable URI so SQLite does not try to create WAL/SHM sidecars when the + * file was produced by the direct page writer and later reopened in WAL + * mode by the indexing pipeline. */ + char *uri = sqlite_readonly_immutable_uri(db_path); + if (!uri) { + free(s); + return NULL; + } + + int rc = sqlite3_open_v2(uri, &s->db, SQLITE_OPEN_READONLY | SQLITE_OPEN_URI, NULL); + free(uri); if (rc != SQLITE_OK) { /* sqlite3_open_v2 allocates a handle even on failure — must close it. */ sqlite3_close(s->db); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index 6cfc93f3..cef33bdb 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -5,6 +5,7 @@ * TestNodeDedup, TestProjectCRUD, TestUpsertNodeBatch, etc.) 
*/ #include "test_framework.h" +#include "sqlite_writer.h" #include #include #include @@ -140,6 +141,54 @@ TEST(store_open_path_query_readonly_db) { PASS(); } +TEST(store_open_path_query_direct_writer_db) { + char path[] = "/tmp/cbm_store_query_writer_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + CBMDumpNode node = { + .id = 1, + .project = "writer-proj", + .label = "Function", + .name = "Hello", + .qualified_name = "writer-proj.main.Hello", + .file_path = "main.go", + .start_line = 1, + .end_line = 3, + .properties = "{}", + }; + ASSERT_EQ(cbm_write_db(path, "writer-proj", "/tmp/writer-proj", "2026-03-25T00:00:00Z", + &node, 1, NULL, 0), + 0); + + /* Reopen in the same way the pipeline does to add post-dump metadata. */ + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_file_hash(writer, "writer-proj", "main.go", "abc123", 1, 64), + CBM_STORE_OK); + cbm_store_close(writer); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + ASSERT_TRUE(cbm_store_check_integrity(reader)); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "writer-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, "/tmp/writer-proj"); + cbm_project_free_fields(&proj); + + cbm_node_t found = {0}; + ASSERT_EQ(cbm_store_find_node_by_qn(reader, "writer-proj", "writer-proj.main.Hello", &found), + CBM_STORE_OK); + ASSERT_STR_EQ(found.name, "Hello"); + cbm_node_free_fields(&found); + + cbm_store_close(reader); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1541,6 +1590,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_update); RUN_TEST(store_project_delete); RUN_TEST(store_open_path_query_readonly_db); + RUN_TEST(store_open_path_query_direct_writer_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From 
0559f34ca6460274e1afb82f821982f8005af5c4 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 18:42:36 +0530 Subject: [PATCH 05/14] Refine MCP token-budget truncation Account for optional signatures in the search_graph and trace_call_path size estimators, and improve compact trace chains to report omitted-node counts. This also documents the normal-path output enrichment introduced with Task 4: search_graph results now include file_path, start_line, end_line, and signature, and trace_call_path hop items now include file_path, start_line, and signature. --- src/mcp/mcp.c | 491 +++++++++++++++++++++++++++++++++++++++++++---- tests/test_mcp.c | 163 ++++++++++++++++ 2 files changed, 616 insertions(+), 38 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 8b313dbb..05bc6a26 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -46,6 +46,12 @@ /* Default snippet fallback line count */ #define SNIPPET_DEFAULT_LINES 50 +/* Approximate output budget: 1 token ~= 4 chars. */ +#define DEFAULT_MAX_TOKENS 2000 +#define MIN_JSON_CHAR_BUDGET 128 +#define MAX_FULL_BUDGET_ITEMS 5 +#define MAX_COMPACT_QUERY_CELL_CHARS 96 + /* Idle store eviction: close cached project store after this many seconds * of inactivity to free SQLite memory during idle periods. */ #define STORE_IDLE_TIMEOUT_S 60 @@ -80,6 +86,243 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) { return s; } +static size_t max_tokens_to_char_budget(int max_tokens) { + if (max_tokens <= 0) { + max_tokens = DEFAULT_MAX_TOKENS; + } + size_t budget = (size_t)max_tokens * 4U; + if (budget < MIN_JSON_CHAR_BUDGET) { + budget = MIN_JSON_CHAR_BUDGET; + } + return budget; +} + +static char *json_string_field_dup(const char *json, const char *key) { + if (!json || !key) { + return NULL; + } + + yyjson_doc *doc = yyjson_read(json, strlen(json), 0); + if (!doc) { + return NULL; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *value = root ? 
yyjson_obj_get(root, key) : NULL; + const char *str = value ? yyjson_get_str(value) : NULL; + char *dup = str ? heap_strdup(str) : NULL; + yyjson_doc_free(doc); + return dup; +} + +static char *node_signature_dup(const cbm_node_t *node) { + if (!node) { + return NULL; + } + return json_string_field_dup(node->properties_json, "signature"); +} + +static size_t estimate_signature_field_chars(const cbm_node_t *node) { + char *signature = node_signature_dup(node); + if (!signature || !signature[0]) { + free(signature); + return 0; + } + + size_t size = strlen(signature) + 24; + free(signature); + return size; +} + +static char *truncate_text_copy(const char *text, size_t max_chars) { + if (!text) { + return heap_strdup(""); + } + + size_t len = strlen(text); + if (len <= max_chars) { + return heap_strdup(text); + } + if (max_chars <= 3) { + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + for (size_t i = 0; i < max_chars; i++) { + out[i] = '.'; + } + out[max_chars] = '\0'; + return out; + } + + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + size_t keep = max_chars - 3; + memcpy(out, text, keep); + memcpy(out + keep, "...", 4); + return out; +} + +static char *build_compact_hop_chain(const cbm_node_hop_t *hops, int count) { + if (!hops || count <= 0) { + return NULL; + } + + const char *first = NULL; + const char *last = NULL; + int named_count = 0; + for (int i = 0; i < count; i++) { + if (hops[i].node.name && hops[i].node.name[0]) { + if (!first) { + first = hops[i].node.name; + } + last = hops[i].node.name; + named_count++; + } + } + if (!first) { + return NULL; + } + if (named_count <= 1 || !last || strcmp(first, last) == 0) { + return heap_strdup(first); + } + if (named_count == 2) { + size_t len = strlen(first) + strlen(last) + strlen(" -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> %s", first, last); + return chain; + } + + int omitted = named_count - 2; + char 
omitted_buf[32]; + snprintf(omitted_buf, sizeof(omitted_buf), "%d", omitted); + + size_t len = + strlen(first) + strlen(last) + strlen(omitted_buf) + strlen(" -> ... ( more) -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> ... (%d more) -> %s", first, omitted, last); + return chain; +} + +static size_t estimate_search_result_chars(const cbm_search_result_t *sr, bool compact) { + size_t size = 96; + size += strlen(sr->node.name ? sr->node.name : ""); + size += strlen(sr->node.file_path ? sr->node.file_path : ""); + size += estimate_signature_field_chars(&sr->node); + if (!compact) { + size += strlen(sr->node.qualified_name ? sr->node.qualified_name : ""); + size += strlen(sr->node.label ? sr->node.label : ""); + size += 48; + } else { + size += 24; + } + return size; +} + +static void add_search_result_item(yyjson_mut_doc *doc, yyjson_mut_val *results, + const cbm_search_result_t *sr, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sr->node.file_path ? sr->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", sr->node.start_line); + + char *signature = node_signature_dup(&sr->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? 
sr->node.label : ""); + yyjson_mut_obj_add_int(doc, item, "end_line", sr->node.end_line); + yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); + } + + yyjson_mut_arr_add_val(results, item); +} + +static size_t estimate_node_hop_chars(const cbm_node_hop_t *hop, bool compact) { + size_t size = 80; + size += strlen(hop->node.name ? hop->node.name : ""); + size += strlen(hop->node.file_path ? hop->node.file_path : ""); + size += estimate_signature_field_chars(&hop->node); + if (!compact) { + size += strlen(hop->node.qualified_name ? hop->node.qualified_name : ""); + size += 40; + } else { + size += 20; + } + return size; +} + +static void add_node_hop_item(yyjson_mut_doc *doc, yyjson_mut_val *items, const cbm_node_hop_t *hop, + bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", hop->node.name ? hop->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", hop->node.file_path ? hop->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", hop->node.start_line); + yyjson_mut_obj_add_int(doc, item, "hop", hop->hop); + + char *signature = node_signature_dup(&hop->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + hop->node.qualified_name ? hop->node.qualified_name : ""); + yyjson_mut_obj_add_real(doc, item, "pagerank", hop->pagerank); + } + + yyjson_mut_arr_add_val(items, item); +} + +static size_t estimate_query_row_chars(const char *const *row, int col_count, bool compact) { + size_t size = 8; + for (int c = 0; c < col_count; c++) { + size += 4; + if (!row[c]) { + continue; + } + size += compact ? 
strnlen(row[c], MAX_COMPACT_QUERY_CELL_CHARS) : strlen(row[c]); + } + return size; +} + +static void add_query_row(yyjson_mut_doc *doc, yyjson_mut_val *rows, const char *const *row, + int col_count, + bool compact) { + yyjson_mut_val *out_row = yyjson_mut_arr(doc); + for (int c = 0; c < col_count; c++) { + const char *cell = row[c] ? row[c] : ""; + if (compact) { + char *clipped = truncate_text_copy(cell, MAX_COMPACT_QUERY_CELL_CHARS); + yyjson_mut_arr_add_strcpy(doc, out_row, clipped ? clipped : ""); + free(clipped); + } else { + yyjson_mut_arr_add_str(doc, out_row, cell); + } + } + yyjson_mut_arr_add_val(rows, out_row); +} + typedef struct { char *buf; size_t len; @@ -349,7 +592,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked results when needed.\"}}," "\"required\":[\"project\"]}"}, {"query_graph", @@ -359,7 +604,9 @@ static const tool_def_t TOOLS[] = { "query\"},\"project\":{\"type\":\"string\"},\"max_rows\":{\"type\":\"integer\"," "\"description\":" "\"Optional row limit. Default: unlimited (100k " - "ceiling)\"}},\"required\":[\"query\",\"project\"]}"}, + "ceiling)\"},\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":" + "\"Maximum output size. Compacts lower-priority rows when needed.\"}}," + "\"required\":[\"query\",\"project\"]}"}, {"trace_call_path", "Trace function call paths — who calls a function and what it calls. 
Use INSTEAD OF grep when " @@ -368,7 +615,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked path results when needed.\"}}," "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", @@ -1087,7 +1336,9 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); cbm_search_params_t params = { .project = project, @@ -1112,24 +1363,55 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *results = yyjson_mut_arr(doc); for (int i = 0; i < out.count; i++) { - cbm_search_result_t *sr = &out.results[i]; - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); - yyjson_mut_obj_add_str(doc, item, "qualified_name", - sr->node.qualified_name ? sr->node.qualified_name : ""); - yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); - yyjson_mut_obj_add_str(doc, item, "file_path", - sr->node.file_path ? 
sr->node.file_path : ""); - yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); - yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); - yyjson_mut_arr_add_val(results, item); + add_search_result_item(doc, results, &out.results[i], false); } yyjson_mut_obj_add_val(doc, root, "results", results); yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_int(doc, root, "total", out.total); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", out.total); + + results = yyjson_mut_arr(doc); + size_t used = 96; + int shown = 0; + int full_items = 0; + for (int i = 0; i < out.count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_search_result_chars(&out.results[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_search_result_chars(&out.results[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_search_result_item(doc, results, &out.results[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "results", results); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_store_search_free(&out); free(project); @@ -1147,6 +1429,8 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); cbm_store_t 
*store = resolve_store(srv, project); int max_rows = cbm_mcp_get_int_arg(args, "max_rows", 0); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!query) { free(project); @@ -1194,17 +1478,63 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { /* rows */ yyjson_mut_val *rows = yyjson_mut_arr(doc); for (int r = 0; r < result.row_count; r++) { - yyjson_mut_val *row = yyjson_mut_arr(doc); - for (int c = 0; c < result.col_count; c++) { - yyjson_mut_arr_add_str(doc, row, result.rows[r][c]); - } - yyjson_mut_arr_add_val(rows, row); + add_query_row(doc, rows, result.rows[r], result.col_count, false); } yyjson_mut_obj_add_val(doc, root, "rows", rows); yyjson_mut_obj_add_int(doc, root, "total", result.row_count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + cols = yyjson_mut_arr(doc); + for (int i = 0; i < result.col_count; i++) { + yyjson_mut_arr_add_str(doc, cols, result.columns[i]); + } + yyjson_mut_obj_add_val(doc, root, "columns", cols); + + rows = yyjson_mut_arr(doc); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", result.row_count); + + size_t used = 96; + for (int i = 0; i < result.col_count; i++) { + used += strlen(result.columns[i] ? 
result.columns[i] : "") + 4; + } + int shown = 0; + int full_rows = 0; + for (int r = 0; r < result.row_count; r++) { + bool compact = full_rows >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_query_row_chars(result.rows[r], result.col_count, compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_query_row_chars(result.rows[r], result.col_count, true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_query_row(doc, rows, result.rows[r], result.col_count, compact); + used += estimate; + shown++; + if (!compact) { + full_rows++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "rows", rows); + yyjson_mut_obj_add_int(doc, root, "total", result.row_count); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_cypher_result_free(&result); free(query); free(project); @@ -1685,7 +2015,9 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!func_name) { free(project); @@ -1758,15 +2090,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_out.visited[i].node.name ? tr_out.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_out.visited[i].node.qualified_name ? 
tr_out.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); - yyjson_mut_arr_add_val(callees, item); + add_node_hop_item(doc, callees, &tr_out.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callees", callees); } @@ -1781,15 +2105,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_in.visited[i].node.name ? tr_in.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_in.visited[i].node.qualified_name ? tr_in.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); - yyjson_mut_arr_add_val(callers, item); + add_node_hop_item(doc, callers, &tr_in.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callers", callers); } @@ -1798,6 +2114,105 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "function", func_name); + yyjson_mut_obj_add_str(doc, root, "direction", direction); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + + int total_results = 0; + if (do_outbound) { + total_results += tr_out.visited_count; + } + if (do_inbound) { + total_results += tr_in.visited_count; + } + yyjson_mut_obj_add_int(doc, root, "total_results", total_results); + + size_t used = 96 + strlen(func_name) + strlen(direction); + int shown = 0; + + if (do_outbound) { + yyjson_mut_val 
*callees = yyjson_mut_arr(doc); + int shown_callees = 0; + int full_callees = 0; + for (int i = 0; i < tr_out.visited_count; i++) { + bool compact = full_callees >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_out.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_out.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callees, &tr_out.visited[i], compact); + used += estimate; + shown++; + shown_callees++; + if (!compact) { + full_callees++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callees", callees); + if (shown_callees < tr_out.visited_count) { + char *chain = build_compact_hop_chain(tr_out.visited + shown_callees, + tr_out.visited_count - shown_callees); + if (chain && chain[0]) { + yyjson_mut_obj_add_strcpy(doc, root, "callees_chain", chain); + } + free(chain); + } + } + + if (do_inbound) { + yyjson_mut_val *callers = yyjson_mut_arr(doc); + int shown_callers = 0; + int full_callers = 0; + for (int i = 0; i < tr_in.visited_count; i++) { + bool compact = full_callers >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_in.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_in.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callers, &tr_in.visited[i], compact); + used += estimate; + shown++; + shown_callers++; + if (!compact) { + full_callers++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callers", callers); + if (shown_callers < tr_in.visited_count) { + char *chain = + build_compact_hop_chain(tr_in.visited + shown_callers, + tr_in.visited_count - shown_callers); + if (chain && chain[0]) { + 
yyjson_mut_obj_add_strcpy(doc, root, "callers_chain", chain); + } + free(chain); + } + } + + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + /* Now safe to free traversal data */ if (do_outbound) { cbm_store_traverse_free(&tr_out); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 8aeeb096..d140e84d 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -629,6 +629,52 @@ static cbm_mcp_server_t *setup_pagerank_server(void) { return srv; } +static cbm_mcp_server_t *setup_truncation_server(void) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-budget", "/tmp/test-budget"); + cbm_mcp_server_set_project(srv, "test-budget"); + + const char *sig = + "{\"signature\":\"func BudgetedOperation(alpha int, beta int, gamma int, delta int, " + "epsilon int, zeta int, eta int, theta int, iota int) string\"}"; + const char *names[] = {"Root", "A", "B", "C", "D", "E"}; + int64_t ids[6] = {0}; + + for (int i = 0; i < 6; i++) { + char qn[128]; + snprintf(qn, sizeof(qn), "test-budget.%s", names[i]); + cbm_node_t node = { + .project = "test-budget", + .label = "Function", + .name = names[i], + .qualified_name = qn, + .file_path = "pkg/budget.go", + .start_line = 10 + (i * 5), + .end_line = 13 + (i * 5), + .properties_json = sig, + }; + ids[i] = cbm_store_upsert_node(st, &node); + } + + for (int i = 0; i < 5; i++) { + cbm_edge_t edge = { + .project = "test-budget", .source_id = ids[i], .target_id = ids[i + 1], .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1228,6 +1274,118 @@ TEST(tool_trace_call_path_ranked_pagerank) { PASS(); } 
+TEST(tool_search_graph_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":3,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_search_graph_long_signature_budget_respected) { + cbm_mcp_server_t *srv = setup_truncation_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-budget\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\":1")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_chain_shows_omitted_count) { + cbm_mcp_server_t *srv = setup_truncation_server(); + 
ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-budget\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":5,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"callees_chain\":\"")); + ASSERT_NOT_NULL(strstr(text, "more) ->")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_query_graph_max_tokens_truncates) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char args[1024]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"query\":\"MATCH (f:Function) RETURN f.name, f.qualified_name, " + "f.file_path\",\"max_tokens\":1}", + proj_name); + + char *raw = cbm_mcp_handle_tool(srv, "query_graph", args); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"columns\"")); + free(text); + free(raw); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1943,6 +2101,8 @@ SUITE(mcp) { RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); RUN_TEST(tool_search_graph_ranked_pagerank); + RUN_TEST(tool_search_graph_max_tokens_truncates); + RUN_TEST(tool_search_graph_long_signature_budget_respected); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1956,7 +2116,10 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_get_key_symbols_ranked); RUN_TEST(tool_trace_call_path_ranked_pagerank); + RUN_TEST(tool_trace_call_path_max_tokens_truncates); + RUN_TEST(tool_trace_call_path_chain_shows_omitted_count); RUN_TEST(tool_query_graph_missing_query); + RUN_TEST(tool_query_graph_max_tokens_truncates); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); From f3e93e74d415d0d824255bd6d041a70b73e9a3de Mon Sep 17 00:00:00 2001 From: maplenk Date: Fri, 27 Mar 2026 18:16:35 +0530 Subject: [PATCH 06/14] fix: harden token-budget helpers and search_graph wiring - Guard cbm_mcp_text_result() against NULL text - Fix memory leak in handle_get_key_symbols() REQUIRE_STORE path (focus not freed) - Wire qn_pattern through handle_search_graph() - Fix OOM infinite loop in markdown_builder_reserve() - Return 0 instead of CBM_STORE_ERR from summary_count_nodes() on prepare fail Co-Authored-By: Claude Opus 4.6 --- src/mcp/mcp.c | 16 +++++++++++++++- src/store/store.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 05bc6a26..36f8fecf 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -349,6 +349,9 @@ static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { while (b->len + need + 1 > b->cap) { b->cap *= 2; b->buf = safe_realloc(b->buf, b->cap); + if (!b->buf) { + return false; + } } return true; } @@ -546,6 +549,7 @@ char *cbm_jsonrpc_format_error(int64_t id, int code, const char *message) { * 
══════════════════════════════════════════════════════════════════ */ char *cbm_mcp_text_result(const char *text, bool is_error) { + if (!text) text = ""; yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -1331,6 +1335,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { char *label = cbm_mcp_get_string_arg(args, "label"); char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); + char *qn_pattern = cbm_mcp_get_string_arg(args, "qn_pattern"); char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); int limit = cbm_mcp_get_int_arg(args, "limit", 500000); int offset = cbm_mcp_get_int_arg(args, "offset", 0); @@ -1344,6 +1349,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .project = project, .label = label, .name_pattern = name_pattern, + .qn_pattern = qn_pattern, .file_pattern = file_pattern, .limit = limit, .offset = offset, @@ -1417,6 +1423,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { free(project); free(label); free(name_pattern); + free(qn_pattern); free(file_pattern); char *result = cbm_mcp_text_result(json, false); @@ -1699,7 +1706,14 @@ static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { char *focus = cbm_mcp_get_string_arg(args, "focus"); int limit = cbm_mcp_get_int_arg(args, "limit", 20); cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(focus); + return _res; + } char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { diff --git a/src/store/store.c b/src/store/store.c index 00b857ac..f3186cec 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -4638,7 +4638,7 @@ static int 
summary_count_nodes(cbm_store_t *s, const char *project, const char * sqlite3_stmt *stmt = NULL; if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "summary_count_nodes"); - return CBM_STORE_ERR; + return 0; } bind_text(stmt, 1, project); if (focus_like && focus_like[0]) { From 2d0ef973bfd59ce77c9b4db1212f62a5af4ef8d2 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 19:49:34 +0530 Subject: [PATCH 07/14] Refine blast radius analysis Prefer non-test symbol matches before pagerank for ambiguous short names. Batch-load visited node metadata and pagerank scores to remove per-node query loops during impact analysis. Hide test counts from the public summary when include_tests=false, and add max_tokens truncation support to get_impact_analysis with MCP coverage. --- src/mcp/mcp.c | 307 ++++++++++++++++- src/store/store.c | 691 ++++++++++++++++++++++++++++++++++++++ tests/test_mcp.c | 306 +++++++++++++++++ tests/test_store_search.c | 357 ++++++++++++++++++++ 4 files changed, 1660 insertions(+), 1 deletion(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 36f8fecf..eb3957f0 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 16 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with graph tools. * * Uses yyjson for fast JSON parsing/building. * Single-threaded event loop: read line → parse → dispatch → respond. 
@@ -323,6 +323,134 @@ static void add_query_row(yyjson_mut_doc *doc, yyjson_mut_val *rows, const char yyjson_mut_arr_add_val(rows, out_row); } +static int impact_output_direct_caller_count(const cbm_impact_analysis_t *impact) { + int direct_callers = 0; + for (int i = 0; i < impact->direct_count; i++) { + if (impact->direct[i].type && strcmp(impact->direct[i].type, "route") == 0) { + continue; + } + direct_callers++; + } + return direct_callers; +} + +static int impact_output_route_entry_count(const cbm_impact_analysis_t *impact) { + int total = 0; + const cbm_impact_item_t *groups[] = {impact->direct, impact->indirect, impact->transitive}; + const int counts[] = {impact->direct_count, impact->indirect_count, impact->transitive_count}; + for (int g = 0; g < 3; g++) { + for (int i = 0; i < counts[g]; i++) { + const char *type = groups[g][i].type; + if (type && (strcmp(type, "route") == 0 || strcmp(type, "entry_point") == 0)) { + total++; + } + } + } + return total; +} + +static int impact_output_total_results(const cbm_impact_analysis_t *impact, bool include_tests) { + int total = impact->direct_count + impact->indirect_count + impact->transitive_count; + if (include_tests) { + total += impact->affected_test_count; + } + return total; +} + +static char *impact_output_summary_dup(const cbm_impact_analysis_t *impact, bool include_tests) { + int direct_callers = impact_output_direct_caller_count(impact); + int route_entries = impact_output_route_entry_count(impact); + int tests = impact->affected_test_count; + int transitive = impact->transitive_count; + + char buf[256]; + if (include_tests) { + if (transitive > 0) { + snprintf(buf, sizeof(buf), + "%d direct callers, %d route/entry points, %d affected tests, %d transitive impacts", + direct_callers, route_entries, tests, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests", + direct_callers, route_entries, tests); + } + } else if (transitive > 0) { + 
snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d transitive impacts", + direct_callers, route_entries, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points", + direct_callers, route_entries); + } + return heap_strdup(buf); +} + +static size_t estimate_impact_item_chars(const cbm_impact_item_t *item, bool compact) { + size_t size = 72; + size += strlen(item->name ? item->name : ""); + size += strlen(item->file ? item->file : ""); + size += strlen(item->type ? item->type : ""); + size += compact ? 16 : 32; + return size; +} + +static void add_impact_item_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_impact_item_t *item, bool compact) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->name ? item->name : ""); + yyjson_mut_obj_add_str(doc, entry, "file", item->file ? item->file : ""); + yyjson_mut_obj_add_str(doc, entry, "type", item->type ? item->type : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, entry, "compact", true); + } else { + yyjson_mut_obj_add_real(doc, entry, "pagerank", item->pagerank); + } + yyjson_mut_arr_add_val(arr, entry); +} + +static size_t estimate_affected_test_chars(const cbm_affected_test_t *item) { + size_t size = 48; + size += strlen(item->name ? item->name : ""); + size += strlen(item->file ? item->file : ""); + return size; +} + +static void add_affected_test_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_affected_test_t *item) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->name ? item->name : ""); + yyjson_mut_obj_add_str(doc, entry, "file", item->file ? 
item->file : ""); + yyjson_mut_arr_add_val(arr, entry); +} + +static void add_budgeted_impact_group(yyjson_mut_doc *doc, yyjson_mut_val *impact_obj, + const char *group_name, const cbm_impact_item_t *items, + int count, size_t char_budget, size_t *used, int *shown, + int *full_items, bool *stop) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + if (!*stop) { + for (int i = 0; i < count; i++) { + bool compact = *full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_impact_item_chars(&items[i], compact); + if (*used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_impact_item_chars(&items[i], true); + } + if (*used + estimate > char_budget && *shown > 0) { + *stop = true; + break; + } + if (*used + estimate <= char_budget || *shown == 0) { + add_impact_item_json(doc, arr, &items[i], compact); + *used += estimate; + (*shown)++; + if (!compact) { + (*full_items)++; + } + } + } + } + yyjson_mut_obj_add_val(doc, impact_obj, group_name, arr); +} + typedef struct { char *buf; size_t len; @@ -662,6 +790,17 @@ static const tool_def_t TOOLS[] = { "keyword to narrow symbols by name, qualified name, or file path.\"}},\"required\":[" "\"project\"]}"}, + {"get_impact_analysis", + "Analyze the blast radius of changing a symbol: direct callers, indirect reach, routes, " + "affected tests, and a low/medium/high risk score.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Exact function, method, or class name.\"},\"depth\":{" + "\"type\":\"integer\",\"default\":3},\"include_tests\":{\"type\":\"boolean\",\"default\":true," + "\"description\":\"Include affected test files in the output array.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Controls " + "detail level.\"}},\"required\":[" + "\"project\",\"symbol\"]}"}, + {"search_code", "Graph-augmented code search. 
Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -1766,6 +1905,169 @@ static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { } } +static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + int depth = cbm_mcp_get_int_arg(args, "depth", 3); + bool include_tests = cbm_mcp_get_bool_arg_default(args, "include_tests", true); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!symbol) { + free(project); + return cbm_mcp_text_result("symbol is required", true); + } + REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_impact_analysis_t impact = {0}; + int rc = cbm_store_get_impact_analysis(store, project, symbol, depth, &impact); + if (rc == CBM_STORE_NOT_FOUND) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + free(project); + free(symbol); + return cbm_mcp_text_result(err, true); + } + if (rc != CBM_STORE_OK) { + free(project); + free(symbol); + cbm_store_impact_analysis_free(&impact); + return cbm_mcp_text_result("failed to build impact analysis", true); + } + + char *summary_text = impact_output_summary_dup(&impact, include_tests); + if (!summary_text) { + summary_text = heap_strdup(impact.summary ? 
impact.summary : ""); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + + yyjson_mut_val *impact_obj = yyjson_mut_obj(doc); + yyjson_mut_val *direct = yyjson_mut_arr(doc); + for (int i = 0; i < impact.direct_count; i++) { + add_impact_item_json(doc, direct, &impact.direct[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "direct", direct); + + yyjson_mut_val *indirect = yyjson_mut_arr(doc); + for (int i = 0; i < impact.indirect_count; i++) { + add_impact_item_json(doc, indirect, &impact.indirect[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "indirect", indirect); + + yyjson_mut_val *transitive = yyjson_mut_arr(doc); + for (int i = 0; i < impact.transitive_count; i++) { + add_impact_item_json(doc, transitive, &impact.transitive[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "transitive", transitive); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + yyjson_mut_val *tests = yyjson_mut_arr(doc); + if (include_tests) { + for (int i = 0; i < impact.affected_test_count; i++) { + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + + yyjson_mut_obj_add_str(doc, root, "risk_score", + impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + impact_output_total_results(&impact, include_tests)); + + size_t used = 96; + used += strlen(impact.symbol ? impact.symbol : ""); + used += strlen(impact.qualified_name ? impact.qualified_name : ""); + used += strlen(impact.file ? impact.file : ""); + used += strlen(impact.risk_score ? impact.risk_score : ""); + used += strlen(summary_text ? 
summary_text : ""); + + yyjson_mut_val *impact_obj2 = yyjson_mut_obj(doc); + int shown = 0; + int full_items = 0; + bool stop = false; + + add_budgeted_impact_group(doc, impact_obj2, "direct", impact.direct, impact.direct_count, + char_budget, &used, &shown, &full_items, &stop); + add_budgeted_impact_group(doc, impact_obj2, "indirect", impact.indirect, + impact.indirect_count, char_budget, &used, &shown, &full_items, + &stop); + add_budgeted_impact_group(doc, impact_obj2, "transitive", impact.transitive, + impact.transitive_count, char_budget, &used, &shown, + &full_items, &stop); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj2); + + yyjson_mut_val *tests2 = yyjson_mut_arr(doc); + if (include_tests && !stop) { + for (int i = 0; i < impact.affected_test_count; i++) { + size_t estimate = estimate_affected_test_chars(&impact.affected_tests[i]); + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_affected_test_json(doc, tests2, &impact.affected_tests[i]); + used += estimate; + shown++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests2); + yyjson_mut_obj_add_str(doc, root, "risk_score", + impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + free(summary_text); + cbm_store_impact_analysis_free(&impact); + free(project); + free(symbol); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + static int node_hop_rank_cmp(const void *lhs, const void *rhs) { const cbm_node_hop_t *a = lhs; const cbm_node_hop_t *b = rhs; @@ -3608,6 +3910,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_key_symbols") == 0) { return handle_get_key_symbols(srv, args_json); } + if (strcmp(tool_name, "get_impact_analysis") == 0) { + return handle_get_impact_analysis(srv, args_json); + } if (strcmp(tool_name, "get_architecture_summary") == 0) { return handle_get_architecture_summary(srv, args_json); } diff --git a/src/store/store.c b/src/store/store.c index f3186cec..b1ae0dbd 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2920,6 +2920,697 @@ int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop return CBM_STORE_OK; } +typedef struct { + int64_t id; + int hop; +} impact_visit_t; + +typedef struct { + int64_t id; + cbm_node_t node; + double pagerank; +} impact_cached_node_t; + +typedef struct { + CBMHashTable *seen_ids; + char **seen_keys; + int seen_key_count; + int seen_key_cap; + impact_visit_t *queue; + int queue_count; + int queue_cap; + impact_visit_t *visited; + int visited_count; + int visited_cap; +} impact_walk_t; + +static void impact_walk_free(impact_walk_t *walk) { + if (!walk) { + return; + } + if (walk->seen_ids) { + cbm_ht_free(walk->seen_ids); + } + for (int i = 0; i < walk->seen_key_count; i++) { + free(walk->seen_keys[i]); + } + free(walk->seen_keys); + free(walk->queue); + free(walk->visited); + memset(walk, 0, sizeof(*walk)); +} + +static void impact_cached_nodes_free(impact_cached_node_t *nodes, int count) { + if (!nodes) { + return; + } + 
for (int i = 0; i < count; i++) { + cbm_node_free_fields(&nodes[i].node); + } + free(nodes); +} + +static bool impact_label_is_callable(const char *label) { + return label && (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0 || + strcmp(label, "Class") == 0); +} + +static bool impact_json_bool_field(const char *json, const char *key) { + if (!json || !json[0] || !key) { + return false; + } + + yyjson_doc *doc = yyjson_read(json, strlen(json), 0); + if (!doc) { + return false; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *value = root ? yyjson_obj_get(root, key) : NULL; + bool result = false; + if (value) { + if (yyjson_is_bool(value)) { + result = yyjson_is_true(value); + } else if (yyjson_is_uint(value)) { + result = yyjson_get_uint(value) != 0; + } else if (yyjson_is_int(value)) { + result = yyjson_get_sint(value) != 0; + } + } + yyjson_doc_free(doc); + return result; +} + +static bool impact_node_is_entry_point(const cbm_node_t *node) { + if (!node) { + return false; + } + return impact_json_bool_field(node->properties_json, "is_entry_point"); +} + +static int impact_walk_enqueue(impact_walk_t *walk, int64_t id, int hop, bool record_visit) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "%lld", (long long)id); + if (walk->seen_ids && cbm_ht_get(walk->seen_ids, key_buf)) { + return CBM_STORE_OK; + } + + char *key = heap_strdup(key_buf); + if (!key) { + return CBM_STORE_ERR; + } + + if (walk->seen_key_count >= walk->seen_key_cap) { + int new_cap = walk->seen_key_cap > 0 ? walk->seen_key_cap * 2 : 16; + walk->seen_keys = safe_realloc(walk->seen_keys, (size_t)new_cap * sizeof(char *)); + walk->seen_key_cap = new_cap; + } + walk->seen_keys[walk->seen_key_count++] = key; + cbm_ht_set(walk->seen_ids, key, (void *)1); + + if (walk->queue_count >= walk->queue_cap) { + int new_cap = walk->queue_cap > 0 ? 
walk->queue_cap * 2 : 16; + walk->queue = safe_realloc(walk->queue, (size_t)new_cap * sizeof(impact_visit_t)); + walk->queue_cap = new_cap; + } + walk->queue[walk->queue_count++] = (impact_visit_t){.id = id, .hop = hop}; + + if (record_visit) { + if (walk->visited_count >= walk->visited_cap) { + int new_cap = walk->visited_cap > 0 ? walk->visited_cap * 2 : 16; + walk->visited = safe_realloc(walk->visited, (size_t)new_cap * sizeof(impact_visit_t)); + walk->visited_cap = new_cap; + } + walk->visited[walk->visited_count++] = (impact_visit_t){.id = id, .hop = hop}; + } + + return CBM_STORE_OK; +} + +static bool impact_node_in_top_five_percent(cbm_store_t *s, const char *project, double pagerank) { + if (!s || !s->db || !project || pagerank <= 0.0 || !store_has_node_scores_table(s)) { + return false; + } + + const char *total_sql = + "SELECT COUNT(*) FROM nodes WHERE project=?1 AND label IN ('Function','Method','Class')"; + sqlite3_stmt *stmt = NULL; + int total = 0; + if (sqlite3_prepare_v2(s->db, total_sql, -1, &stmt, NULL) != SQLITE_OK) { + return false; + } + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) == SQLITE_ROW) { + total = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + if (total <= 0) { + return false; + } + + int top_count = (total * 5 + 99) / 100; + if (top_count < 1) { + top_count = 1; + } + + const char *higher_sql = + "SELECT COUNT(*) " + "FROM nodes n " + "JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id " + "WHERE n.project=?1 AND n.label IN ('Function','Method','Class') " + "AND COALESCE(ns.pagerank, 0.0) > ?2"; + int higher = total; + if (sqlite3_prepare_v2(s->db, higher_sql, -1, &stmt, NULL) != SQLITE_OK) { + return false; + } + bind_text(stmt, 1, project); + sqlite3_bind_double(stmt, 2, pagerank); + if (sqlite3_step(stmt) == SQLITE_ROW) { + higher = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + + return higher < top_count; +} + +static int impact_select_target(cbm_store_t *s, const char 
*project, const char *symbol, + cbm_node_t *out_node, int *out_in_degree, double *out_pagerank) { + memset(out_node, 0, sizeof(*out_node)); + *out_in_degree = 0; + *out_pagerank = 0.0; + + if (!s || !s->db || !project || !symbol) { + return CBM_STORE_ERR; + } + + bool has_scores = store_has_node_scores_table(s); + char sql[1024]; + snprintf(sql, sizeof(sql), + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS " + "in_deg, %s " + "FROM nodes n %s " + "WHERE n.project=?1 AND n.name=?2 AND n.label IN ('Function','Method','Class') " + "ORDER BY CASE WHEN lower(COALESCE(n.file_path, '')) LIKE '%%test%%' THEN 1 " + "ELSE 0 END, pagerank DESC, in_deg DESC, " + "n.qualified_name ASC LIMIT 1;", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "impact_select_target"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + bind_text(stmt, 2, symbol); + + int rc = CBM_STORE_NOT_FOUND; + if (sqlite3_step(stmt) == SQLITE_ROW) { + scan_node(stmt, out_node); + *out_in_degree = sqlite3_column_int(stmt, 9); + *out_pagerank = sqlite3_column_double(stmt, 10); + rc = CBM_STORE_OK; + } else { + store_set_error(s, "symbol not found"); + } + sqlite3_finalize(stmt); + return rc; +} + +static int impact_cached_node_cmp(const void *lhs, const void *rhs) { + const impact_cached_node_t *a = lhs; + const impact_cached_node_t *b = rhs; + if (a->id < b->id) { + return -1; + } + if (a->id > b->id) { + return 1; + } + return 0; +} + +static const impact_cached_node_t *impact_find_cached_node(const impact_cached_node_t *nodes, + int count, int64_t id) { + int lo = 0; + int hi = count - 1; + 
while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + if (nodes[mid].id == id) { + return &nodes[mid]; + } + if (nodes[mid].id < id) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return NULL; +} + +static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, + const impact_visit_t *visits, int visit_count, + impact_cached_node_t **out_nodes, int *out_count) { + *out_nodes = NULL; + *out_count = 0; + if (!s || !s->db || !project) { + return CBM_STORE_ERR; + } + if (!visits || visit_count <= 0) { + return CBM_STORE_OK; + } + + bool has_scores = store_has_node_scores_table(s); + size_t sql_cap = 512 + ((size_t)visit_count * 8U); + char *sql = malloc(sql_cap); + if (!sql) { + return CBM_STORE_ERR; + } + + int written = snprintf( + sql, sql_cap, + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, %s " + "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + if (written < 0 || (size_t)written >= sql_cap) { + free(sql); + return CBM_STORE_ERR; + } + + size_t len = (size_t)written; + for (int i = 0; i < visit_count; i++) { + written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? 
"," : "", i + 2); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + return CBM_STORE_ERR; + } + len += (size_t)written; + } + written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + return CBM_STORE_ERR; + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(sql); + store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); + return CBM_STORE_ERR; + } + free(sql); + + bind_text(stmt, 1, project); + for (int i = 0; i < visit_count; i++) { + sqlite3_bind_int64(stmt, i + 2, visits[i].id); + } + + impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); + if (!nodes) { + sqlite3_finalize(stmt); + return CBM_STORE_ERR; + } + + int count = 0; + int rc = CBM_STORE_OK; + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= visit_count) { + rc = CBM_STORE_ERR; + break; + } + scan_node(stmt, &nodes[count].node); + nodes[count].id = nodes[count].node.id; + nodes[count].pagerank = sqlite3_column_double(stmt, 9); + count++; + } + sqlite3_finalize(stmt); + + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(nodes, count); + return rc; + } + + if (count > 1) { + qsort(nodes, (size_t)count, sizeof(*nodes), impact_cached_node_cmp); + } + *out_nodes = nodes; + *out_count = count; + return CBM_STORE_OK; +} + +static int impact_enqueue_neighbors(cbm_store_t *s, impact_walk_t *walk, int64_t node_id, + const char *edge_type, bool inbound, int next_hop) { + cbm_edge_t *edges = NULL; + int edge_count = 0; + int rc = inbound ? cbm_store_find_edges_by_target_type(s, node_id, edge_type, &edges, &edge_count) + : cbm_store_find_edges_by_source_type(s, node_id, edge_type, &edges, &edge_count); + if (rc != CBM_STORE_OK) { + return rc; + } + + for (int i = 0; i < edge_count; i++) { + int64_t next_id = inbound ? 
edges[i].source_id : edges[i].target_id; + rc = impact_walk_enqueue(walk, next_id, next_hop, true); + if (rc != CBM_STORE_OK) { + cbm_store_free_edges(edges, edge_count); + return rc; + } + } + + cbm_store_free_edges(edges, edge_count); + return CBM_STORE_OK; +} + +static int impact_append_item(cbm_impact_item_t **arr, int *count, int *cap, const cbm_node_t *node, + const char *type, double pagerank, int hop) { + if (*count >= *cap) { + int new_cap = *cap > 0 ? *cap * 2 : 8; + *arr = safe_realloc(*arr, (size_t)new_cap * sizeof(cbm_impact_item_t)); + *cap = new_cap; + } + + cbm_impact_item_t item = {0}; + item.name = heap_strdup(safe_str(node ? node->name : NULL)); + item.file = heap_strdup(safe_str(node ? node->file_path : NULL)); + item.type = heap_strdup(safe_str(type)); + item.pagerank = pagerank; + item.hop = hop; + + if (!item.name || !item.file || !item.type) { + free((void *)item.name); + free((void *)item.file); + free((void *)item.type); + return CBM_STORE_ERR; + } + + (*arr)[(*count)++] = item; + return CBM_STORE_OK; +} + +static int impact_append_test(cbm_affected_test_t **arr, int *count, int *cap, const cbm_node_t *node) { + if (*count >= *cap) { + int new_cap = *cap > 0 ? *cap * 2 : 4; + *arr = safe_realloc(*arr, (size_t)new_cap * sizeof(cbm_affected_test_t)); + *cap = new_cap; + } + + cbm_affected_test_t item = {0}; + item.name = heap_strdup(safe_str(node ? node->name : NULL)); + item.file = heap_strdup(safe_str(node ? 
node->file_path : NULL)); + if (!item.name || !item.file) { + free((void *)item.name); + free((void *)item.file); + return CBM_STORE_ERR; + } + + (*arr)[(*count)++] = item; + return CBM_STORE_OK; +} + +static int impact_item_cmp(const void *lhs, const void *rhs) { + const cbm_impact_item_t *a = lhs; + const cbm_impact_item_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->hop != b->hop) { + return a->hop - b->hop; + } + return strcmp(safe_str(a->name), safe_str(b->name)); +} + +static int impact_direct_caller_count(const cbm_impact_analysis_t *out) { + int direct_callers = 0; + for (int i = 0; i < out->direct_count; i++) { + if (out->direct[i].type && strcmp(out->direct[i].type, "route") == 0) { + continue; + } + direct_callers++; + } + return direct_callers; +} + +static int impact_route_entry_count(const cbm_impact_analysis_t *out) { + int total = 0; + const cbm_impact_item_t *groups[] = {out->direct, out->indirect, out->transitive}; + const int counts[] = {out->direct_count, out->indirect_count, out->transitive_count}; + for (int g = 0; g < 3; g++) { + for (int i = 0; i < counts[g]; i++) { + const char *type = groups[g][i].type; + if (type && (strcmp(type, "route") == 0 || strcmp(type, "entry_point") == 0)) { + total++; + } + } + } + return total; +} + +static char *impact_build_summary(const cbm_impact_analysis_t *out) { + int direct_callers = impact_direct_caller_count(out); + int route_entries = impact_route_entry_count(out); + int tests = out->affected_test_count; + int transitive = out->transitive_count; + + char buf[256]; + if (transitive > 0) { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests, " + "%d transitive impacts", + direct_callers, route_entries, tests, transitive); + } else { + snprintf(buf, sizeof(buf), "%d direct callers, %d route/entry points, %d affected tests", + direct_callers, route_entries, tests); + } + return 
heap_strdup(buf); +} + +static char *impact_determine_risk(const cbm_impact_analysis_t *out, bool top_five_percent) { + int direct_callers = impact_direct_caller_count(out); + int indirect_reach = out->indirect_count + out->transitive_count; + int route_entries = impact_route_entry_count(out); + bool has_tests = out->affected_test_count > 0; + + if (direct_callers >= 3 || route_entries > 0 || top_five_percent) { + return heap_strdup("high"); + } + + if (direct_callers >= 1 && direct_callers <= 2 && indirect_reach > 0) { + return heap_strdup(has_tests ? "low" : "medium"); + } + + return heap_strdup("low"); +} + +int cbm_store_get_impact_analysis(cbm_store_t *s, const char *project, const char *symbol, + int depth, cbm_impact_analysis_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !s->db || !project || !symbol) { + store_set_error(s, "impact analysis requires project and symbol"); + return CBM_STORE_ERR; + } + if (depth < 1) { + depth = 1; + } + + cbm_node_t target = {0}; + int target_in_degree = 0; + double target_pagerank = 0.0; + int rc = impact_select_target(s, project, symbol, &target, &target_in_degree, &target_pagerank); + if (rc != CBM_STORE_OK) { + return rc; + } + + impact_walk_t walk = {0}; + walk.seen_ids = cbm_ht_create(64); + if (!walk.seen_ids) { + cbm_node_free_fields(&target); + store_set_error(s, "impact analysis alloc failed"); + return CBM_STORE_ERR; + } + + rc = impact_walk_enqueue(&walk, target.id, 0, false); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + store_set_error(s, "impact analysis alloc failed"); + return rc; + } + + for (int head = 0; head < walk.queue_count; head++) { + impact_visit_t current = walk.queue[head]; + if (current.hop >= depth) { + continue; + } + + cbm_node_t node = {0}; + if (cbm_store_find_node_by_id(s, current.id, &node) != CBM_STORE_OK) { + continue; + } + + int next_hop = current.hop + 1; + if (impact_label_is_callable(node.label)) { + rc = 
impact_enqueue_neighbors(s, &walk, current.id, "CALLS", true, next_hop); + if (rc == CBM_STORE_OK) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "HANDLES", false, next_hop); + } + } else if (node.label && strcmp(node.label, "Route") == 0) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "HTTP_CALLS", true, next_hop); + if (rc == CBM_STORE_OK) { + rc = impact_enqueue_neighbors(s, &walk, current.id, "ASYNC_CALLS", true, next_hop); + } + } else { + rc = CBM_STORE_OK; + } + + cbm_node_free_fields(&node); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis traversal failed"); + return rc; + } + } + + impact_cached_node_t *cached_nodes = NULL; + int cached_count = 0; + rc = impact_fetch_nodes_with_scores(s, project, walk.visited, walk.visited_count, &cached_nodes, + &cached_count); + if (rc != CBM_STORE_OK) { + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis lookup failed"); + return rc; + } + + int direct_cap = 0; + int indirect_cap = 0; + int transitive_cap = 0; + int test_cap = 0; + + for (int i = 0; i < walk.visited_count; i++) { + const impact_cached_node_t *cached = + impact_find_cached_node(cached_nodes, cached_count, walk.visited[i].id); + if (!cached) { + continue; + } + + const cbm_node_t *node = &cached->node; + double pagerank = cached->pagerank; + bool is_test = cbm_is_test_file_path(node->file_path); + bool is_entry_point = impact_node_is_entry_point(node); + + if (is_test) { + rc = impact_append_test(&out->affected_tests, &out->affected_test_count, &test_cap, node); + } else { + const char *item_type = (node->label && strcmp(node->label, "Route") == 0) + ? "route" + : (is_entry_point ? 
"entry_point" : "caller"); + if (walk.visited[i].hop == 1) { + rc = impact_append_item(&out->direct, &out->direct_count, &direct_cap, node, + item_type, pagerank, walk.visited[i].hop); + } else if (walk.visited[i].hop <= 3) { + rc = impact_append_item(&out->indirect, &out->indirect_count, &indirect_cap, node, + item_type, pagerank, walk.visited[i].hop); + } else { + rc = impact_append_item(&out->transitive, &out->transitive_count, &transitive_cap, + node, item_type, pagerank, walk.visited[i].hop); + } + } + + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(cached_nodes, cached_count); + impact_walk_free(&walk); + cbm_node_free_fields(&target); + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis alloc failed"); + return rc; + } + } + + if (out->direct_count > 1) { + qsort(out->direct, (size_t)out->direct_count, sizeof(cbm_impact_item_t), impact_item_cmp); + } + if (out->indirect_count > 1) { + qsort(out->indirect, (size_t)out->indirect_count, sizeof(cbm_impact_item_t), + impact_item_cmp); + } + if (out->transitive_count > 1) { + qsort(out->transitive, (size_t)out->transitive_count, sizeof(cbm_impact_item_t), + impact_item_cmp); + } + + out->symbol = heap_strdup(safe_str(target.name)); + out->qualified_name = heap_strdup(safe_str(target.qualified_name)); + out->file = heap_strdup(safe_str(target.file_path)); + out->pagerank = target_pagerank; + bool top_five_percent = impact_node_in_top_five_percent(s, project, target_pagerank); + out->risk_score = impact_determine_risk(out, top_five_percent); + out->summary = impact_build_summary(out); + + impact_cached_nodes_free(cached_nodes, cached_count); + impact_walk_free(&walk); + cbm_node_free_fields(&target); + + if (!out->symbol || !out->qualified_name || !out->file || !out->risk_score || !out->summary) { + cbm_store_impact_analysis_free(out); + store_set_error(s, "impact analysis alloc failed"); + return CBM_STORE_ERR; + } + + (void)target_in_degree; + return CBM_STORE_OK; +} + +void 
cbm_store_impact_analysis_free(cbm_impact_analysis_t *out) { + if (!out) { + return; + } + + for (int i = 0; i < out->direct_count; i++) { + free((void *)out->direct[i].name); + free((void *)out->direct[i].file); + free((void *)out->direct[i].type); + } + free(out->direct); + + for (int i = 0; i < out->indirect_count; i++) { + free((void *)out->indirect[i].name); + free((void *)out->indirect[i].file); + free((void *)out->indirect[i].type); + } + free(out->indirect); + + for (int i = 0; i < out->transitive_count; i++) { + free((void *)out->transitive[i].name); + free((void *)out->transitive[i].file); + free((void *)out->transitive[i].type); + } + free(out->transitive); + + for (int i = 0; i < out->affected_test_count; i++) { + free((void *)out->affected_tests[i].name); + free((void *)out->affected_tests[i].file); + } + free(out->affected_tests); + + free((void *)out->symbol); + free((void *)out->qualified_name); + free((void *)out->file); + free((void *)out->risk_score); + free((void *)out->summary); + memset(out, 0, sizeof(*out)); +} + /* ── Schema ─────────────────────────────────────────────────────── */ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out) { diff --git a/tests/test_mcp.c b/tests/test_mcp.c index d140e84d..9131a4e6 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -140,6 +140,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); + ASSERT_NOT_NULL(strstr(json, "get_impact_analysis")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); @@ -675,6 +676,186 @@ static cbm_mcp_server_t *setup_truncation_server(void) { return srv; } +static cbm_mcp_server_t *setup_impact_server(void) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = 
cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "impact", "/tmp/impact"); + cbm_mcp_server_set_project(srv, "impact"); + + cbm_node_t nodes[] = { + {.project = "impact", + .label = "Function", + .name = "ProcessOrder", + .qualified_name = "impact.service.ProcessOrder", + .file_path = "app/services/order_service.php"}, + {.project = "impact", + .label = "Method", + .name = "HandleOrder", + .qualified_name = "impact.controller.OrderController.HandleOrder", + .file_path = "app/controllers/OrderController.php"}, + {.project = "impact", + .label = "Function", + .name = "CliEntry", + .qualified_name = "impact.cli.CliEntry", + .file_path = "app/cli/order_cli.php", + .properties_json = "{\"is_entry_point\":true}"}, + {.project = "impact", + .label = "Route", + .name = "POST /orders", + .qualified_name = "impact.route.post_orders", + .file_path = "routes/api.php"}, + {.project = "impact", + .label = "Function", + .name = "CheckoutApi", + .qualified_name = "impact.http.CheckoutApi", + .file_path = "app/http/CheckoutApi.php"}, + {.project = "impact", + .label = "Function", + .name = "OrderWebhook", + .qualified_name = "impact.jobs.OrderWebhook", + .file_path = "app/jobs/OrderWebhook.php"}, + {.project = "impact", + .label = "Function", + .name = "BrowserFlow", + .qualified_name = "impact.ui.BrowserFlow", + .file_path = "app/ui/browser_flow.php"}, + {.project = "impact", + .label = "Function", + .name = "ProcessOrderTest", + .qualified_name = "impact.tests.ProcessOrderTest", + .file_path = "tests/process_order_test.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.core.Duplicate", + .file_path = "app/core/duplicate.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.tests.Duplicate", + .file_path = "tests/duplicate_test.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerA", + 
.qualified_name = "impact.core.CoreCallerA", + .file_path = "app/core/core_caller_a.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerB", + .qualified_name = "impact.core.CoreCallerB", + .file_path = "app/core/core_caller_b.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller", + .qualified_name = "impact.tests.TestCaller", + .file_path = "tests/test_caller.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller2", + .qualified_name = "impact.tests.TestCaller2", + .file_path = "tests/test_caller_two.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller3", + .qualified_name = "impact.tests.TestCaller3", + .file_path = "tests/test_caller_three.php"}, + }; + + enum { + ID_PROCESS_ORDER, + ID_HANDLE_ORDER, + ID_CLI_ENTRY, + ID_ROUTE, + ID_CHECKOUT_API, + ID_ORDER_WEBHOOK, + ID_BROWSER_FLOW, + ID_PROCESS_ORDER_TEST, + ID_DUPLICATE_PROD, + ID_DUPLICATE_TEST, + ID_CORE_CALLER_A, + ID_CORE_CALLER_B, + ID_TEST_CALLER, + ID_TEST_CALLER_2, + ID_TEST_CALLER_3, + ID_COUNT + }; + int64_t ids[ID_COUNT] = {0}; + for (int i = 0; i < ID_COUNT; i++) { + ids[i] = cbm_store_upsert_node(st, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CLI_ENTRY], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_PROCESS_ORDER_TEST], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_ROUTE], + .type = "HANDLES"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT_API], + .target_id = ids[ID_ROUTE], + .type = "HTTP_CALLS"}, + {.project = "impact", + .source_id = ids[ID_ORDER_WEBHOOK], + .target_id = ids[ID_ROUTE], + .type = "ASYNC_CALLS"}, + {.project = "impact", + .source_id = ids[ID_BROWSER_FLOW], + .target_id 
= ids[ID_CHECKOUT_API], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_A], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_B], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_2], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_3], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(st, &edges[i]); + } + + if (cbm_store_compute_pagerank(st, "impact", 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1386,6 +1567,125 @@ TEST(tool_query_graph_max_tokens_truncates) { PASS(); } +TEST(tool_get_impact_analysis_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"symbol\":\"ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"qualified_name\":\"impact.service.ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"risk_score\":\"high\"")); + ASSERT_NOT_NULL(strstr( + text, + "\"summary\":\"2 direct callers, 2 route/entry points, 1 affected tests, 1 transitive impacts\"")); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + 
+TEST(tool_get_impact_analysis_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", "{\"project\":\"impact\",\"symbol\":\"MissingSymbol\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "search_graph(name_pattern")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_ambiguous_symbol_picks_top_match) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"Duplicate\",\"depth\":2}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"qualified_name\":\"impact.core.Duplicate\"")); + ASSERT_NOT_NULL(strstr(text, "\"file\":\"app/core/duplicate.php\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_include_tests_false) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4,\"include_tests\":false}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[]")); + ASSERT_NOT_NULL( + strstr(text, "\"summary\":\"2 direct callers, 2 route/entry points, 1 transitive impacts\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + 
ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"impact\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_impact_analysis_route_and_entry_point_typing) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"depth\":4}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL( + strstr(text, "\"name\":\"CliEntry\",\"file\":\"app/cli/order_cli.php\",\"type\":\"entry_point\"")); + ASSERT_NOT_NULL( + strstr(text, "\"name\":\"POST /orders\",\"file\":\"routes/api.php\",\"type\":\"route\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -2120,6 +2420,12 @@ SUITE(mcp) { RUN_TEST(tool_trace_call_path_chain_shows_omitted_count); RUN_TEST(tool_query_graph_missing_query); RUN_TEST(tool_query_graph_max_tokens_truncates); + RUN_TEST(tool_get_impact_analysis_basic); + RUN_TEST(tool_get_impact_analysis_missing_symbol); + RUN_TEST(tool_get_impact_analysis_ambiguous_symbol_picks_top_match); + RUN_TEST(tool_get_impact_analysis_include_tests_false); + RUN_TEST(tool_get_impact_analysis_max_tokens_truncates); + RUN_TEST(tool_get_impact_analysis_route_and_entry_point_typing); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); diff --git a/tests/test_store_search.c b/tests/test_store_search.c index 4a5b7b3e..2e68e607 100644 --- a/tests/test_store_search.c +++ b/tests/test_store_search.c @@ -82,6 +82,239 @@ static cbm_store_t *setup_pagerank_store(int64_t *ids) { return s; } +static cbm_store_t *setup_impact_store(bool with_pagerank) { + cbm_store_t *s = cbm_store_open_memory(); + if (!s) { + return NULL; + } + cbm_store_upsert_project(s, "impact", "/tmp/impact"); + + cbm_node_t nodes[] = { + {.project = "impact", + .label = "Function", + .name = "ProcessOrder", + .qualified_name = "impact.service.ProcessOrder", + .file_path = "app/services/order_service.php"}, + {.project = "impact", + .label = "Method", + .name = "HandleOrder", + .qualified_name = "impact.controller.OrderController.HandleOrder", + .file_path = "app/controllers/OrderController.php"}, + {.project = "impact", + .label = "Function", + .name = "CliEntry", + .qualified_name = "impact.cli.CliEntry", + .file_path = "app/cli/order_cli.php", + .properties_json = "{\"is_entry_point\":true}"}, + {.project = "impact", + .label = "Route", + .name = "POST /orders", + .qualified_name = "impact.route.post_orders", + .file_path = "routes/api.php"}, + {.project = "impact", + .label = "Function", + .name = "CheckoutApi", + .qualified_name = 
"impact.http.CheckoutApi", + .file_path = "app/http/CheckoutApi.php"}, + {.project = "impact", + .label = "Function", + .name = "OrderWebhook", + .qualified_name = "impact.jobs.OrderWebhook", + .file_path = "app/jobs/OrderWebhook.php"}, + {.project = "impact", + .label = "Function", + .name = "BrowserFlow", + .qualified_name = "impact.ui.BrowserFlow", + .file_path = "app/ui/browser_flow.php"}, + {.project = "impact", + .label = "Function", + .name = "ProcessOrderTest", + .qualified_name = "impact.tests.ProcessOrderTest", + .file_path = "tests/process_order_test.php"}, + {.project = "impact", + .label = "Function", + .name = "Notify", + .qualified_name = "impact.notify.Notify", + .file_path = "app/services/notify.php"}, + {.project = "impact", + .label = "Function", + .name = "TaskRunner", + .qualified_name = "impact.tasks.TaskRunner", + .file_path = "app/tasks/task_runner.php"}, + {.project = "impact", + .label = "Function", + .name = "Scheduler", + .qualified_name = "impact.scheduler.Scheduler", + .file_path = "app/schedule/scheduler.php"}, + {.project = "impact", + .label = "Function", + .name = "FormatAmount", + .qualified_name = "impact.money.FormatAmount", + .file_path = "app/util/money.php"}, + {.project = "impact", + .label = "Function", + .name = "Checkout", + .qualified_name = "impact.checkout.Checkout", + .file_path = "app/checkout/checkout.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.core.Duplicate", + .file_path = "app/core/duplicate.php"}, + {.project = "impact", + .label = "Function", + .name = "Duplicate", + .qualified_name = "impact.tests.Duplicate", + .file_path = "tests/duplicate_test.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerA", + .qualified_name = "impact.core.CoreCallerA", + .file_path = "app/core/core_caller_a.php"}, + {.project = "impact", + .label = "Function", + .name = "CoreCallerB", + .qualified_name = "impact.core.CoreCallerB", + .file_path = 
"app/core/core_caller_b.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller", + .qualified_name = "impact.tests.TestCaller", + .file_path = "tests/test_caller.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller2", + .qualified_name = "impact.tests.TestCaller2", + .file_path = "tests/test_caller_two.php"}, + {.project = "impact", + .label = "Function", + .name = "TestCaller3", + .qualified_name = "impact.tests.TestCaller3", + .file_path = "tests/test_caller_three.php"}, + }; + + enum { + ID_PROCESS_ORDER, + ID_HANDLE_ORDER, + ID_CLI_ENTRY, + ID_ROUTE, + ID_CHECKOUT_API, + ID_ORDER_WEBHOOK, + ID_BROWSER_FLOW, + ID_PROCESS_ORDER_TEST, + ID_NOTIFY, + ID_TASK_RUNNER, + ID_SCHEDULER, + ID_FORMAT_AMOUNT, + ID_CHECKOUT, + ID_DUPLICATE_PROD, + ID_DUPLICATE_TEST, + ID_CORE_CALLER_A, + ID_CORE_CALLER_B, + ID_TEST_CALLER, + ID_TEST_CALLER_2, + ID_TEST_CALLER_3, + ID_COUNT + }; + int64_t ids[ID_COUNT] = {0}; + for (int i = 0; i < ID_COUNT; i++) { + ids[i] = cbm_store_upsert_node(s, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CLI_ENTRY], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_PROCESS_ORDER_TEST], + .target_id = ids[ID_PROCESS_ORDER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_HANDLE_ORDER], + .target_id = ids[ID_ROUTE], + .type = "HANDLES"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT_API], + .target_id = ids[ID_ROUTE], + .type = "HTTP_CALLS"}, + {.project = "impact", + .source_id = ids[ID_ORDER_WEBHOOK], + .target_id = ids[ID_ROUTE], + .type = "ASYNC_CALLS"}, + {.project = "impact", + .source_id = ids[ID_BROWSER_FLOW], + .target_id = ids[ID_CHECKOUT_API], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TASK_RUNNER], + .target_id = ids[ID_NOTIFY], + .type 
= "CALLS"}, + {.project = "impact", + .source_id = ids[ID_SCHEDULER], + .target_id = ids[ID_TASK_RUNNER], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CHECKOUT], + .target_id = ids[ID_FORMAT_AMOUNT], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_A], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_CORE_CALLER_B], + .target_id = ids[ID_DUPLICATE_PROD], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_2], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + {.project = "impact", + .source_id = ids[ID_TEST_CALLER_3], + .target_id = ids[ID_DUPLICATE_TEST], + .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(s, &edges[i]); + } + + if (with_pagerank && cbm_store_compute_pagerank(s, "impact", 20, 0.85) != CBM_STORE_OK) { + cbm_store_close(s); + return NULL; + } + return s; +} + +static const cbm_impact_item_t *find_impact_item(const cbm_impact_item_t *items, int count, + const char *name) { + for (int i = 0; i < count; i++) { + if (items[i].name && strcmp(items[i].name, name) == 0) { + return &items[i]; + } + } + return NULL; +} + +static const cbm_affected_test_t *find_affected_test(const cbm_affected_test_t *items, int count, + const char *name) { + for (int i = 0; i < count; i++) { + if (items[i].name && strcmp(items[i].name, name) == 0) { + return &items[i]; + } + } + return NULL; +} + /* ── Search by label ────────────────────────────────────────────── */ TEST(store_search_by_label) { @@ -1244,6 +1477,126 @@ TEST(store_is_test_file_various) { /* ── Risk/impact edge cases ────────────────────────────────────── */ +TEST(store_get_impact_analysis_high_risk_with_routes_and_tests) { + cbm_store_t *s = 
setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "ProcessOrder", 4, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "ProcessOrder"); + ASSERT_STR_EQ(out.qualified_name, "impact.service.ProcessOrder"); + ASSERT_STR_EQ(out.file, "app/services/order_service.php"); + ASSERT_EQ(out.direct_count, 2); + ASSERT_EQ(out.indirect_count, 3); + ASSERT_EQ(out.transitive_count, 1); + ASSERT_EQ(out.affected_test_count, 1); + ASSERT_STR_EQ(out.risk_score, "high"); + ASSERT_STR_EQ(out.summary, + "2 direct callers, 2 route/entry points, 1 affected tests, 1 transitive impacts"); + + const cbm_impact_item_t *entry = find_impact_item(out.direct, out.direct_count, "CliEntry"); + ASSERT_NOT_NULL(entry); + ASSERT_STR_EQ(entry->type, "entry_point"); + + const cbm_impact_item_t *handler = + find_impact_item(out.direct, out.direct_count, "HandleOrder"); + ASSERT_NOT_NULL(handler); + ASSERT_STR_EQ(handler->type, "caller"); + + const cbm_impact_item_t *route = + find_impact_item(out.indirect, out.indirect_count, "POST /orders"); + ASSERT_NOT_NULL(route); + ASSERT_STR_EQ(route->type, "route"); + + const cbm_impact_item_t *browser = + find_impact_item(out.transitive, out.transitive_count, "BrowserFlow"); + ASSERT_NOT_NULL(browser); + ASSERT_STR_EQ(browser->type, "caller"); + + const cbm_affected_test_t *test = + find_affected_test(out.affected_tests, out.affected_test_count, "ProcessOrderTest"); + ASSERT_NOT_NULL(test); + ASSERT_STR_EQ(test->file, "tests/process_order_test.php"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_medium_risk) { + cbm_store_t *s = setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "Notify", 3, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "Notify"); + ASSERT_EQ(out.direct_count, 1); + ASSERT_EQ(out.indirect_count, 
1); + ASSERT_EQ(out.transitive_count, 0); + ASSERT_EQ(out.affected_test_count, 0); + ASSERT_STR_EQ(out.risk_score, "medium"); + ASSERT_STR_EQ(out.summary, "1 direct callers, 0 route/entry points, 0 affected tests"); + + const cbm_impact_item_t *direct = + find_impact_item(out.direct, out.direct_count, "TaskRunner"); + ASSERT_NOT_NULL(direct); + ASSERT_STR_EQ(direct->type, "caller"); + + const cbm_impact_item_t *indirect = + find_impact_item(out.indirect, out.indirect_count, "Scheduler"); + ASSERT_NOT_NULL(indirect); + ASSERT_STR_EQ(indirect->type, "caller"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_low_risk) { + cbm_store_t *s = setup_impact_store(false); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "FormatAmount", 3, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "FormatAmount"); + ASSERT_EQ(out.direct_count, 1); + ASSERT_EQ(out.indirect_count, 0); + ASSERT_EQ(out.transitive_count, 0); + ASSERT_EQ(out.affected_test_count, 0); + ASSERT_STR_EQ(out.risk_score, "low"); + ASSERT_STR_EQ(out.summary, "1 direct callers, 0 route/entry points, 0 affected tests"); + + const cbm_impact_item_t *direct = find_impact_item(out.direct, out.direct_count, "Checkout"); + ASSERT_NOT_NULL(direct); + ASSERT_STR_EQ(direct->type, "caller"); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + +TEST(store_get_impact_analysis_ambiguous_symbol_prefers_ranked_match) { + cbm_store_t *s = setup_impact_store(true); + ASSERT_NOT_NULL(s); + + cbm_impact_analysis_t out = {0}; + ASSERT_EQ(cbm_store_get_impact_analysis(s, "impact", "Duplicate", 2, &out), CBM_STORE_OK); + + ASSERT_STR_EQ(out.symbol, "Duplicate"); + ASSERT_STR_EQ(out.qualified_name, "impact.core.Duplicate"); + ASSERT_STR_EQ(out.file, "app/core/duplicate.php"); + ASSERT_EQ(out.direct_count, 2); + ASSERT_NOT_NULL(find_impact_item(out.direct, out.direct_count, 
"CoreCallerA")); + ASSERT_NOT_NULL(find_impact_item(out.direct, out.direct_count, "CoreCallerB")); + + cbm_store_impact_analysis_free(&out); + cbm_store_close(s); + PASS(); +} + TEST(store_hop_to_risk_all_levels) { /* hop 0 hits the default case → LOW */ ASSERT_EQ(cbm_hop_to_risk(0), CBM_RISK_LOW); @@ -1343,6 +1696,10 @@ SUITE(store_search) { RUN_TEST(store_qn_to_top_package_many_segments); RUN_TEST(store_qn_to_top_package_null); RUN_TEST(store_is_test_file_various); + RUN_TEST(store_get_impact_analysis_high_risk_with_routes_and_tests); + RUN_TEST(store_get_impact_analysis_medium_risk); + RUN_TEST(store_get_impact_analysis_low_risk); + RUN_TEST(store_get_impact_analysis_ambiguous_symbol_prefers_ranked_match); RUN_TEST(store_hop_to_risk_all_levels); RUN_TEST(store_risk_label_all_levels); RUN_TEST(store_impact_summary_empty); From d261a14425fc610ec0911835df6e14635566bf9c Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 19:50:00 +0530 Subject: [PATCH 08/14] Wire blast radius public surface Add the impact-analysis store API declaration, expose get_impact_analysis in CLI help text, and cover the tool with the existing integration fixture. 
--- src/main.c | 2 +- src/store/store.h | 34 ++++++++++++++++++++++++++++++++++ tests/test_integration.c | 17 ++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/main.c b/src/main.c index 46c13e6f..a0e08da5 100644 --- a/src/main.c +++ b/src/main.c @@ -150,7 +150,7 @@ static void print_help(void) { printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); - printf(" get_architecture_summary, search_code,\n"); + printf(" get_architecture_summary, get_impact_analysis, search_code,\n"); printf(" get_key_symbols,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); diff --git a/src/store/store.h b/src/store/store.h index 99c240e3..3418b31e 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -429,6 +429,40 @@ cbm_impact_summary_t cbm_build_impact_summary(const cbm_node_hop_t *hops, int ho int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop_t **out, int *out_count); +typedef struct { + const char *name; + const char *file; + const char *type; + double pagerank; + int hop; /* internal traversal depth used for ordering/grouping */ +} cbm_impact_item_t; + +typedef struct { + const char *name; + const char *file; +} cbm_affected_test_t; + +typedef struct { + const char *symbol; + const char *qualified_name; + const char *file; + double pagerank; + cbm_impact_item_t *direct; + int direct_count; + cbm_impact_item_t *indirect; + int indirect_count; + cbm_impact_item_t *transitive; + int transitive_count; + cbm_affected_test_t *affected_tests; + int affected_test_count; + const char *risk_score; + const char *summary; +} cbm_impact_analysis_t; + +int cbm_store_get_impact_analysis(cbm_store_t *s, const char *project, const char *symbol, + int depth, 
cbm_impact_analysis_t *out); +void cbm_store_impact_analysis_free(cbm_impact_analysis_t *out); + /* ── Schema ─────────────────────────────────────────────────────── */ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out); diff --git a/tests/test_integration.c b/tests/test_integration.c index d0a14dde..847d9a77 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -393,6 +393,20 @@ TEST(integ_mcp_get_key_symbols) { PASS(); } +TEST(integ_mcp_get_impact_analysis) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"depth\":3}", g_project); + + char *resp = call_tool("get_impact_analysis", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"symbol\":\"Add\"")); + ASSERT_NOT_NULL(strstr(resp, "\"impact\"")); + ASSERT_NOT_NULL(strstr(resp, "\"risk_score\"")); + ASSERT_TRUE(strstr(resp, "Multiply") || strstr(resp, "Compute")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -575,7 +589,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 25; /* skip all integration tests */ + tf_skip_count += 26; /* skip all integration tests */ integration_teardown(); return; } @@ -597,6 +611,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_architecture); RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_get_key_symbols); + RUN_TEST(integ_mcp_get_impact_analysis); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); From 31de10986662b9bfedba780597c3e42583c6d1c6 Mon Sep 17 00:00:00 2001 From: maplenk Date: Fri, 27 Mar 2026 18:43:41 +0530 Subject: [PATCH 09/14] fix: batch impact lookups and clean up impact handler - Chunk impact_fetch_nodes_with_scores() into batches of 900 to stay under SQLite's SQLITE_MAX_VARIABLE_NUMBER bind limit - Replace 
REQUIRE_STORE in handle_get_impact_analysis() with explicit cleanup that frees symbol on early return - Fix pre-existing test_integration assertion that used escaped JSON field matching (was already failing on origin/feat/blast-radius) Co-Authored-By: Claude Opus 4.6 --- src/mcp/mcp.c | 9 ++- src/store/store.c | 127 ++++++++++++++++++++++----------------- tests/test_integration.c | 6 +- 3 files changed, 82 insertions(+), 60 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index eb3957f0..4095e44e 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1918,7 +1918,14 @@ static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) free(project); return cbm_mcp_text_result("symbol is required", true); } - REQUIRE_STORE(store, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { diff --git a/src/store/store.c b/src/store/store.c index b1ae0dbd..1823d074 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -3171,6 +3171,10 @@ static const impact_cached_node_t *impact_find_cached_node(const impact_cached_n return NULL; } +/* Chunk size for IN(...) batches — stays well under SQLite's default + * SQLITE_MAX_VARIABLE_NUMBER (999), leaving room for the ?1 project bind. 
*/ +#define IMPACT_FETCH_CHUNK_SIZE 900 + static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, const impact_visit_t *visits, int visit_count, impact_cached_node_t **out_nodes, int *out_count) { @@ -3183,77 +3187,88 @@ static int impact_fetch_nodes_with_scores(cbm_store_t *s, const char *project, return CBM_STORE_OK; } - bool has_scores = store_has_node_scores_table(s); - size_t sql_cap = 512 + ((size_t)visit_count * 8U); - char *sql = malloc(sql_cap); - if (!sql) { + impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); + if (!nodes) { return CBM_STORE_ERR; } - int written = snprintf( - sql, sql_cap, - "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " - "n.start_line, n.end_line, n.properties, %s " - "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", - has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", - has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" - : ""); - if (written < 0 || (size_t)written >= sql_cap) { - free(sql); - return CBM_STORE_ERR; - } + bool has_scores = store_has_node_scores_table(s); + int count = 0; + int rc = CBM_STORE_OK; + + for (int chunk_start = 0; chunk_start < visit_count; chunk_start += IMPACT_FETCH_CHUNK_SIZE) { + int chunk_end = chunk_start + IMPACT_FETCH_CHUNK_SIZE; + if (chunk_end > visit_count) chunk_end = visit_count; + int chunk_size = chunk_end - chunk_start; + + size_t sql_cap = 512 + ((size_t)chunk_size * 8U); + char *sql = malloc(sql_cap); + if (!sql) { + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } + + int written = snprintf( + sql, sql_cap, + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, n.file_path, " + "n.start_line, n.end_line, n.properties, %s " + "FROM nodes n %s WHERE n.project=?1 AND n.id IN (", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? 
"LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); + if (written < 0 || (size_t)written >= sql_cap) { + free(sql); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } - size_t len = (size_t)written; - for (int i = 0; i < visit_count; i++) { - written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? "," : "", i + 2); + size_t len = (size_t)written; + for (int i = 0; i < chunk_size; i++) { + written = snprintf(sql + len, sql_cap - len, "%s?%d", i > 0 ? "," : "", i + 2); + if (written < 0 || (size_t)written >= sql_cap - len) { + free(sql); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } + len += (size_t)written; + } + written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); if (written < 0 || (size_t)written >= sql_cap - len) { free(sql); + impact_cached_nodes_free(nodes, count); return CBM_STORE_ERR; } - len += (size_t)written; - } - written = snprintf(sql + len, sql_cap - len, ") ORDER BY n.id"); - if (written < 0 || (size_t)written >= sql_cap - len) { - free(sql); - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + free(sql); + store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); + impact_cached_nodes_free(nodes, count); + return CBM_STORE_ERR; + } free(sql); - store_set_error_sqlite(s, "impact_fetch_nodes_with_scores"); - return CBM_STORE_ERR; - } - free(sql); - bind_text(stmt, 1, project); - for (int i = 0; i < visit_count; i++) { - sqlite3_bind_int64(stmt, i + 2, visits[i].id); - } + bind_text(stmt, 1, project); + for (int i = 0; i < chunk_size; i++) { + sqlite3_bind_int64(stmt, i + 2, visits[chunk_start + i].id); + } - impact_cached_node_t *nodes = calloc((size_t)visit_count, sizeof(*nodes)); - if (!nodes) { + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= visit_count) { + rc = 
CBM_STORE_ERR; + break; + } + scan_node(stmt, &nodes[count].node); + nodes[count].id = nodes[count].node.id; + nodes[count].pagerank = sqlite3_column_double(stmt, 9); + count++; + } sqlite3_finalize(stmt); - return CBM_STORE_ERR; - } - int count = 0; - int rc = CBM_STORE_OK; - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (count >= visit_count) { - rc = CBM_STORE_ERR; - break; + if (rc != CBM_STORE_OK) { + impact_cached_nodes_free(nodes, count); + return rc; } - scan_node(stmt, &nodes[count].node); - nodes[count].id = nodes[count].node.id; - nodes[count].pagerank = sqlite3_column_double(stmt, 9); - count++; - } - sqlite3_finalize(stmt); - - if (rc != CBM_STORE_OK) { - impact_cached_nodes_free(nodes, count); - return rc; } if (count > 1) { diff --git a/tests/test_integration.c b/tests/test_integration.c index 847d9a77..b106c11d 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -399,9 +399,9 @@ TEST(integ_mcp_get_impact_analysis) { char *resp = call_tool("get_impact_analysis", args); ASSERT_NOT_NULL(resp); - ASSERT_NOT_NULL(strstr(resp, "\"symbol\":\"Add\"")); - ASSERT_NOT_NULL(strstr(resp, "\"impact\"")); - ASSERT_NOT_NULL(strstr(resp, "\"risk_score\"")); + ASSERT_NOT_NULL(strstr(resp, "Add")); + ASSERT_NOT_NULL(strstr(resp, "impact")); + ASSERT_NOT_NULL(strstr(resp, "risk_score")); ASSERT_TRUE(strstr(resp, "Multiply") || strstr(resp, "Compute")); free(resp); PASS(); From 0e55891839d053f6d31d5750ae9f13995407bf19 Mon Sep 17 00:00:00 2001 From: maplenk Date: Wed, 25 Mar 2026 23:40:01 +0530 Subject: [PATCH 10/14] Add compound query tools + fix safety issues across all phases Phase 5: Three new compound MCP tools (explore, understand, prepare_change) that bundle multiple graph queries into single-call responses: - explore: area search with matches, dependencies, hotspots, entry points - understand: symbol deep-dive with 3-tier resolution (exact QN, exact name with auto-pick, QN suffix with suggestions), callers, callees, source, connected 
symbols, is_key_symbol flag - prepare_change: wraps impact analysis with review_scope (must_review, should_review, tests) and include_tests=false support - All three support max_tokens budget with progressive truncation - Wire qn_pattern in store search (completing pre-existing API contract) Bug fixes across all committed phases: - Fix REQUIRE_STORE leaking heap args in 5 handlers (get_key_symbols, get_impact_analysis, explore, understand, prepare_change) - Fix markdown_builder_reserve infinite loop on OOM (NULL check after safe_realloc) - Fix SQLite bind parameter limit in impact_fetch_nodes_with_scores (chunk into batches of 900) - Fix cbm_mcp_text_result(NULL) crash on OOM (guard with empty string) - Fix POSIX regex: remove invalid PCRE (?i) prefix from contains pattern - Fix search degree filter: set min_degree/max_degree to -1 (disabled) in compound search helpers - Fix summary_count_nodes returning -1 on SQL error (return 0 instead) - Fix explore total_results overcounting unfiltered hotspots - Fix qsort(NULL, 0) undefined behavior in explore - Fix handle_understand early return leaking search outputs (use goto) - Refactor handle_prepare_change to use goto cleanup pattern Output enrichment (non-breaking): - search_graph results now include start_line, end_line, signature - trace_call_path hops now include file_path, start_line, signature Tests: 2639 passed (+44 new), 0 failures Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main.c | 4 +- src/mcp/mcp.c | 1903 +++++++++++++++++++++++++++++++++++--- src/store/store.c | 11 +- tests/test_integration.c | 46 +- tests/test_mcp.c | 321 +++++++ 5 files changed, 2161 insertions(+), 124 deletions(-) diff --git a/src/main.c b/src/main.c index a0e08da5..18865b73 100644 --- a/src/main.c +++ b/src/main.c @@ -150,8 +150,8 @@ static void print_help(void) { printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, 
trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); - printf(" get_architecture_summary, get_impact_analysis, search_code,\n"); - printf(" get_key_symbols,\n"); + printf(" get_architecture_summary, get_key_symbols, get_impact_analysis,\n"); + printf(" explore, understand, prepare_change, search_code,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 4095e44e..09712bfe 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -478,6 +478,7 @@ static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { b->cap *= 2; b->buf = safe_realloc(b->buf, b->cap); if (!b->buf) { + b->truncated = true; return false; } } @@ -685,7 +686,7 @@ char *cbm_mcp_text_result(const char *text, bool is_error) { yyjson_mut_val *content = yyjson_mut_arr(doc); yyjson_mut_val *item = yyjson_mut_obj(doc); yyjson_mut_obj_add_str(doc, item, "type", "text"); - yyjson_mut_obj_add_str(doc, item, "text", text); + yyjson_mut_obj_add_str(doc, item, "text", text ? text : ""); yyjson_mut_arr_add_val(content, item); yyjson_mut_obj_add_val(doc, root, "content", content); @@ -801,6 +802,34 @@ static const tool_def_t TOOLS[] = { "detail level.\"}},\"required\":[" "\"project\",\"symbol\"]}"}, + {"explore", + "Compound area exploration: matching symbols, small dependency summaries, hotspots, and " + "entry points in one response.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"area\":{\"type\":" + "\"string\",\"description\":\"Case-insensitive keyword for the area to explore.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. 
Truncates lower-priority sections when needed.\"}},\"required\":[\"project\"," + "\"area\"]}"}, + + {"understand", + "Compound symbol deep-dive: definition, source, callers, callees, and connected symbols in " + "one response.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Short symbol name or full qualified_name.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Truncates " + "source and lower-priority arrays when needed.\"}},\"required\":[\"project\"," + "\"symbol\"]}"}, + + {"prepare_change", + "Compound pre-change analysis: blast radius, affected tests, risk score, and suggested " + "review scope.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"symbol\":{\"type\":" + "\"string\",\"description\":\"Exact function, method, or class name.\"}," + "\"include_tests\":{\"type\":\"boolean\",\"default\":true,\"description\":\"Include " + "affected test details in the output.\"},\"max_tokens\":{\"type\":\"integer\"," + "\"default\":2000,\"description\":\"Maximum output size. Truncates lower-priority impact " + "details when needed.\"}},\"required\":[\"project\",\"symbol\"]}"}, + {"search_code", "Graph-augmented code search. 
Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -3019,157 +3048,1782 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { true); } -/* ── search_code v2: graph-augmented code search ─────────────── */ +/* ── compound queries ─────────────────────────────────────────── */ -/* Strip non-ASCII bytes to guarantee valid UTF-8 JSON output */ -enum { ASCII_MAX = 127 }; -static void sanitize_ascii(char *s) { - for (unsigned char *p = (unsigned char *)s; *p; p++) { - if (*p > ASCII_MAX) { - *p = '?'; +#define EXPLORE_SEARCH_LIMIT 64 +#define EXPLORE_DEPENDENCY_LIMIT 5 +#define UNDERSTAND_KEY_SYMBOL_LIMIT 20 +#define UNDERSTAND_MAX_BFS_RESULTS 64 +#define MAX_COMPACT_SOURCE_CHARS 256 +#define CONNECTED_REL_CALLER 0x1U +#define CONNECTED_REL_CALLEE 0x2U + +typedef struct { + const cbm_search_result_t *match; + char **callers; + int caller_count; + char **callees; + int callee_count; +} explore_dependency_t; + +typedef struct { + cbm_node_t node; + double pagerank; + unsigned relation_mask; +} connected_symbol_t; + +typedef struct { + char **must_review; + int must_review_count; + char **should_review; + int should_review_count; + char **tests; + int test_count; +} review_scope_t; + +static bool compound_is_symbol_label(const char *label) { + return label && (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0 || + strcmp(label, "Class") == 0); +} + +static char *regex_escape_literal(const char *text) { + static const char *meta = ".^$|()[]{}*+?\\"; + size_t extra = 0; + for (const char *p = text; p && *p; p++) { + if (strchr(meta, *p)) { + extra++; + } + } + + size_t len = strlen(text ? 
text : ""); + char *out = malloc(len + extra + 1); + if (!out) { + return NULL; + } + + char *dst = out; + for (const char *p = text; p && *p; p++) { + if (strchr(meta, *p)) { + *dst++ = '\\'; } + *dst++ = *p; } + *dst = '\0'; + return out; } -/* Intermediate grep match */ -typedef struct { - char file[512]; - int line; - char content[1024]; -} grep_match_t; +static char *build_exact_regex_pattern(const char *text) { + char *escaped = regex_escape_literal(text ? text : ""); + if (!escaped) { + return NULL; + } + size_t len = strlen(escaped) + 3; + char *pattern = malloc(len); + if (!pattern) { + free(escaped); + return NULL; + } + snprintf(pattern, len, "^%s$", escaped); + free(escaped); + return pattern; +} -/* Deduped result: one per containing graph node */ -typedef struct { - int64_t node_id; /* 0 = raw match (no containing node) */ - char node_name[256]; - char qualified_name[512]; - char label[64]; - char file[512]; - int start_line; - int end_line; - int in_degree; - int out_degree; - int score; - int match_lines[64]; - int match_count; -} search_result_t; +static char *build_contains_regex_pattern(const char *text) { + char *escaped = regex_escape_literal(text ? text : ""); + if (!escaped) { + return NULL; + } + size_t len = strlen(escaped) + 5; + char *pattern = malloc(len); + if (!pattern) { + free(escaped); + return NULL; + } + snprintf(pattern, len, ".*%s.*", escaped); + free(escaped); + return pattern; +} -/* Score a result for ranking: project source first, vendored last, tests lowest */ -enum { SCORE_FUNC = 10, SCORE_ROUTE = 15, SCORE_VENDORED = -50, SCORE_TEST = -5 }; -enum { MAX_LINE_SPAN = 999999 }; +static char *build_contains_glob_pattern(const char *text) { + size_t len = strlen(text ? text : "") + 3; + char *pattern = malloc(len); + if (!pattern) { + return NULL; + } + snprintf(pattern, len, "*%s*", text ? 
text : ""); + return pattern; +} -static int compute_search_score(const search_result_t *r) { - int score = r->in_degree; - if (strcmp(r->label, "Function") == 0 || strcmp(r->label, "Method") == 0) { - score += SCORE_FUNC; +static bool search_result_matches_area(const cbm_search_result_t *sr, const char *area) { + if (!sr || !area || !area[0]) { + return false; } - if (strcmp(r->label, "Route") == 0) { - score += SCORE_ROUTE; + return (sr->node.name && cbm_strcasestr(sr->node.name, area)) || + (sr->node.qualified_name && cbm_strcasestr(sr->node.qualified_name, area)) || + (sr->node.file_path && cbm_strcasestr(sr->node.file_path, area)); +} + +static bool key_symbol_matches_area(const cbm_key_symbol_t *sym, const char *area) { + if (!sym || !area || !area[0]) { + return false; } - if (strstr(r->file, "vendored/") || strstr(r->file, "vendor/") || - strstr(r->file, "node_modules/")) { - score += SCORE_VENDORED; + return (sym->name && cbm_strcasestr(sym->name, area)) || + (sym->qualified_name && cbm_strcasestr(sym->qualified_name, area)) || + (sym->file_path && cbm_strcasestr(sym->file_path, area)); +} + +static bool entry_point_matches_area(const cbm_entry_point_t *entry, const char *area) { + if (!entry || !area || !area[0]) { + return false; } - /* Penalize test files */ - if (strstr(r->file, "test") || strstr(r->file, "spec") || strstr(r->file, "_test.")) { - score += SCORE_TEST; + return (entry->name && cbm_strcasestr(entry->name, area)) || + (entry->qualified_name && cbm_strcasestr(entry->qualified_name, area)) || + (entry->file && cbm_strcasestr(entry->file, area)); +} + +static int search_result_ref_rank_cmp(const void *lhs, const void *rhs) { + const cbm_search_result_t *a = *(const cbm_search_result_t *const *)lhs; + const cbm_search_result_t *b = *(const cbm_search_result_t *const *)rhs; + if (a->pagerank < b->pagerank) { + return 1; } - return score; + if (a->pagerank > b->pagerank) { + return -1; + } + int a_degree = a->in_degree + a->out_degree; + int 
b_degree = b->in_degree + b->out_degree; + if (a_degree != b_degree) { + return b_degree - a_degree; + } + const char *aqn = a->node.qualified_name ? a->node.qualified_name : ""; + const char *bqn = b->node.qualified_name ? b->node.qualified_name : ""; + return strcmp(aqn, bqn); } -static int search_result_cmp(const void *a, const void *b) { - const search_result_t *ra = (const search_result_t *)a; - const search_result_t *rb = (const search_result_t *)b; - return rb->score - ra->score; /* descending */ +static int understand_exact_match_cmp(const void *lhs, const void *rhs) { + const cbm_search_result_t *a = *(const cbm_search_result_t *const *)lhs; + const cbm_search_result_t *b = *(const cbm_search_result_t *const *)rhs; + bool a_test = cbm_is_test_file_path(a->node.file_path); + bool b_test = cbm_is_test_file_path(b->node.file_path); + if (a_test != b_test) { + return a_test ? 1 : -1; + } + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->in_degree != b->in_degree) { + return b->in_degree - a->in_degree; + } + const char *aqn = a->node.qualified_name ? a->node.qualified_name : ""; + const char *bqn = b->node.qualified_name ? b->node.qualified_name : ""; + return strcmp(aqn, bqn); } -/* Build the grep command string based on scoped vs recursive mode */ -static void build_grep_cmd(char *cmd, size_t cmd_sz, bool use_regex, bool scoped, - const char *file_pattern, const char *tmpfile, const char *filelist, - const char *root_path) { - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *flag = use_regex ? 
"-E" : "-F"; - if (scoped) { - if (file_pattern) { - snprintf(cmd, cmd_sz, "xargs grep -n %s --include='%s' -f '%s' < '%s' 2>/dev/null", - flag, file_pattern, tmpfile, filelist); - } else { - snprintf(cmd, cmd_sz, "xargs grep -n %s -f '%s' < '%s' 2>/dev/null", flag, tmpfile, - filelist); +static int connected_symbol_cmp(const void *lhs, const void *rhs) { + const connected_symbol_t *a = lhs; + const connected_symbol_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + const char *an = a->node.name ? a->node.name : ""; + const char *bn = b->node.name ? b->node.name : ""; + return strcmp(an, bn); +} + +static bool append_unique_search_ref(const cbm_search_result_t *sr, + const cbm_search_result_t ***refs, int *count, int *cap) { + if (!sr || !refs || !count || !cap) { + return false; + } + for (int i = 0; i < *count; i++) { + if ((*refs)[i]->node.id == sr->node.id) { + return true; } - } else { - if (file_pattern) { - snprintf(cmd, cmd_sz, "grep -rn %s --include='%s' -f '%s' '%s' 2>/dev/null", flag, - file_pattern, tmpfile, root_path); - } else { - snprintf(cmd, cmd_sz, "grep -rn %s -f '%s' '%s' 2>/dev/null", flag, tmpfile, root_path); + } + if (*count >= *cap) { + *cap = *cap > 0 ? 
(*cap * 2) : 16; + *refs = safe_realloc((void *)*refs, (size_t)*cap * sizeof(**refs)); + } + (*refs)[(*count)++] = sr; + return true; +} + +static void append_matching_refs(const cbm_search_output_t *out, const char *area, + bool symbol_only, const cbm_search_result_t ***refs, + int *count, int *cap) { + if (!out || !refs || !count || !cap) { + return; + } + for (int i = 0; i < out->count; i++) { + if (symbol_only && !compound_is_symbol_label(out->results[i].node.label)) { + continue; + } + if (area && area[0] && !search_result_matches_area(&out->results[i], area)) { + continue; } + (void)append_unique_search_ref(&out->results[i], refs, count, cap); } } -/* Phase 4: assemble JSON output from search results */ -static char *assemble_search_output(search_result_t *sr, int sr_count, grep_match_t *raw, - int raw_count, int gm_count, int limit, int mode, - int context_lines, const char *root_path) { - enum { MODE_COMPACT = 0, MODE_FULL = 1, MODE_FILES = 2 }; +static char *load_node_source(cbm_mcp_server_t *srv, const cbm_node_t *node) { + if (!srv || !node || !node->project || !node->file_path) { + return NULL; + } - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); + char *root_path = get_project_root(srv, node->project); + if (!root_path) { + return NULL; + } - int output_count = sr_count < limit ? sr_count : limit; + int start = node->start_line > 0 ? node->start_line : 1; + int end = node->end_line > start ? 
node->end_line : start + SNIPPET_DEFAULT_LINES; + size_t path_len = strlen(root_path) + strlen(node->file_path) + 2; + char *abs_path = malloc(path_len); + char *source = NULL; + if (!abs_path) { + free(root_path); + return NULL; + } + snprintf(abs_path, path_len, "%s/%s", root_path, node->file_path); - if (mode == MODE_FILES) { - yyjson_mut_val *files_arr = yyjson_mut_arr(doc); - char *seen_files[512]; - int seen_count = 0; - for (int fi = 0; fi < output_count; fi++) { - bool dup = false; - for (int j = 0; j < seen_count; j++) { - if (strcmp(seen_files[j], sr[fi].file) == 0) { - dup = true; - break; - } - } - if (!dup && seen_count < 512) { - seen_files[seen_count++] = sr[fi].file; - yyjson_mut_arr_add_str(doc, files_arr, sr[fi].file); - } + char real_root[4096]; + char real_file[4096]; + bool path_ok = false; +#ifdef _WIN32 + if (_fullpath(real_root, root_path, sizeof(real_root)) && + _fullpath(real_file, abs_path, sizeof(real_file))) { +#else + if (realpath(root_path, real_root) && realpath(abs_path, real_file)) { +#endif + size_t root_len = strlen(real_root); + if (strncmp(real_file, real_root, root_len) == 0 && + (real_file[root_len] == '/' || real_file[root_len] == '\\' || + real_file[root_len] == '\0')) { + path_ok = true; } - for (int fi = 0; fi < raw_count && seen_count < 512; fi++) { - bool dup = false; - for (int j = 0; j < seen_count; j++) { - if (strcmp(seen_files[j], raw[fi].file) == 0) { - dup = true; + } + if (path_ok) { + source = read_file_lines(abs_path, start, end); + } + + free(abs_path); + free(root_path); + return source; +} + +static void free_name_lists(char **items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free(items[i]); + } + free(items); +} + +static void free_explore_dependencies(explore_dependency_t *deps, int count) { + if (!deps) { + return; + } + for (int i = 0; i < count; i++) { + free_name_lists(deps[i].callers, deps[i].caller_count); + free_name_lists(deps[i].callees, 
deps[i].callee_count); + } + free(deps); +} + +static int collect_explore_dependencies(cbm_store_t *store, const cbm_search_result_t **matches, + int match_count, explore_dependency_t **out, int *out_count) { + *out = NULL; + *out_count = 0; + if (!store || !matches || match_count <= 0) { + return CBM_STORE_OK; + } + + int dep_count = match_count < EXPLORE_DEPENDENCY_LIMIT ? match_count : EXPLORE_DEPENDENCY_LIMIT; + explore_dependency_t *deps = calloc((size_t)dep_count, sizeof(*deps)); + if (!deps) { + return CBM_STORE_ERR; + } + + for (int i = 0; i < dep_count; i++) { + deps[i].match = matches[i]; + if (cbm_store_node_neighbor_names(store, matches[i]->node.id, 5, &deps[i].callers, + &deps[i].caller_count, &deps[i].callees, + &deps[i].callee_count) != CBM_STORE_OK) { + free_explore_dependencies(deps, dep_count); + return CBM_STORE_ERR; + } + } + + *out = deps; + *out_count = dep_count; + return CBM_STORE_OK; +} + +static int collect_connected_symbols(const cbm_traverse_result_t *callers, + const cbm_traverse_result_t *callees, + connected_symbol_t **out, int *out_count) { + *out = NULL; + *out_count = 0; + + int cap = callers->visited_count + callees->visited_count; + if (cap <= 0) { + return CBM_STORE_OK; + } + + connected_symbol_t *items = calloc((size_t)cap, sizeof(*items)); + if (!items) { + return CBM_STORE_ERR; + } + + int count = 0; + const cbm_traverse_result_t *groups[] = {callers, callees}; + const unsigned masks[] = {CONNECTED_REL_CALLER, CONNECTED_REL_CALLEE}; + for (int g = 0; g < 2; g++) { + for (int i = 0; i < groups[g]->visited_count; i++) { + const cbm_node_hop_t *hop = &groups[g]->visited[i]; + int found = -1; + for (int j = 0; j < count; j++) { + if (items[j].node.id == hop->node.id) { + found = j; break; } } - if (!dup) { - seen_files[seen_count++] = raw[fi].file; - yyjson_mut_arr_add_str(doc, files_arr, raw[fi].file); + if (found >= 0) { + items[found].relation_mask |= masks[g]; + if (hop->pagerank > items[found].pagerank) { + 
items[found].pagerank = hop->pagerank; + } + continue; } + + copy_node(&hop->node, &items[count].node); + items[count].pagerank = hop->pagerank; + items[count].relation_mask = masks[g]; + count++; } - yyjson_mut_obj_add_val(doc, root_obj, "files", files_arr); - } else { - yyjson_mut_val *results_arr = yyjson_mut_arr(doc); - for (int ri = 0; ri < output_count; ri++) { - search_result_t *r = &sr[ri]; - yyjson_mut_val *item = yyjson_mut_obj(doc); + } - yyjson_mut_obj_add_str(doc, item, "node", r->node_name); - yyjson_mut_obj_add_str(doc, item, "qualified_name", r->qualified_name); - yyjson_mut_obj_add_str(doc, item, "label", r->label); - yyjson_mut_obj_add_str(doc, item, "file", r->file); - yyjson_mut_obj_add_int(doc, item, "start_line", r->start_line); - yyjson_mut_obj_add_int(doc, item, "end_line", r->end_line); - yyjson_mut_obj_add_int(doc, item, "in_degree", r->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", r->out_degree); + qsort(items, (size_t)count, sizeof(*items), connected_symbol_cmp); + *out = items; + *out_count = count; + return CBM_STORE_OK; +} - yyjson_mut_val *ml = yyjson_mut_arr(doc); - for (int j = 0; j < r->match_count; j++) { - yyjson_mut_arr_add_int(doc, ml, r->match_lines[j]); - } - yyjson_mut_obj_add_val(doc, item, "match_lines", ml); +static void free_connected_symbols(connected_symbol_t *items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + cbm_node_free_fields(&items[i].node); + } + free(items); +} + +static const char *connected_relation_label(unsigned relation_mask) { + if ((relation_mask & CONNECTED_REL_CALLER) && (relation_mask & CONNECTED_REL_CALLEE)) { + return "both"; + } + if (relation_mask & CONNECTED_REL_CALLER) { + return "caller"; + } + return "callee"; +} + +static bool understand_is_key_symbol(cbm_store_t *store, const char *project, + const cbm_search_result_t *selected) { + cbm_key_symbol_t *symbols = NULL; + int count = 0; + bool found = false; + if 
(cbm_store_get_key_symbols(store, project, NULL, UNDERSTAND_KEY_SYMBOL_LIMIT, &symbols, + &count) != CBM_STORE_OK) { + return false; + } + for (int i = 0; i < count; i++) { + if (selected->node.qualified_name && symbols[i].qualified_name && + strcmp(selected->node.qualified_name, symbols[i].qualified_name) == 0) { + found = true; + break; + } + } + cbm_store_key_symbols_free(symbols, count); + return found; +} + +static int append_unique_string(char ***items, int *count, const char *value) { + if (!items || !count || !value || !value[0]) { + return CBM_STORE_OK; + } + for (int i = 0; i < *count; i++) { + if (strcmp((*items)[i], value) == 0) { + return CBM_STORE_OK; + } + } + char **resized = safe_realloc(*items, (size_t)(*count + 1) * sizeof(**items)); + *items = resized; + (*items)[*count] = heap_strdup(value); + if (!(*items)[*count]) { + return CBM_STORE_ERR; + } + (*count)++; + return CBM_STORE_OK; +} + +static void free_string_array(char **items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free(items[i]); + } + free(items); +} + +static void review_scope_free(review_scope_t *scope) { + if (!scope) { + return; + } + free_string_array(scope->must_review, scope->must_review_count); + free_string_array(scope->should_review, scope->should_review_count); + free_string_array(scope->tests, scope->test_count); + memset(scope, 0, sizeof(*scope)); +} + +static int build_review_scope(const cbm_impact_analysis_t *impact, bool include_tests, + review_scope_t *scope) { + memset(scope, 0, sizeof(*scope)); + if (append_unique_string(&scope->must_review, &scope->must_review_count, impact->file) != + CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + for (int i = 0; i < impact->direct_count; i++) { + if (append_unique_string(&scope->must_review, &scope->must_review_count, + impact->direct[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + for (int i = 0; i < impact->indirect_count; 
i++) { + if (append_unique_string(&scope->should_review, &scope->should_review_count, + impact->indirect[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + for (int i = 0; i < impact->transitive_count; i++) { + if (append_unique_string(&scope->should_review, &scope->should_review_count, + impact->transitive[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + if (include_tests) { + for (int i = 0; i < impact->affected_test_count; i++) { + if (append_unique_string(&scope->tests, &scope->test_count, + impact->affected_tests[i].file) != CBM_STORE_OK) { + review_scope_free(scope); + return CBM_STORE_ERR; + } + } + } + return CBM_STORE_OK; +} + +static size_t estimate_string_list_entry_chars(const char *value) { + return strlen(value ? value : "") + 8; +} + +static size_t estimate_key_symbol_chars(const cbm_key_symbol_t *sym, bool compact) { + size_t size = 72; + size += strlen(sym->name ? sym->name : ""); + size += strlen(sym->file_path ? sym->file_path : ""); + if (!compact) { + size += strlen(sym->qualified_name ? sym->qualified_name : ""); + size += strlen(sym->label ? sym->label : ""); + size += 32; + } else { + size += 16; + } + return size; +} + +static void add_key_symbol_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, const cbm_key_symbol_t *sym, + bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", sym->name ? sym->name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sym->file_path ? sym->file_path : ""); + if (compact) { + yyjson_mut_obj_add_real(doc, item, "pagerank", sym->pagerank); + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sym->qualified_name ? sym->qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sym->label ? 
sym->label : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", sym->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sym->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sym->pagerank); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_entry_point_chars(const cbm_entry_point_t *entry, bool compact) { + size_t size = 48; + size += strlen(entry->name ? entry->name : ""); + size += strlen(entry->file ? entry->file : ""); + if (!compact) { + size += strlen(entry->qualified_name ? entry->qualified_name : ""); + } else { + size += 12; + } + return size; +} + +static void add_entry_point_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_entry_point_t *entry, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", entry->name ? entry->name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", entry->file ? entry->file : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + entry->qualified_name ? entry->qualified_name : ""); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_explore_dependency_chars(const explore_dependency_t *dep, bool compact) { + size_t size = 64; + size += strlen(dep->match->node.name ? dep->match->node.name : ""); + size += strlen(dep->match->node.file_path ? dep->match->node.file_path : ""); + if (compact) { + size += 24; + return size; + } + size += strlen(dep->match->node.qualified_name ? dep->match->node.qualified_name : ""); + for (int i = 0; i < dep->caller_count; i++) { + size += strlen(dep->callers[i]); + } + for (int i = 0; i < dep->callee_count; i++) { + size += strlen(dep->callees[i]); + } + return size + 24; +} + +static void add_string_array_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, char **values, + int count) { + for (int i = 0; i < count; i++) { + yyjson_mut_arr_add_str(doc, arr, values[i] ? 
values[i] : ""); + } +} + +static void add_explore_dependency_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const explore_dependency_t *dep, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", + dep->match->node.name ? dep->match->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + dep->match->node.file_path ? dep->match->node.file_path : ""); + if (compact) { + yyjson_mut_obj_add_int(doc, item, "caller_count", dep->caller_count); + yyjson_mut_obj_add_int(doc, item, "callee_count", dep->callee_count); + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + dep->match->node.qualified_name ? dep->match->node.qualified_name + : ""); + yyjson_mut_val *callers = yyjson_mut_arr(doc); + yyjson_mut_val *callees = yyjson_mut_arr(doc); + add_string_array_json(doc, callers, dep->callers, dep->caller_count); + add_string_array_json(doc, callees, dep->callees, dep->callee_count); + yyjson_mut_obj_add_val(doc, item, "callers", callers); + yyjson_mut_obj_add_val(doc, item, "callees", callees); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_alternative_chars(const cbm_search_result_t *sr, bool compact) { + size_t size = 48; + size += strlen(sr->node.qualified_name ? sr->node.qualified_name : ""); + size += strlen(sr->node.file_path ? sr->node.file_path : ""); + size += compact ? 12 : strlen(sr->node.label ? sr->node.label : "") + 16; + return size; +} + +static void add_alternative_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const cbm_search_result_t *sr, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sr->node.file_path ? 
sr->node.file_path : ""); + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); + } + yyjson_mut_arr_add_val(arr, item); +} + +static size_t estimate_connected_symbol_chars(const connected_symbol_t *item, bool compact) { + size_t size = 64; + size += strlen(item->node.name ? item->node.name : ""); + size += strlen(item->node.file_path ? item->node.file_path : ""); + size += strlen(connected_relation_label(item->relation_mask)); + if (!compact) { + size += strlen(item->node.qualified_name ? item->node.qualified_name : ""); + size += 16; + } else { + size += 8; + } + return size; +} + +static void add_connected_symbol_json(yyjson_mut_doc *doc, yyjson_mut_val *arr, + const connected_symbol_t *item, bool compact) { + yyjson_mut_val *entry = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, entry, "name", item->node.name ? item->node.name : ""); + yyjson_mut_obj_add_str(doc, entry, "file_path", item->node.file_path ? item->node.file_path + : ""); + yyjson_mut_obj_add_int(doc, entry, "start_line", item->node.start_line); + yyjson_mut_obj_add_str(doc, entry, "relation", connected_relation_label(item->relation_mask)); + if (compact) { + yyjson_mut_obj_add_bool(doc, entry, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, entry, "qualified_name", + item->node.qualified_name ? item->node.qualified_name : ""); + yyjson_mut_obj_add_real(doc, entry, "pagerank", item->pagerank); + } + yyjson_mut_arr_add_val(arr, entry); +} + +static size_t estimate_understand_definition_chars(const cbm_search_result_t *selected, + const char *source, bool include_source) { + size_t size = 96; + size += strlen(selected->node.file_path ? 
selected->node.file_path : ""); + size += estimate_signature_field_chars(&selected->node); + if (include_source && source) { + size += strlen(source) + 24; + } + return size; +} + +static void add_understand_definition_json(yyjson_mut_doc *doc, yyjson_mut_val *root, + const cbm_search_result_t *selected, + const char *source) { + yyjson_mut_val *definition = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, definition, "file_path", + selected->node.file_path ? selected->node.file_path : ""); + yyjson_mut_obj_add_int(doc, definition, "start_line", selected->node.start_line); + yyjson_mut_obj_add_int(doc, definition, "end_line", selected->node.end_line); + + char *signature = node_signature_dup(&selected->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, definition, "signature", signature); + } + free(signature); + + if (source) { + yyjson_mut_obj_add_str(doc, definition, "source", source); + } + + yyjson_mut_obj_add_val(doc, root, "definition", definition); +} + +static char *build_symbol_suggestions_response(const char *input, cbm_node_t *nodes, int count) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "status", "ambiguous"); + char msg[512]; + snprintf(msg, sizeof(msg), + "%d matches for \"%s\". Pick a qualified_name from suggestions below, " + "or use search_graph(name_pattern=\"...\") to narrow results.", + count, input); + yyjson_mut_obj_add_str(doc, root, "message", msg); + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + nodes[i].qualified_name ? nodes[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "name", nodes[i].name ? nodes[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "label", nodes[i].label ? 
nodes[i].label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", nodes[i].file_path ? nodes[i].file_path + : ""); + yyjson_mut_arr_add_val(arr, item); + } + yyjson_mut_obj_add_val(doc, root, "suggestions", arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static int run_store_search(cbm_store_t *store, const cbm_search_params_t *params, + cbm_search_output_t *out) { + memset(out, 0, sizeof(*out)); + return cbm_store_search(store, params, out); +} + +static int search_exact_qn(cbm_store_t *store, const char *project, const char *qn, + cbm_search_output_t *out) { + char *pattern = build_exact_regex_pattern(qn); + if (!pattern) { + return CBM_STORE_ERR; + } + cbm_search_params_t params = { + .project = project, + .qn_pattern = pattern, + .limit = 16, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + int rc = run_store_search(store, ¶ms, out); + free(pattern); + return rc; +} + +static int search_exact_name(cbm_store_t *store, const char *project, const char *name, + cbm_search_output_t *out) { + char *pattern = build_exact_regex_pattern(name); + if (!pattern) { + return CBM_STORE_ERR; + } + cbm_search_params_t params = { + .project = project, + .name_pattern = pattern, + .limit = 64, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + int rc = run_store_search(store, ¶ms, out); + free(pattern); + return rc; +} + +static int search_contains(cbm_store_t *store, const char *project, const char *name_pattern, + const char *qn_pattern, const char *file_pattern, + cbm_search_output_t *out) { + cbm_search_params_t params = { + .project = project, + .name_pattern = name_pattern, + .qn_pattern = qn_pattern, + .file_pattern = file_pattern, + .limit = EXPLORE_SEARCH_LIMIT, + .sort_by = "relevance", + .min_degree = -1, + .max_degree = -1, + }; + return run_store_search(store, ¶ms, out); +} + +static char 
*handle_explore(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *area = cbm_mcp_get_string_arg(args, "area"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!area || !area[0]) { + free(project); + free(area); + return cbm_mcp_text_result("area is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(area); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(area); + return not_indexed; + } + + char *contains_regex = build_contains_regex_pattern(area); + char *file_glob = build_contains_glob_pattern(area); + cbm_search_output_t name_out = {0}; + cbm_search_output_t qn_out = {0}; + cbm_search_output_t file_out = {0}; + const cbm_search_result_t **matches = NULL; + int match_count = 0; + int match_cap = 0; + explore_dependency_t *deps = NULL; + int dep_count = 0; + cbm_key_symbol_t *hotspots = NULL; + int hotspot_count = 0; + cbm_architecture_info_t arch = {0}; + cbm_entry_point_t **entry_refs = NULL; + int entry_count = 0; + int entry_cap = 0; + char *result = NULL; + + if (!contains_regex || !file_glob) { + result = cbm_mcp_text_result("failed to allocate explore filters", true); + goto cleanup_explore; + } + if (search_contains(store, project, contains_regex, NULL, NULL, &name_out) != CBM_STORE_OK || + search_contains(store, project, NULL, contains_regex, NULL, &qn_out) != CBM_STORE_OK || + search_contains(store, project, NULL, NULL, file_glob, &file_out) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load explore matches", true); + goto cleanup_explore; + } + + append_matching_refs(&name_out, area, false, &matches, 
&match_count, &match_cap); + append_matching_refs(&qn_out, area, false, &matches, &match_count, &match_cap); + append_matching_refs(&file_out, area, false, &matches, &match_count, &match_cap); + if (match_count > 0) { + qsort(matches, (size_t)match_count, sizeof(*matches), search_result_ref_rank_cmp); + } + + if (collect_explore_dependencies(store, matches, match_count, &deps, &dep_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load explore dependencies", true); + goto cleanup_explore; + } + + if (cbm_store_get_key_symbols(store, project, area, 10, &hotspots, &hotspot_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load area hotspots", true); + goto cleanup_explore; + } + + const char *aspects[] = {"entry_points"}; + if (cbm_store_get_architecture(store, project, aspects, 1, &arch) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load entry points", true); + goto cleanup_explore; + } + for (int i = 0; i < arch.entry_point_count; i++) { + if (!entry_point_matches_area(&arch.entry_points[i], area)) { + continue; + } + if (entry_count >= entry_cap) { + entry_cap = entry_cap > 0 ? 
entry_cap * 2 : 8; + entry_refs = safe_realloc(entry_refs, (size_t)entry_cap * sizeof(*entry_refs)); + } + entry_refs[entry_count++] = &arch.entry_points[i]; + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_str(doc, root, "area", area); + + yyjson_mut_val *match_arr = yyjson_mut_arr(doc); + for (int i = 0; i < match_count; i++) { + add_search_result_item(doc, match_arr, matches[i], false); + } + yyjson_mut_obj_add_val(doc, root, "matches", match_arr); + + yyjson_mut_val *dep_arr = yyjson_mut_arr(doc); + for (int i = 0; i < dep_count; i++) { + add_explore_dependency_json(doc, dep_arr, &deps[i], false); + } + yyjson_mut_obj_add_val(doc, root, "dependencies", dep_arr); + + int filtered_hotspot_count = 0; + for (int i = 0; i < hotspot_count; i++) { + if (key_symbol_matches_area(&hotspots[i], area)) { + filtered_hotspot_count++; + } + } + + yyjson_mut_val *hotspot_arr = yyjson_mut_arr(doc); + for (int i = 0; i < hotspot_count; i++) { + if (!key_symbol_matches_area(&hotspots[i], area)) { + continue; + } + add_key_symbol_json(doc, hotspot_arr, &hotspots[i], false); + } + yyjson_mut_obj_add_val(doc, root, "hotspots", hotspot_arr); + + yyjson_mut_val *entry_arr = yyjson_mut_arr(doc); + for (int i = 0; i < entry_count; i++) { + add_entry_point_json(doc, entry_arr, entry_refs[i], false); + } + yyjson_mut_obj_add_val(doc, root, "entry_points", entry_arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_str(doc, root, "area", area); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + match_count + dep_count + filtered_hotspot_count + entry_count); + + size_t used = 64 + strlen(area); + int shown = 0; + int 
full_items = 0; + bool stop = false; + + match_arr = yyjson_mut_arr(doc); + for (int i = 0; i < match_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_search_result_chars(matches[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_search_result_chars(matches[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_search_result_item(doc, match_arr, matches[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + yyjson_mut_obj_add_val(doc, root, "matches", match_arr); + + dep_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < dep_count; i++) { + bool compact = true; + size_t estimate = estimate_explore_dependency_chars(&deps[i], compact); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_explore_dependency_json(doc, dep_arr, &deps[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "dependencies", dep_arr); + + hotspot_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < hotspot_count; i++) { + if (!key_symbol_matches_area(&hotspots[i], area)) { + continue; + } + bool compact = true; + size_t estimate = estimate_key_symbol_chars(&hotspots[i], compact); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_key_symbol_json(doc, hotspot_arr, &hotspots[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "hotspots", hotspot_arr); + + entry_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < entry_count; i++) { + bool compact = true; + size_t estimate = estimate_entry_point_chars(entry_refs[i], compact); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_entry_point_json(doc, entry_arr, entry_refs[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "entry_points", 
entry_arr); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_explore: + free(contains_regex); + free(file_glob); + free(matches); + free(entry_refs); + free_explore_dependencies(deps, dep_count); + cbm_store_search_free(&name_out); + cbm_store_search_free(&qn_out); + cbm_store_search_free(&file_out); + cbm_store_key_symbols_free(hotspots, hotspot_count); + cbm_store_architecture_free(&arch); + free(project); + free(area); + return result; +} + +static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + + if (!symbol || !symbol[0]) { + free(project); + free(symbol); + return cbm_mcp_text_result("symbol is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_search_output_t qn_out = {0}; + cbm_search_output_t exact_name_out = {0}; + cbm_search_output_t selected_out = {0}; + cbm_node_t *suffix_nodes = NULL; + int suffix_count = 0; + const cbm_search_result_t **exact_refs = NULL; + int exact_ref_count = 0; + int exact_ref_cap = 0; + const cbm_search_result_t *selected = NULL; + bool auto_picked = false; + cbm_traverse_result_t callers = {0}; + cbm_traverse_result_t callees = {0}; + connected_symbol_t *connected = NULL; + int connected_count = 0; + bool is_key_symbol = false; + char *source 
= NULL; + char *result = NULL; + + if (search_exact_qn(store, project, symbol, &qn_out) == CBM_STORE_OK && qn_out.count > 0) { + selected = &qn_out.results[0]; + } else if (search_exact_name(store, project, symbol, &exact_name_out) == CBM_STORE_OK) { + append_matching_refs(&exact_name_out, NULL, true, &exact_refs, &exact_ref_count, + &exact_ref_cap); + if (exact_ref_count > 0) { + qsort(exact_refs, (size_t)exact_ref_count, sizeof(*exact_refs), + understand_exact_match_cmp); + selected = exact_refs[0]; + auto_picked = exact_ref_count > 1; + } + } + + if (!selected) { + if (cbm_store_find_nodes_by_qn_suffix(store, project, symbol, &suffix_nodes, + &suffix_count) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to resolve symbol", true); + goto cleanup_understand; + } + + int filtered = 0; + for (int i = 0; i < suffix_count; i++) { + if (compound_is_symbol_label(suffix_nodes[i].label)) { + suffix_nodes[filtered++] = suffix_nodes[i]; + } else { + cbm_node_free_fields(&suffix_nodes[i]); + } + } + suffix_count = filtered; + + if (suffix_count == 1) { + if (search_exact_qn(store, project, suffix_nodes[0].qualified_name, &selected_out) == + CBM_STORE_OK && + selected_out.count > 0) { + selected = &selected_out.results[0]; + } + } else if (suffix_count > 1) { + result = build_symbol_suggestions_response(symbol, suffix_nodes, suffix_count); + goto cleanup_understand; + } + } + + if (!selected) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. 
Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + result = cbm_mcp_text_result(err, true); + goto cleanup_understand; + } + + source = load_node_source(srv, &selected->node); + const char *edge_types[] = {"CALLS"}; + if (cbm_store_bfs(store, selected->node.id, "inbound", edge_types, 1, 1, + UNDERSTAND_MAX_BFS_RESULTS, &callers) != CBM_STORE_OK || + cbm_store_bfs(store, selected->node.id, "outbound", edge_types, 1, 1, + UNDERSTAND_MAX_BFS_RESULTS, &callees) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to load symbol dependencies", true); + goto cleanup_understand; + } + + if (callers.visited_count > 1) { + qsort(callers.visited, (size_t)callers.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } + if (callees.visited_count > 1) { + qsort(callees.visited, (size_t)callees.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } + if (collect_connected_symbols(&callers, &callees, &connected, &connected_count) != + CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build connected symbol summary", true); + goto cleanup_understand; + } + is_key_symbol = understand_is_key_symbol(store, project, selected); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", symbol); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + selected->node.qualified_name ? selected->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "label", selected->node.label ? 
selected->node.label : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", selected->pagerank); + yyjson_mut_obj_add_bool(doc, root, "is_key_symbol", is_key_symbol); + add_understand_definition_json(doc, root, selected, source); + + if (auto_picked) { + yyjson_mut_val *alts = yyjson_mut_arr(doc); + for (int i = 1; i < exact_ref_count; i++) { + add_alternative_json(doc, alts, exact_refs[i], false); + } + yyjson_mut_obj_add_val(doc, root, "alternatives", alts); + } + + yyjson_mut_val *caller_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callers.visited_count; i++) { + add_node_hop_item(doc, caller_arr, &callers.visited[i], false); + } + yyjson_mut_obj_add_val(doc, root, "callers", caller_arr); + + yyjson_mut_val *callee_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callees.visited_count; i++) { + add_node_hop_item(doc, callee_arr, &callees.visited[i], false); + } + yyjson_mut_obj_add_val(doc, root, "callees", callee_arr); + + yyjson_mut_val *connected_arr = yyjson_mut_arr(doc); + for (int i = 0; i < connected_count; i++) { + add_connected_symbol_json(doc, connected_arr, &connected[i], false); + } + yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", symbol); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + selected->node.qualified_name ? selected->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "label", + selected->node.label ? 
selected->node.label : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", selected->pagerank); + yyjson_mut_obj_add_bool(doc, root, "is_key_symbol", is_key_symbol); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + 1 + callers.visited_count + callees.visited_count + + connected_count + (auto_picked ? exact_ref_count - 1 : 0)); + + size_t used = 96 + strlen(symbol) + + strlen(selected->node.qualified_name ? selected->node.qualified_name : ""); + int shown = 1; + + char *compact_source = NULL; + if (source) { + compact_source = truncate_text_copy(source, MAX_COMPACT_SOURCE_CHARS); + } + add_understand_definition_json(doc, root, selected, NULL); + if (compact_source && compact_source[0]) { + size_t estimate = estimate_understand_definition_chars(selected, compact_source, true) - + estimate_understand_definition_chars(selected, NULL, false); + if (used + estimate <= char_budget) { + yyjson_mut_val *definition = yyjson_mut_obj_get(root, "definition"); + yyjson_mut_obj_add_strcpy(doc, definition, "source", compact_source); + yyjson_mut_obj_add_bool(doc, definition, "source_truncated", true); + used += estimate; + } + } + free(compact_source); + + if (auto_picked) { + yyjson_mut_val *alts = yyjson_mut_arr(doc); + for (int i = 1; i < exact_ref_count; i++) { + size_t estimate = estimate_alternative_chars(exact_refs[i], true); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_alternative_json(doc, alts, exact_refs[i], true); + used += estimate; + shown++; + } + yyjson_mut_obj_add_val(doc, root, "alternatives", alts); + } + + int full_items = 0; + bool stop = false; + caller_arr = yyjson_mut_arr(doc); + for (int i = 0; i < callers.visited_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&callers.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = 
estimate_node_hop_chars(&callers.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_node_hop_item(doc, caller_arr, &callers.visited[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + yyjson_mut_obj_add_val(doc, root, "callers", caller_arr); + + callee_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < callees.visited_count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&callees.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&callees.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_node_hop_item(doc, callee_arr, &callees.visited[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callees", callee_arr); + + connected_arr = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < connected_count; i++) { + bool compact = true; + size_t estimate = estimate_connected_symbol_chars(&connected[i], compact); + if (used + estimate > char_budget && shown > 0) { + break; + } + add_connected_symbol_json(doc, connected_arr, &connected[i], compact); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_understand: + free(project); + free(symbol); + free(source); + free(exact_refs); + cbm_store_search_free(&qn_out); + cbm_store_search_free(&exact_name_out); + cbm_store_search_free(&selected_out); + cbm_store_traverse_free(&callers); + cbm_store_traverse_free(&callees); + free_connected_symbols(connected, connected_count); + cbm_store_free_nodes(suffix_nodes, 
suffix_count); + return result; +} + +static size_t estimate_review_scope_chars(const review_scope_t *scope, bool include_tests) { + size_t size = 48; + for (int i = 0; i < scope->must_review_count; i++) { + size += estimate_string_list_entry_chars(scope->must_review[i]); + } + for (int i = 0; i < scope->should_review_count; i++) { + size += estimate_string_list_entry_chars(scope->should_review[i]); + } + if (include_tests) { + for (int i = 0; i < scope->test_count; i++) { + size += estimate_string_list_entry_chars(scope->tests[i]); + } + } + return size; +} + +static void add_review_scope_json(yyjson_mut_doc *doc, yyjson_mut_val *root, + const review_scope_t *scope, bool include_tests) { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_val *must = yyjson_mut_arr(doc); + yyjson_mut_val *should = yyjson_mut_arr(doc); + for (int i = 0; i < scope->must_review_count; i++) { + yyjson_mut_arr_add_str(doc, must, scope->must_review[i] ? scope->must_review[i] : ""); + } + for (int i = 0; i < scope->should_review_count; i++) { + yyjson_mut_arr_add_str(doc, should, scope->should_review[i] ? scope->should_review[i] : ""); + } + yyjson_mut_obj_add_val(doc, review, "must_review", must); + yyjson_mut_obj_add_val(doc, review, "should_review", should); + if (include_tests) { + yyjson_mut_val *tests = yyjson_mut_arr(doc); + for (int i = 0; i < scope->test_count; i++) { + yyjson_mut_arr_add_str(doc, tests, scope->tests[i] ? 
scope->tests[i] : ""); + } + yyjson_mut_obj_add_val(doc, review, "tests", tests); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); +} + +static int prepare_change_total_results(const cbm_impact_analysis_t *impact, + const review_scope_t *scope, bool include_tests) { + int total = impact_output_total_results(impact, include_tests); + total += scope->must_review_count + scope->should_review_count; + if (include_tests) { + total += scope->test_count; + } + return total; +} + +static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *symbol = cbm_mcp_get_string_arg(args, "symbol"); + bool include_tests = cbm_mcp_get_bool_arg_default(args, "include_tests", true); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); + cbm_store_t *store = resolve_store(srv, project); + char *result = NULL; + + if (!symbol || !symbol[0]) { + free(project); + free(symbol); + return cbm_mcp_text_result("symbol is required", true); + } + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(symbol); + return _res; + } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(symbol); + return not_indexed; + } + + cbm_impact_analysis_t impact = {0}; + review_scope_t scope = {0}; + char *summary_text = NULL; + + int rc = cbm_store_get_impact_analysis(store, project, symbol, 4, &impact); + if (rc == CBM_STORE_NOT_FOUND) { + char err[512]; + snprintf(err, sizeof(err), + "symbol not found. 
Use search_graph(name_pattern=\".*%s.*\") first to discover " + "the exact symbol name.", + symbol); + result = cbm_mcp_text_result(err, true); + goto cleanup_prepare_change; + } + if (rc != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build pre-change analysis", true); + goto cleanup_prepare_change; + } + + if (build_review_scope(&impact, include_tests, &scope) != CBM_STORE_OK) { + result = cbm_mcp_text_result("failed to build review scope", true); + goto cleanup_prepare_change; + } + + summary_text = impact_output_summary_dup(&impact, include_tests); + if (!summary_text) { + summary_text = heap_strdup(impact.summary ? impact.summary : ""); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? 
impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + + yyjson_mut_val *impact_obj = yyjson_mut_obj(doc); + yyjson_mut_val *direct = yyjson_mut_arr(doc); + for (int i = 0; i < impact.direct_count; i++) { + add_impact_item_json(doc, direct, &impact.direct[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "direct", direct); + + yyjson_mut_val *indirect = yyjson_mut_arr(doc); + for (int i = 0; i < impact.indirect_count; i++) { + add_impact_item_json(doc, indirect, &impact.indirect[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "indirect", indirect); + + yyjson_mut_val *transitive = yyjson_mut_arr(doc); + for (int i = 0; i < impact.transitive_count; i++) { + add_impact_item_json(doc, transitive, &impact.transitive[i], false); + } + yyjson_mut_obj_add_val(doc, impact_obj, "transitive", transitive); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + yyjson_mut_val *tests = yyjson_mut_arr(doc); + if (include_tests) { + for (int i = 0; i < impact.affected_test_count; i++) { + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + add_review_scope_json(doc, root, &scope, include_tests); + yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? summary_text : ""); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "symbol", impact.symbol ? impact.symbol : ""); + yyjson_mut_obj_add_str(doc, root, "qualified_name", + impact.qualified_name ? impact.qualified_name : ""); + yyjson_mut_obj_add_str(doc, root, "file", impact.file ? 
impact.file : ""); + yyjson_mut_obj_add_real(doc, root, "pagerank", impact.pagerank); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", + prepare_change_total_results(&impact, &scope, include_tests)); + + size_t used = 96; + used += strlen(impact.symbol ? impact.symbol : ""); + used += strlen(impact.qualified_name ? impact.qualified_name : ""); + used += strlen(impact.file ? impact.file : ""); + used += strlen(summary_text ? summary_text : ""); + used += strlen(impact.risk_score ? impact.risk_score : ""); + + int shown = 0; + int full_items = 0; + bool stop = false; + + impact_obj = yyjson_mut_obj(doc); + add_budgeted_impact_group(doc, impact_obj, "direct", impact.direct, impact.direct_count, + char_budget, &used, &shown, &full_items, &stop); + yyjson_mut_obj_add_val(doc, root, "impact", impact_obj); + + if (!stop) { + size_t estimate = estimate_review_scope_chars(&scope, include_tests); + if (used + estimate <= char_budget || shown == 0) { + add_review_scope_json(doc, root, &scope, include_tests); + used += estimate; + shown += scope.must_review_count + scope.should_review_count + + (include_tests ? 
scope.test_count : 0); + } else { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_val *must = yyjson_mut_arr(doc); + yyjson_mut_val *should = yyjson_mut_arr(doc); + for (int i = 0; i < scope.must_review_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.must_review[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, must, scope.must_review[i]); + used += item_estimate; + shown++; + } + if (!stop) { + for (int i = 0; i < scope.should_review_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.should_review[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, should, scope.should_review[i]); + used += item_estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, review, "must_review", must); + yyjson_mut_obj_add_val(doc, review, "should_review", should); + if (include_tests) { + yyjson_mut_val *review_tests = yyjson_mut_arr(doc); + if (!stop) { + for (int i = 0; i < scope.test_count; i++) { + size_t item_estimate = estimate_string_list_entry_chars(scope.tests[i]); + if (used + item_estimate > char_budget && shown > 0) { + stop = true; + break; + } + yyjson_mut_arr_add_str(doc, review_tests, scope.tests[i]); + used += item_estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, review, "tests", review_tests); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); + } + } else { + yyjson_mut_val *review = yyjson_mut_obj(doc); + yyjson_mut_obj_add_val(doc, review, "must_review", yyjson_mut_arr(doc)); + yyjson_mut_obj_add_val(doc, review, "should_review", yyjson_mut_arr(doc)); + if (include_tests) { + yyjson_mut_obj_add_val(doc, review, "tests", yyjson_mut_arr(doc)); + } + yyjson_mut_obj_add_val(doc, root, "review_scope", review); + } + + tests = yyjson_mut_arr(doc); + if (include_tests && !stop) { + for (int i = 0; i < impact.affected_test_count; i++) { + 
size_t estimate = estimate_affected_test_chars(&impact.affected_tests[i]); + if (used + estimate > char_budget && shown > 0) { + stop = true; + break; + } + add_affected_test_json(doc, tests, &impact.affected_tests[i]); + used += estimate; + shown++; + } + } + yyjson_mut_obj_add_val(doc, root, "affected_tests", tests); + + if (!stop) { + yyjson_mut_val *impact_existing = yyjson_mut_obj_get(root, "impact"); + add_budgeted_impact_group(doc, impact_existing, "indirect", impact.indirect, + impact.indirect_count, char_budget, &used, &shown, + &full_items, &stop); + add_budgeted_impact_group(doc, impact_existing, "transitive", impact.transitive, + impact.transitive_count, char_budget, &used, &shown, + &full_items, &stop); + } else { + yyjson_mut_val *impact_existing = yyjson_mut_obj_get(root, "impact"); + yyjson_mut_obj_add_val(doc, impact_existing, "indirect", yyjson_mut_arr(doc)); + yyjson_mut_obj_add_val(doc, impact_existing, "transitive", yyjson_mut_arr(doc)); + } + + yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); + yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + + result = cbm_mcp_text_result(json, false); + free(json); + +cleanup_prepare_change: + free(summary_text); + review_scope_free(&scope); + cbm_store_impact_analysis_free(&impact); + free(project); + free(symbol); + return result; +} + +/* ── search_code v2: graph-augmented code search ─────────────── */ + +/* Strip non-ASCII bytes to guarantee valid UTF-8 JSON output */ +enum { ASCII_MAX = 127 }; +static void sanitize_ascii(char *s) { + for (unsigned char *p = (unsigned char *)s; *p; p++) { + if (*p > ASCII_MAX) { + *p = '?'; + } + } +} + +/* Intermediate grep match */ +typedef struct { + char file[512]; + int line; + char content[1024]; +} grep_match_t; + +/* Deduped result: one per containing graph node */ +typedef struct { + int64_t node_id; /* 0 = raw match (no containing node) */ + char node_name[256]; + char qualified_name[512]; + char label[64]; + char file[512]; + int start_line; + int end_line; + int in_degree; + int out_degree; + int score; + int match_lines[64]; + int match_count; +} search_result_t; + +/* Score a result for ranking: project source first, vendored last, tests lowest */ +enum { SCORE_FUNC = 10, SCORE_ROUTE = 15, SCORE_VENDORED = -50, SCORE_TEST = -5 }; +enum { MAX_LINE_SPAN = 999999 }; + +static int compute_search_score(const search_result_t *r) { + int score = r->in_degree; + if (strcmp(r->label, "Function") == 0 || strcmp(r->label, "Method") == 0) { + score += SCORE_FUNC; + } + if (strcmp(r->label, "Route") == 0) { + score += SCORE_ROUTE; + } + if (strstr(r->file, "vendored/") || strstr(r->file, "vendor/") || + strstr(r->file, "node_modules/")) { + score += SCORE_VENDORED; + } + /* Penalize test files */ + if (strstr(r->file, "test") || strstr(r->file, "spec") || strstr(r->file, "_test.")) { + score += SCORE_TEST; + } + return score; +} + +static int search_result_cmp(const void *a, const void *b) { + 
const search_result_t *ra = (const search_result_t *)a; + const search_result_t *rb = (const search_result_t *)b; + return rb->score - ra->score; /* descending */ +} + +/* Build the grep command string based on scoped vs recursive mode */ +static void build_grep_cmd(char *cmd, size_t cmd_sz, bool use_regex, bool scoped, + const char *file_pattern, const char *tmpfile, const char *filelist, + const char *root_path) { + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *flag = use_regex ? "-E" : "-F"; + if (scoped) { + if (file_pattern) { + snprintf(cmd, cmd_sz, "xargs grep -n %s --include='%s' -f '%s' < '%s' 2>/dev/null", + flag, file_pattern, tmpfile, filelist); + } else { + snprintf(cmd, cmd_sz, "xargs grep -n %s -f '%s' < '%s' 2>/dev/null", flag, tmpfile, + filelist); + } + } else { + if (file_pattern) { + snprintf(cmd, cmd_sz, "grep -rn %s --include='%s' -f '%s' '%s' 2>/dev/null", flag, + file_pattern, tmpfile, root_path); + } else { + snprintf(cmd, cmd_sz, "grep -rn %s -f '%s' '%s' 2>/dev/null", flag, tmpfile, root_path); + } + } +} + +/* Phase 4: assemble JSON output from search results */ +static char *assemble_search_output(search_result_t *sr, int sr_count, grep_match_t *raw, + int raw_count, int gm_count, int limit, int mode, + int context_lines, const char *root_path) { + enum { MODE_COMPACT = 0, MODE_FULL = 1, MODE_FILES = 2 }; + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + int output_count = sr_count < limit ? 
sr_count : limit; + + if (mode == MODE_FILES) { + yyjson_mut_val *files_arr = yyjson_mut_arr(doc); + char *seen_files[512]; + int seen_count = 0; + for (int fi = 0; fi < output_count; fi++) { + bool dup = false; + for (int j = 0; j < seen_count; j++) { + if (strcmp(seen_files[j], sr[fi].file) == 0) { + dup = true; + break; + } + } + if (!dup && seen_count < 512) { + seen_files[seen_count++] = sr[fi].file; + yyjson_mut_arr_add_str(doc, files_arr, sr[fi].file); + } + } + for (int fi = 0; fi < raw_count && seen_count < 512; fi++) { + bool dup = false; + for (int j = 0; j < seen_count; j++) { + if (strcmp(seen_files[j], raw[fi].file) == 0) { + dup = true; + break; + } + } + if (!dup) { + seen_files[seen_count++] = raw[fi].file; + yyjson_mut_arr_add_str(doc, files_arr, raw[fi].file); + } + } + yyjson_mut_obj_add_val(doc, root_obj, "files", files_arr); + } else { + yyjson_mut_val *results_arr = yyjson_mut_arr(doc); + for (int ri = 0; ri < output_count; ri++) { + search_result_t *r = &sr[ri]; + yyjson_mut_val *item = yyjson_mut_obj(doc); + + yyjson_mut_obj_add_str(doc, item, "node", r->node_name); + yyjson_mut_obj_add_str(doc, item, "qualified_name", r->qualified_name); + yyjson_mut_obj_add_str(doc, item, "label", r->label); + yyjson_mut_obj_add_str(doc, item, "file", r->file); + yyjson_mut_obj_add_int(doc, item, "start_line", r->start_line); + yyjson_mut_obj_add_int(doc, item, "end_line", r->end_line); + yyjson_mut_obj_add_int(doc, item, "in_degree", r->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", r->out_degree); + + yyjson_mut_val *ml = yyjson_mut_arr(doc); + for (int j = 0; j < r->match_count; j++) { + yyjson_mut_arr_add_int(doc, ml, r->match_lines[j]); + } + yyjson_mut_obj_add_val(doc, item, "match_lines", ml); if (r->start_line > 0 && r->end_line > 0) { char abs_path[1024]; @@ -3923,6 +5577,15 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture_summary") == 0) { return 
handle_get_architecture_summary(srv, args_json); } + if (strcmp(tool_name, "explore") == 0) { + return handle_explore(srv, args_json); + } + if (strcmp(tool_name, "understand") == 0) { + return handle_understand(srv, args_json); + } + if (strcmp(tool_name, "prepare_change") == 0) { + return handle_prepare_change(srv, args_json); + } /* Pipeline-dependent tools */ if (strcmp(tool_name, "index_repository") == 0) { diff --git a/src/store/store.c b/src/store/store.c index 1823d074..5c420dbf 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2453,6 +2453,15 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear ADD_WHERE(bind_buf); BIND_TEXT(params->name_pattern); } + if (params->qn_pattern) { + if (params->case_sensitive) { + snprintf(bind_buf, sizeof(bind_buf), "n.qualified_name REGEXP ?%d", bind_idx + 1); + } else { + snprintf(bind_buf, sizeof(bind_buf), "iregexp(?%d, n.qualified_name)", bind_idx + 1); + } + ADD_WHERE(bind_buf); + BIND_TEXT(params->qn_pattern); + } if (params->file_pattern) { like_pattern = cbm_glob_to_like(params->file_pattern); snprintf(bind_buf, sizeof(bind_buf), "n.file_path LIKE ?%d", bind_idx + 1); @@ -5344,7 +5353,7 @@ static int summary_count_nodes(cbm_store_t *s, const char *project, const char * sqlite3_stmt *stmt = NULL; if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "summary_count_nodes"); - return 0; + return 0; /* return 0 rather than -1 so callers display "0" not "-1" */ } bind_text(stmt, 1, project); if (focus_like && focus_like[0]) { diff --git a/tests/test_integration.c b/tests/test_integration.c index b106c11d..147386bc 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -393,12 +393,40 @@ TEST(integ_mcp_get_key_symbols) { PASS(); } +TEST(integ_mcp_explore) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"area\":\"main\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("explore", args); + 
ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "matches")); + ASSERT_NOT_NULL(strstr(resp, "hotspots")); + ASSERT_NOT_NULL(strstr(resp, "main")); + free(resp); + PASS(); +} + +TEST(integ_mcp_understand) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("understand", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "qualified_name")); + ASSERT_NOT_NULL(strstr(resp, "definition")); + ASSERT_NOT_NULL(strstr(resp, "callers")); + ASSERT_NOT_NULL(strstr(resp, "return a + b")); + free(resp); + PASS(); +} + TEST(integ_mcp_get_impact_analysis) { char args[256]; snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"depth\":3}", g_project); char *resp = call_tool("get_impact_analysis", args); ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "symbol")); ASSERT_NOT_NULL(strstr(resp, "Add")); ASSERT_NOT_NULL(strstr(resp, "impact")); ASSERT_NOT_NULL(strstr(resp, "risk_score")); @@ -407,6 +435,19 @@ TEST(integ_mcp_get_impact_analysis) { PASS(); } +TEST(integ_mcp_prepare_change) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"symbol\":\"Add\",\"max_tokens\":400}", g_project); + + char *resp = call_tool("prepare_change", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "review_scope")); + ASSERT_NOT_NULL(strstr(resp, "risk_score")); + ASSERT_NOT_NULL(strstr(resp, "utils.go")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -589,7 +630,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 26; /* skip all integration tests */ + tf_skip_count += 29; /* skip all integration tests */ integration_teardown(); return; } @@ -611,7 +652,10 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_architecture); 
RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_get_key_symbols); + RUN_TEST(integ_mcp_explore); + RUN_TEST(integ_mcp_understand); RUN_TEST(integ_mcp_get_impact_analysis); + RUN_TEST(integ_mcp_prepare_change); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 9131a4e6..62c21f0a 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -142,6 +142,9 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); ASSERT_NOT_NULL(strstr(json, "get_impact_analysis")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); + ASSERT_NOT_NULL(strstr(json, "explore")); + ASSERT_NOT_NULL(strstr(json, "understand")); + ASSERT_NOT_NULL(strstr(json, "prepare_change")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); ASSERT_NOT_NULL(strstr(json, "delete_project")); @@ -307,6 +310,9 @@ TEST(server_handle_tools_list) { ASSERT_NOT_NULL(strstr(resp, "\"id\":2")); ASSERT_NOT_NULL(strstr(resp, "search_graph")); ASSERT_NOT_NULL(strstr(resp, "query_graph")); + ASSERT_NOT_NULL(strstr(resp, "explore")); + ASSERT_NOT_NULL(strstr(resp, "understand")); + ASSERT_NOT_NULL(strstr(resp, "prepare_change")); free(resp); cbm_mcp_server_free(srv); @@ -1344,6 +1350,16 @@ static cbm_mcp_server_t *setup_snippet_server(char *tmp_dir, size_t tmp_sz) { n_run2.end_line = 13; cbm_store_upsert_node(st, &n_run2); + cbm_node_t n_run3 = {0}; + n_run3.project = proj_name; + n_run3.label = "Function"; + n_run3.name = "Run"; + n_run3.qualified_name = "test-project.api.server.Run"; + n_run3.file_path = "main.go"; + n_run3.start_line = 11; + n_run3.end_line = 13; + cbm_store_upsert_node(st, &n_run3); + /* Create edges: HandleRequest -> ProcessOrder, HandleRequest -> Run1 */ cbm_edge_t e1 = {.project = proj_name, .source_id = id_hr, .target_id = id_po, .type = "CALLS"}; cbm_store_insert_edge(st, &e1); @@ -1353,6 +1369,11 @@ static cbm_mcp_server_t 
*setup_snippet_server(char *tmp_dir, size_t tmp_sz) { cbm_store_insert_edge(st, &e2); (void)id_run1; /* run1 used for edge above */ + if (cbm_store_compute_pagerank(st, proj_name, 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + return srv; } @@ -1686,6 +1707,291 @@ TEST(tool_get_impact_analysis_route_and_entry_point_typing) { PASS(); } +TEST(tool_explore_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"matches\"")); + ASSERT_NOT_NULL(strstr(text, "\"dependencies\"")); + ASSERT_NOT_NULL(strstr(text, "\"hotspots\"")); + ASSERT_NOT_NULL(strstr(text, "\"entry_points\"")); + ASSERT_NOT_NULL(strstr(text, "ProcessOrder")); + ASSERT_NOT_NULL(strstr(text, "CliEntry")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_explore_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\",\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_exact_short_name_autopicks_best_non_test) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "understand", "{\"project\":\"impact\",\"symbol\":\"Duplicate\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, 
"\"qualified_name\":\"impact.core.Duplicate\"")); + ASSERT_NOT_NULL(strstr(text, "\"alternatives\"")); + ASSERT_NOT_NULL(strstr(text, "impact.tests.Duplicate")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_qualified_name_resolution) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "understand", + "{\"project\":\"test-project\",\"symbol\":\"test-project.cmd.server.main.ProcessOrder\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL( + strstr(text, "\"qualified_name\":\"test-project.cmd.server.main.ProcessOrder\"")); + ASSERT_NOT_NULL(strstr(text, "\"definition\"")); + ASSERT_NOT_NULL(strstr(text, "\"source\"")); + ASSERT_NOT_NULL(strstr(text, "func ProcessOrder(id int)")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_understand_suffix_ambiguity_returns_suggestions) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "understand", "{\"project\":\"test-project\",\"symbol\":\"server.Run\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"status\":\"ambiguous\"")); + ASSERT_NOT_NULL(strstr(text, "test-project.cmd.server.Run")); + ASSERT_NOT_NULL(strstr(text, "test-project.api.server.Run")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_understand_max_tokens_truncates) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "understand", + "{\"project\":\"test-project\",\"symbol\":\"test-project.cmd.server.main.HandleRequest\"," + "\"max_tokens\":1}"); + 
ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"definition\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + +TEST(tool_prepare_change_basic) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"review_scope\"")); + ASSERT_NOT_NULL(strstr(text, "\"risk_score\":\"high\"")); + ASSERT_NOT_NULL(strstr(text, "\"must_review\"")); + ASSERT_NOT_NULL(strstr(text, "app/services/order_service.php")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_include_tests_false) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"include_tests\":false}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"affected_tests\":[]")); + ASSERT_NOT_NULL(strstr(text, "\"summary\":\"2 direct callers, 2 route/entry points, 1 transitive impacts\"")); + ASSERT_NULL(strstr(text, "\"review_scope\":{\"must_review\":[\"app/services/order_service.php\"],\"should_review\":[\"app/ui/browser_flow.php\"],\"tests\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\",\"max_tokens\":1}"); + 
ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"review_scope\"")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +/* ── Error-path tests for compound tools ──────────────────────── */ + +TEST(tool_explore_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"nonexistent\",\"area\":\"foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_explore_no_matches) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"zzzznonexistent\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + /* Should return valid JSON with empty arrays, not an error */ + ASSERT_NOT_NULL(strstr(text, "\"matches\"")); + ASSERT_NOT_NULL(strstr(text, "\"hotspots\"")); + ASSERT_NULL(strstr(text, "isError")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"nonexistent\",\"symbol\":\"Foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_understand_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ZZZNoSuchSymbol\"}"); + 
ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_missing_project) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "prepare_change", + "{\"project\":\"nonexistent\",\"symbol\":\"Foo\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_prepare_change_missing_symbol) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "prepare_change", + "{\"project\":\"impact\",\"symbol\":\"ZZZNoSuchSymbol\"}"); + ASSERT_NOT_NULL(raw); + ASSERT_NOT_NULL(strstr(raw, "isError")); + ASSERT_NOT_NULL(strstr(raw, "not found")); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -2426,6 +2732,21 @@ SUITE(mcp) { RUN_TEST(tool_get_impact_analysis_include_tests_false); RUN_TEST(tool_get_impact_analysis_max_tokens_truncates); RUN_TEST(tool_get_impact_analysis_route_and_entry_point_typing); + RUN_TEST(tool_explore_basic); + RUN_TEST(tool_explore_max_tokens_truncates); + RUN_TEST(tool_understand_exact_short_name_autopicks_best_non_test); + RUN_TEST(tool_understand_qualified_name_resolution); + RUN_TEST(tool_understand_suffix_ambiguity_returns_suggestions); + RUN_TEST(tool_understand_max_tokens_truncates); + RUN_TEST(tool_prepare_change_basic); + RUN_TEST(tool_prepare_change_include_tests_false); + RUN_TEST(tool_prepare_change_max_tokens_truncates); + RUN_TEST(tool_explore_missing_project); + RUN_TEST(tool_explore_no_matches); + RUN_TEST(tool_understand_missing_project); + RUN_TEST(tool_understand_missing_symbol); + RUN_TEST(tool_prepare_change_missing_project); + RUN_TEST(tool_prepare_change_missing_symbol); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); From d43cef9d98702716a7adaed7e57d0fcdba79a6bb Mon Sep 17 00:00:00 2001 From: maplenk Date: Thu, 26 Mar 2026 10:03:09 +0530 Subject: [PATCH 11/14] Add session memory tracking and get_session_context tool (Phase 7A) Track files read/edited, symbols queried, areas explored, and impact analyses across the MCP session. New get_session_context tool returns accumulated state plus related_untouched: graph neighbors of touched symbols ranked by PageRank that haven't been examined yet. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Makefile.cbm | 2 +- src/mcp/mcp.c | 282 ++++++++++++++++++++++++++++++++++++++++++++++ src/mcp/mcp.h | 3 + src/mcp/session.c | 225 ++++++++++++++++++++++++++++++++++++ src/mcp/session.h | 58 ++++++++++ tests/test_mcp.c | 220 ++++++++++++++++++++++++++++++++++++ 6 files changed, 789 insertions(+), 1 deletion(-) create mode 100644 src/mcp/session.c create mode 100644 src/mcp/session.h diff --git a/Makefile.cbm b/Makefile.cbm index b3bb4a8c..164e47b9 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -145,7 +145,7 @@ STORE_SRCS = src/store/store.c CYPHER_SRCS = src/cypher/cypher.c # MCP server module (new) -MCP_SRCS = src/mcp/mcp.c +MCP_SRCS = src/mcp/mcp.c src/mcp/session.c # Discover module (new) DISCOVER_SRCS = \ diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 09712bfe..bfca1e4b 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -8,7 +8,9 @@ // operations #include "mcp/mcp.h" +#include "mcp/session.h" #include "store/store.h" +#include "foundation/hash_table.h" #include "cypher/cypher.h" #include "pipeline/pipeline.h" #include "cli/cli.h" @@ -873,6 +875,16 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"traces\":{\"type\":\"array\",\"items\":{\"type\":" "\"object\"}},\"project\":{\"type\":" "\"string\"}},\"required\":[\"traces\",\"project\"]}"}, + + {"get_session_context", + "Get session memory: files read/edited, symbols queried, areas explored, " + "and related symbols not yet examined, ranked by PageRank.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"," + "\"description\":\"Project name (needed for related_untouched computation).\"}," + "\"include_related\":{\"type\":\"boolean\",\"default\":true," + "\"description\":\"Include graph neighbors of touched symbols that have not been " + "examined yet.\"},\"limit\":{\"type\":\"integer\",\"default\":10," + "\"description\":\"Max related_untouched items.\"}},\"required\":[]}"}, }; static const int TOOL_COUNT = 
sizeof(TOOLS) / sizeof(TOOLS[0]); @@ -1080,6 +1092,9 @@ struct cbm_mcp_server { struct cbm_config *config; /* external config ref (not owned) */ cbm_thread_t autoindex_tid; bool autoindex_active; /* true if auto-index thread was started */ + + /* Ephemeral session memory (Phase 7A) — tracks files/symbols/areas touched */ + cbm_session_state_t *session; }; cbm_mcp_server_t *cbm_mcp_server_new(const char *store_path) { @@ -1105,6 +1120,18 @@ cbm_store_t *cbm_mcp_server_store(cbm_mcp_server_t *srv) { return srv ? srv->store : NULL; } +struct cbm_session_state *cbm_mcp_server_session(cbm_mcp_server_t *srv) { + return srv ? srv->session : NULL; +} + +/* Lazily create session state on first use. */ +static cbm_session_state_t *ensure_session(cbm_mcp_server_t *srv) { + if (!srv->session) { + srv->session = cbm_session_create(); + } + return srv->session; +} + void cbm_mcp_server_set_project(cbm_mcp_server_t *srv, const char *project) { if (!srv) { return; @@ -1138,6 +1165,7 @@ void cbm_mcp_server_free(cbm_mcp_server_t *srv) { if (srv->owns_store && srv->store) { cbm_store_close(srv->store); } + cbm_session_free(srv->session); free(srv->current_project); free(srv); } @@ -1529,6 +1557,18 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { cbm_search_output_t out = {0}; cbm_store_search(store, &params, &out); + /* Session tracking: record top result names as queried symbols (cap 20) */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + int track_limit = out.count < 20 ? 
out.count : 20; + for (int i = 0; i < track_limit; i++) { + if (out.results[i].node.name) { + cbm_session_track_symbol(ss, out.results[i].node.name); + } + } + } + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -1963,6 +2003,13 @@ static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) return not_indexed; } + /* Session tracking */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + cbm_session_track_impact(ss, symbol); + } + cbm_impact_analysis_t impact = {0}; int rc = cbm_store_get_impact_analysis(store, project, symbol, depth, &impact); if (rc == CBM_STORE_NOT_FOUND) { @@ -2394,6 +2441,13 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } + /* Session tracking */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + cbm_session_track_symbol(ss, func_name); + } + if (!direction) { direction = heap_strdup("both"); } @@ -3002,10 +3056,16 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { /* Default to current project (same as all other tools) */ const char *effective_project = project ? 
project : srv->current_project; + /* Session tracking: bump count on every call */ + cbm_session_bump_query_count(ensure_session(srv)); + /* Tier 1: Exact QN match */ cbm_node_t node = {0}; int rc = cbm_store_find_node_by_qn(store, effective_project, qn, &node); if (rc == CBM_STORE_OK) { + if (node.file_path) { + cbm_session_track_file_read(ensure_session(srv), node.file_path); + } char *result = build_snippet_response(srv, &node, NULL, include_neighbors, NULL, 0); free_node_contents(&node); free(qn); @@ -3022,6 +3082,9 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { if (suffix_count == 1) { copy_node(&suffix_nodes[0], &node); cbm_store_free_nodes(suffix_nodes, suffix_count); + if (node.file_path) { + cbm_session_track_file_read(ensure_session(srv), node.file_path); + } char *result = build_snippet_response(srv, &node, "suffix", include_neighbors, NULL, 0); free_node_contents(&node); free(qn); @@ -3859,6 +3922,13 @@ static char *handle_explore(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } + /* Session tracking */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + cbm_session_track_area(ss, area); + } + char *contains_regex = build_contains_regex_pattern(area); char *file_glob = build_contains_glob_pattern(area); cbm_search_output_t name_out = {0}; @@ -4104,6 +4174,13 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } + /* Session tracking */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + cbm_session_track_symbol(ss, symbol); + } + cbm_search_output_t qn_out = {0}; cbm_search_output_t exact_name_out = {0}; cbm_search_output_t selected_out = {0}; @@ -4457,6 +4534,13 @@ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } + /* Session tracking */ + { + cbm_session_state_t *ss = ensure_session(srv); + cbm_session_bump_query_count(ss); + 
cbm_session_track_impact(ss, symbol); + } + cbm_impact_analysis_t impact = {0}; review_scope_t scope = {0}; char *summary_text = NULL; @@ -5354,6 +5438,10 @@ static char *handle_detect_changes(cbm_mcp_server_t *srv, const char *args) { /* resolve_store already called via get_project_root above */ cbm_store_t *store = srv->store; + /* Session tracking: bump count once for the detect_changes call */ + cbm_session_state_t *dc_session = ensure_session(srv); + cbm_session_bump_query_count(dc_session); + char line[1024]; int file_count = 0; @@ -5366,6 +5454,9 @@ static char *handle_detect_changes(cbm_mcp_server_t *srv, const char *args) { continue; } + /* Session tracking: record each changed file */ + cbm_session_track_file_edited(dc_session, line); + yyjson_mut_arr_add_str(doc, changed, line); file_count++; @@ -5536,6 +5627,193 @@ static char *handle_ingest_traces(cbm_mcp_server_t *srv, const char *args) { return result; } +/* ── Session context (Phase 7A) ────────────────────────────────── */ + +/* Callback: free a strdup'd hash table key (for temporary candidate sets). */ +static void free_ht_key_cb(const char *key, void *value, void *userdata) { + (void)value; + (void)userdata; + free((void *)key); +} + +/* Callback: append key to a yyjson array. */ +typedef struct { + yyjson_mut_doc *doc; + yyjson_mut_val *arr; +} json_arr_ctx_t; + +static void append_key_to_json_arr(const char *key, void *userdata) { + json_arr_ctx_t *ctx = (json_arr_ctx_t *)userdata; + yyjson_mut_arr_add_strcpy(ctx->doc, ctx->arr, key); +} + +/* Callback: collect symbol names into a list for related_untouched lookup. 
*/ +typedef struct { + const char **names; + int count; + int cap; +} name_collector_t; + +static void collect_symbol_name(const char *key, void *userdata) { + name_collector_t *nc = (name_collector_t *)userdata; + if (nc->count < nc->cap) { + nc->names[nc->count++] = key; + } +} + +static char *handle_get_session_context(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + bool include_related = cbm_mcp_get_bool_arg_default(args, "include_related", true); + int limit = cbm_mcp_get_int_arg(args, "limit", 10); + + cbm_session_state_t *ss = ensure_session(srv); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + /* Basic session stats */ + time_t start = cbm_session_start_time(ss); + time_t now = time(NULL); + yyjson_mut_obj_add_int(doc, root, "session_duration_s", (int64_t)(now - start)); + yyjson_mut_obj_add_int(doc, root, "query_count", cbm_session_query_count(ss)); + + /* Build set arrays using iteration */ + json_arr_ctx_t jctx; + jctx.doc = doc; + +#define EMIT_SET_ARRAY(field_name, foreach_fn) \ + do { \ + jctx.arr = yyjson_mut_arr(doc); \ + foreach_fn(ss, append_key_to_json_arr, &jctx); \ + yyjson_mut_obj_add_val(doc, root, field_name, jctx.arr); \ + } while (0) + + EMIT_SET_ARRAY("files_read", cbm_session_foreach_file_read); + EMIT_SET_ARRAY("files_edited", cbm_session_foreach_file_edited); + EMIT_SET_ARRAY("symbols_queried", cbm_session_foreach_symbol); + EMIT_SET_ARRAY("areas_explored", cbm_session_foreach_area); + EMIT_SET_ARRAY("impact_analyses_run", cbm_session_foreach_impact); + +#undef EMIT_SET_ARRAY + + /* related_untouched: graph neighbors of touched symbols not yet examined */ + yyjson_mut_val *related = yyjson_mut_arr(doc); + if (include_related && project) { + cbm_store_t *store = resolve_store(srv, project); + if (store) { + /* Collect queried symbols + impact symbols (cap at 20 total) */ + const int 
MAX_LOOKUP = 20; + const char *lookup_names[20]; + name_collector_t nc = {.names = lookup_names, .count = 0, .cap = MAX_LOOKUP}; + cbm_session_foreach_symbol(ss, collect_symbol_name, &nc); + cbm_session_foreach_impact(ss, collect_symbol_name, &nc); + + /* Temporary dedup set for candidates */ + CBMHashTable *candidates = cbm_ht_create(64); + + /* For each tracked symbol, find 1-hop neighbors */ + for (int i = 0; i < nc.count; i++) { + cbm_node_t *nodes = NULL; + int ncount = 0; + cbm_store_find_nodes_by_name(store, project, lookup_names[i], &nodes, &ncount); + + for (int j = 0; j < ncount; j++) { + char **callers = NULL; + char **callees = NULL; + int caller_count = 0, callee_count = 0; + cbm_store_node_neighbor_names(store, nodes[j].id, 10, &callers, &caller_count, + &callees, &callee_count); + + /* Add callers not already tracked */ + for (int k = 0; k < caller_count; k++) { + if (callers[k] && !cbm_session_has_symbol(ss, callers[k]) && + !cbm_ht_has(candidates, callers[k])) { + char *key = strdup(callers[k]); + if (key) { + /* Store the "reason" source symbol as the value */ + cbm_ht_set(candidates, key, (void *)lookup_names[i]); + } + } + } + /* Add callees not already tracked */ + for (int k = 0; k < callee_count; k++) { + if (callees[k] && !cbm_session_has_symbol(ss, callees[k]) && + !cbm_ht_has(candidates, callees[k])) { + char *key = strdup(callees[k]); + if (key) { + cbm_ht_set(candidates, key, (void *)lookup_names[i]); + } + } + } + + /* Free neighbor arrays */ + for (int k = 0; k < caller_count; k++) + free(callers[k]); + free(callers); + for (int k = 0; k < callee_count; k++) + free(callees[k]); + free(callees); + } + cbm_store_free_nodes(nodes, ncount); + } + + /* Rank candidates by PageRank: get key symbols and match against candidates */ + if (cbm_ht_count(candidates) > 0) { + cbm_key_symbol_t *key_syms = NULL; + int ks_count = 0; + cbm_store_get_key_symbols(store, project, NULL, 200, &key_syms, &ks_count); + + int emitted = 0; + for (int i = 0; i < 
ks_count && emitted < limit; i++) { + if (key_syms[i].name && cbm_ht_has(candidates, key_syms[i].name)) { + const char *reason_sym = + (const char *)cbm_ht_get(candidates, key_syms[i].name); + yyjson_mut_val *item = yyjson_mut_obj(doc); + /* Use strcpy variants: key_syms strings are freed below */ + yyjson_mut_obj_add_strcpy(doc, item, "name", key_syms[i].name); + if (key_syms[i].file_path) { + yyjson_mut_obj_add_strcpy(doc, item, "file", key_syms[i].file_path); + } + if (key_syms[i].pagerank > 0) { + yyjson_mut_obj_add_real(doc, item, "pagerank", key_syms[i].pagerank); + } + /* Build reason string (stack buffer — must copy) */ + char reason[256]; + snprintf(reason, sizeof(reason), "neighbor of %s, not yet examined", + reason_sym ? reason_sym : "queried symbol"); + yyjson_mut_obj_add_strcpy(doc, item, "reason", reason); + yyjson_mut_arr_add_val(related, item); + emitted++; + } + } + + /* Free key symbols */ + for (int i = 0; i < ks_count; i++) { + free((void *)key_syms[i].name); + free((void *)key_syms[i].qualified_name); + free((void *)key_syms[i].label); + free((void *)key_syms[i].file_path); + } + free(key_syms); + } + + /* Free candidate keys */ + cbm_ht_foreach(candidates, free_ht_key_cb, NULL); + cbm_ht_free(candidates); + } + } + yyjson_mut_obj_add_val(doc, root, "related_untouched", related); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(project); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + /* ── Tool dispatch ────────────────────────────────────────────── */ // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) @@ -5606,6 +5884,10 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "ingest_traces") == 0) { return handle_ingest_traces(srv, args_json); } + if (strcmp(tool_name, "get_session_context") == 0) { + return handle_get_session_context(srv, args_json); + } + char msg[256]; snprintf(msg, sizeof(msg), "unknown tool: %s", 
tool_name); return cbm_mcp_text_result(msg, true); diff --git a/src/mcp/mcp.h b/src/mcp/mcp.h index 1f24dd8c..da7643eb 100644 --- a/src/mcp/mcp.h +++ b/src/mcp/mcp.h @@ -121,6 +121,9 @@ bool cbm_mcp_server_has_cached_store(cbm_mcp_server_t *srv); /* Get the store handle from a server (for test setup). */ cbm_store_t *cbm_mcp_server_store(cbm_mcp_server_t *srv); +/* Get the session state from a server (for testing). Returns NULL if uninitialized. */ +struct cbm_session_state *cbm_mcp_server_session(cbm_mcp_server_t *srv); + /* Set the project name associated with the server's current store (for test setup). * This prevents resolve_store() from trying to open a .db file when tools specify a project. */ void cbm_mcp_server_set_project(cbm_mcp_server_t *srv, const char *project); diff --git a/src/mcp/session.c b/src/mcp/session.c new file mode 100644 index 00000000..b389480f --- /dev/null +++ b/src/mcp/session.c @@ -0,0 +1,225 @@ +/* + * session.c — Ephemeral session memory implementation (Phase 7A). + * + * Uses CBMHashTable as sets (value = sentinel, key = strdup'd string). + * Keys are owned by the session and freed on session_free(). + */ +#include "mcp/session.h" +#include "foundation/hash_table.h" + +#include <stdlib.h> +#include <string.h> + +#define SESSION_SET_INITIAL_CAP 32 +#define SET_SENTINEL ((void *)1) + +struct cbm_session_state { + CBMHashTable *files_read; + CBMHashTable *files_edited; + CBMHashTable *symbols_queried; + CBMHashTable *areas_explored; + CBMHashTable *impact_analyses; + int query_count; + time_t start_time; +}; + +/* ── Internal helpers ──────────────────────────────────────────── */ + +/* Callback for cbm_ht_foreach: free the strdup'd key. */ +static void free_key_cb(const char *key, void *value, void *userdata) { + (void)value; + (void)userdata; + free((void *)key); +} + +/* Insert a key into a hash-table set if not already present. 
*/
+static void set_add(CBMHashTable *ht, const char *key) {
+    /* Nothing to do for a missing table/key or a key that is already a member. */
+    if (ht == NULL || key == NULL || cbm_ht_has(ht, key)) {
+        return;
+    }
+    /* The set stores its own copy of the key (released by set_free); if the
+     * copy cannot be allocated the key is simply not recorded. */
+    char *copy = strdup(key);
+    if (copy == NULL) {
+        return;
+    }
+    cbm_ht_set(ht, copy, SET_SENTINEL);
+}
+
+/* Release a set: free every key through free_key_cb, then the table itself.
+ * A NULL table is ignored. */
+static void set_free(CBMHashTable *ht) {
+    if (ht != NULL) {
+        cbm_ht_foreach(ht, free_key_cb, NULL);
+        cbm_ht_free(ht);
+    }
+}
+
+/* Context that lets a two-argument cbm_session_iter_fn be driven by
+ * cbm_ht_foreach, whose callback also receives the stored value. */
+typedef struct {
+    cbm_session_iter_fn fn;
+    void *userdata;
+} iter_adapter_t;
+
+/* cbm_ht_foreach callback: drop the value and forward (key, userdata) to the
+ * wrapped iterator. */
+static void iter_adapter_cb(const char *key, void *value, void *userdata) {
+    const iter_adapter_t *ctx = userdata;
+    (void)value;
+    ctx->fn(key, ctx->userdata);
+}
+
+/* Call fn(key, ud) for each key stored in ht; no-op when ht or fn is NULL. */
+static void set_foreach(const CBMHashTable *ht, cbm_session_iter_fn fn, void *ud) {
+    if (ht == NULL || fn == NULL) {
+        return;
+    }
+    iter_adapter_t ctx;
+    ctx.fn = fn;
+    ctx.userdata = ud;
+    cbm_ht_foreach(ht, iter_adapter_cb, &ctx);
+}
+
+/* ── Lifecycle ─────────────────────────────────────────────────── */
+
+/* Allocate a zeroed session, create its five sets and record the start time.
+ * Returns NULL if the session or any set cannot be allocated; whatever was
+ * created before the failure is released through cbm_session_free(). */
+cbm_session_state_t *cbm_session_create(void) {
+    cbm_session_state_t *session = calloc(1, sizeof(*session));
+    if (session == NULL) {
+        return NULL;
+    }
+
+    /* All five tables are attempted before the result is inspected. */
+    CBMHashTable **const slots[] = {
+        &session->files_read,     &session->files_edited,    &session->symbols_queried,
+        &session->areas_explored, &session->impact_analyses,
+    };
+    int all_created = 1;
+    for (size_t i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
+        *slots[i] = cbm_ht_create(SESSION_SET_INITIAL_CAP);
+        if (*slots[i] == NULL) {
+            all_created = 0;
+        }
+    }
+    if (!all_created) {
+        cbm_session_free(session);
+        return NULL;
+    }
+
+    session->start_time = time(NULL);
+    return session;
+}
+
+/* Free the five sets (including their keys) and the session itself.
+ * NULL is accepted and ignored. */
+void cbm_session_free(cbm_session_state_t *s) {
+    if (s == NULL) {
+        return;
+    }
+    CBMHashTable *const sets[] = {
+        s->files_read,     s->files_edited,    s->symbols_queried,
+        s->areas_explored, s->impact_analyses,
+    };
+    for (size_t i = 0; i < sizeof(sets) / sizeof(sets[0]); i++) {
+        set_free(sets[i]);
+    }
+    free(s);
+}
+
+/* ── Tracking ──────────────────────────────────────────────────── */
+
+/* Trackers: each adds its string to the matching set through set_add() and
+ * does nothing when the session is NULL. */
+void cbm_session_track_file_read(cbm_session_state_t *s, const char *path) {
+    if (s) {
+        set_add(s->files_read, path);
+    }
+}
+
+void cbm_session_track_file_edited(cbm_session_state_t *s, const char *path) {
+    if (s) {
+        set_add(s->files_edited, path);
+    }
+}
+
+void cbm_session_track_symbol(cbm_session_state_t *s, const char *name) {
+    if (s) {
+        set_add(s->symbols_queried, name);
+    }
+}
+
+void cbm_session_track_area(cbm_session_state_t *s, const char *keyword) {
+    if (s) {
+        set_add(s->areas_explored, keyword);
+    }
+}
+
+void cbm_session_track_impact(cbm_session_state_t *s, const char *symbol) {
+    if (s) {
+        set_add(s->impact_analyses, symbol);
+    }
+}
+
+/* Increment the per-session query counter; NULL session is a no-op. */
+void cbm_session_bump_query_count(cbm_session_state_t *s) {
+    if (s) {
+        s->query_count++;
+    }
+}
+
+/* ── Counts ────────────────────────────────────────────────────── */
+
+/* Accessors: each returns 0 when the session is NULL. The set sizes are the
+ * cbm_ht_count() of the corresponding table, narrowed to int. */
+int cbm_session_query_count(const cbm_session_state_t *s) {
+    return s ? s->query_count : 0;
+}
+
+int cbm_session_files_read_count(const cbm_session_state_t *s) {
+    return s ? (int)cbm_ht_count(s->files_read) : 0;
+}
+
+int cbm_session_files_edited_count(const cbm_session_state_t *s) {
+    return s ? (int)cbm_ht_count(s->files_edited) : 0;
+}
+
+int cbm_session_symbols_count(const cbm_session_state_t *s) {
+    return s ? (int)cbm_ht_count(s->symbols_queried) : 0;
+}
+
+int cbm_session_areas_count(const cbm_session_state_t *s) {
+    return s ? (int)cbm_ht_count(s->areas_explored) : 0;
+}
+
+int cbm_session_impacts_count(const cbm_session_state_t *s) {
+    return s ? (int)cbm_ht_count(s->impact_analyses) : 0;
+}
+
+/* Start timestamp recorded in the session, or 0 for a NULL session. */
+time_t cbm_session_start_time(const cbm_session_state_t *s) {
+    return s ? s->start_time : 0;
+}
+
+/* ── Membership checks ─────────────────────────────────────────── */
+
+/* Each check is false when the session or the key is NULL; otherwise it is
+ * the result of cbm_ht_has() on the corresponding set. */
+bool cbm_session_has_file_read(const cbm_session_state_t *s, const char *path) {
+    return s && path && cbm_ht_has(s->files_read, path);
+}
+
+bool cbm_session_has_file_edited(const cbm_session_state_t *s, const char *path) {
+    return s && path && cbm_ht_has(s->files_edited, path);
+}
+
+bool cbm_session_has_symbol(const cbm_session_state_t *s, const char *name) {
+    return s && name && cbm_ht_has(s->symbols_queried, name);
+}
+
+/* ── Iteration ─────────────────────────────────────────────────── */
+
+/* Each iterator forwards to set_foreach() on the corresponding set, which
+ * calls fn(key, ud) per entry; a NULL session is a no-op. */
+void cbm_session_foreach_file_read(const cbm_session_state_t *s, cbm_session_iter_fn fn,
+                                   void *ud) {
+    if (s) {
+        set_foreach(s->files_read, fn, ud);
+    }
+}
+
+void cbm_session_foreach_file_edited(const cbm_session_state_t *s, cbm_session_iter_fn fn,
+                                     void *ud) {
+    if (s) {
+        set_foreach(s->files_edited, fn, ud);
+    }
+}
+
+void cbm_session_foreach_symbol(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud) {
+    if (s) {
+        set_foreach(s->symbols_queried, fn, ud);
+    }
+}
+
+void cbm_session_foreach_area(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud) {
+    if (s) {
+        set_foreach(s->areas_explored, fn, ud);
+    }
+}
+
+void cbm_session_foreach_impact(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud) {
+    if (s) {
+        set_foreach(s->impact_analyses, fn, ud);
+    }
+}
diff --git a/src/mcp/session.h b/src/mcp/session.h
new file mode 100644
index 00000000..a440739a
--- /dev/null
+++ b/src/mcp/session.h
@@ -0,0 +1,58 @@
+/*
+ * session.h — Ephemeral session memory for MCP server (Phase 7A).
+ *
+ * Tracks files read/edited, symbols queried, areas explored, and impact
+ * analyses run during a single Claude Code session. State lives in memory
+ * only — not persisted to SQLite — and resets on MCP server restart.
+ */
+#ifndef CBM_SESSION_H
+#define CBM_SESSION_H
+
+#include <stdbool.h>
+#include <time.h>
+
+/* Opaque session state handle. 
*/ +typedef struct cbm_session_state cbm_session_state_t; + +/* Iterator callback: receives (key, userdata) for each entry in a set. */ +typedef void (*cbm_session_iter_fn)(const char *key, void *userdata); + +/* ── Lifecycle ─────────────────────────────────────────────────── */ + +cbm_session_state_t *cbm_session_create(void); +void cbm_session_free(cbm_session_state_t *s); + +/* ── Tracking ──────────────────────────────────────────────────── */ + +void cbm_session_track_file_read(cbm_session_state_t *s, const char *path); +void cbm_session_track_file_edited(cbm_session_state_t *s, const char *path); +void cbm_session_track_symbol(cbm_session_state_t *s, const char *name); +void cbm_session_track_area(cbm_session_state_t *s, const char *keyword); +void cbm_session_track_impact(cbm_session_state_t *s, const char *symbol); +void cbm_session_bump_query_count(cbm_session_state_t *s); + +/* ── Counts ────────────────────────────────────────────────────── */ + +int cbm_session_query_count(const cbm_session_state_t *s); +int cbm_session_files_read_count(const cbm_session_state_t *s); +int cbm_session_files_edited_count(const cbm_session_state_t *s); +int cbm_session_symbols_count(const cbm_session_state_t *s); +int cbm_session_areas_count(const cbm_session_state_t *s); +int cbm_session_impacts_count(const cbm_session_state_t *s); +time_t cbm_session_start_time(const cbm_session_state_t *s); + +/* ── Membership checks ─────────────────────────────────────────── */ + +bool cbm_session_has_file_read(const cbm_session_state_t *s, const char *path); +bool cbm_session_has_file_edited(const cbm_session_state_t *s, const char *path); +bool cbm_session_has_symbol(const cbm_session_state_t *s, const char *name); + +/* ── Iteration ─────────────────────────────────────────────────── */ + +void cbm_session_foreach_file_read(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud); +void cbm_session_foreach_file_edited(const cbm_session_state_t *s, cbm_session_iter_fn fn, 
void *ud); +void cbm_session_foreach_symbol(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud); +void cbm_session_foreach_area(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud); +void cbm_session_foreach_impact(const cbm_session_state_t *s, cbm_session_iter_fn fn, void *ud); + +#endif /* CBM_SESSION_H */ diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 62c21f0a..65277430 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -6,6 +6,7 @@ #include "../src/foundation/compat.h" #include "test_framework.h" #include +#include #include #include #include @@ -2637,6 +2638,214 @@ TEST(mcp_server_run_rapid_messages) { } #endif /* !_WIN32 */ +/* ══════════════════════════════════════════════════════════════════ + * SESSION MEMORY (Phase 7A) + * ══════════════════════════════════════════════════════════════════ */ + +TEST(session_create_free) { + cbm_session_state_t *s = cbm_session_create(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_session_query_count(s), 0); + ASSERT_EQ(cbm_session_files_read_count(s), 0); + ASSERT_EQ(cbm_session_files_edited_count(s), 0); + ASSERT_EQ(cbm_session_symbols_count(s), 0); + ASSERT_EQ(cbm_session_areas_count(s), 0); + ASSERT_EQ(cbm_session_impacts_count(s), 0); + ASSERT_TRUE(cbm_session_start_time(s) > 0); + cbm_session_free(s); + PASS(); +} + +TEST(session_track_dedup) { + cbm_session_state_t *s = cbm_session_create(); + ASSERT_NOT_NULL(s); + + /* Same path twice → count 1 */ + cbm_session_track_file_read(s, "app/services/OrderService.php"); + cbm_session_track_file_read(s, "app/services/OrderService.php"); + ASSERT_EQ(cbm_session_files_read_count(s), 1); + + /* Different path → count 2 */ + cbm_session_track_file_read(s, "app/models/Order.php"); + ASSERT_EQ(cbm_session_files_read_count(s), 2); + + /* Check membership */ + ASSERT_TRUE(cbm_session_has_file_read(s, "app/services/OrderService.php")); + ASSERT_TRUE(cbm_session_has_file_read(s, "app/models/Order.php")); + ASSERT_FALSE(cbm_session_has_file_read(s, 
"nonexistent.php")); + + cbm_session_free(s); + PASS(); +} + +TEST(session_track_all_types) { + cbm_session_state_t *s = cbm_session_create(); + ASSERT_NOT_NULL(s); + + cbm_session_track_file_read(s, "file1.php"); + cbm_session_track_file_edited(s, "file2.php"); + cbm_session_track_symbol(s, "processOrder"); + cbm_session_track_area(s, "payment"); + cbm_session_track_impact(s, "calculateTax"); + cbm_session_bump_query_count(s); + cbm_session_bump_query_count(s); + + ASSERT_EQ(cbm_session_files_read_count(s), 1); + ASSERT_EQ(cbm_session_files_edited_count(s), 1); + ASSERT_EQ(cbm_session_symbols_count(s), 1); + ASSERT_EQ(cbm_session_areas_count(s), 1); + ASSERT_EQ(cbm_session_impacts_count(s), 1); + ASSERT_EQ(cbm_session_query_count(s), 2); + + ASSERT_TRUE(cbm_session_has_file_edited(s, "file2.php")); + ASSERT_TRUE(cbm_session_has_symbol(s, "processOrder")); + + cbm_session_free(s); + PASS(); +} + +TEST(session_null_safety) { + /* All functions should be safe with NULL session */ + cbm_session_track_file_read(NULL, "x"); + cbm_session_track_symbol(NULL, "x"); + cbm_session_bump_query_count(NULL); + ASSERT_EQ(cbm_session_query_count(NULL), 0); + ASSERT_EQ(cbm_session_files_read_count(NULL), 0); + ASSERT_FALSE(cbm_session_has_symbol(NULL, "x")); + cbm_session_free(NULL); + + /* NULL key should not crash */ + cbm_session_state_t *s = cbm_session_create(); + cbm_session_track_file_read(s, NULL); + cbm_session_track_symbol(s, NULL); + ASSERT_EQ(cbm_session_files_read_count(s), 0); + ASSERT_EQ(cbm_session_symbols_count(s), 0); + cbm_session_free(s); + PASS(); +} + +TEST(get_session_context_empty) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "get_session_context", "{}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + + /* Should have query_count:0 (or 0 from the get_session_context call itself) */ + ASSERT_NOT_NULL(strstr(text, "\"query_count\":")); + 
ASSERT_NOT_NULL(strstr(text, "\"files_read\":[]")); + ASSERT_NOT_NULL(strstr(text, "\"symbols_queried\":[]")); + ASSERT_NOT_NULL(strstr(text, "\"areas_explored\":[]")); + ASSERT_NOT_NULL(strstr(text, "\"related_untouched\":[]")); + + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(get_session_context_after_tools) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Call explore → should track area */ + char *r1 = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + free(r1); + + /* Call understand → should track symbol */ + char *r2 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r2); + + /* Call get_session_context → verify accumulated state */ + char *raw = cbm_mcp_handle_tool(srv, "get_session_context", + "{\"project\":\"impact\",\"include_related\":true}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + + /* Verify query count ≥ 2 (explore + understand) */ + ASSERT_NOT_NULL(strstr(text, "\"query_count\":")); + /* "Order" should be in areas_explored */ + ASSERT_NOT_NULL(strstr(text, "\"Order\"")); + /* "ProcessOrder" should be in symbols_queried */ + ASSERT_NOT_NULL(strstr(text, "\"ProcessOrder\"")); + /* related_untouched should be present (may be empty if no neighbors outside touched set) */ + ASSERT_NOT_NULL(strstr(text, "\"related_untouched\"")); + + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(get_session_context_no_project) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Call a tool first to populate session */ + char *r1 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r1); + + /* Call get_session_context without project → should return state but no related_untouched */ + char *raw = cbm_mcp_handle_tool(srv, "get_session_context", "{}"); + 
ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + + ASSERT_NOT_NULL(strstr(text, "\"ProcessOrder\"")); + /* related_untouched should be empty array (no project to query) */ + ASSERT_NOT_NULL(strstr(text, "\"related_untouched\":[]")); + + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_accumulates_across_tools) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Call explore */ + char *r1 = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + free(r1); + + /* Call understand */ + char *r2 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"HandleOrder\"}"); + free(r2); + + /* Call get_impact_analysis */ + char *r3 = cbm_mcp_handle_tool(srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r3); + + /* Verify session accumulated everything */ + cbm_session_state_t *ss = cbm_mcp_server_session(srv); + ASSERT_NOT_NULL(ss); + ASSERT_TRUE(cbm_session_query_count(ss) >= 3); + ASSERT_TRUE(cbm_session_areas_count(ss) >= 1); + ASSERT_TRUE(cbm_session_symbols_count(ss) >= 1); /* HandleOrder at minimum */ + ASSERT_TRUE(cbm_session_impacts_count(ss) >= 1); /* ProcessOrder */ + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(get_session_context_tools_list_includes_tool) { + char *tools_json = cbm_mcp_tools_list(); + ASSERT_NOT_NULL(tools_json); + ASSERT_NOT_NULL(strstr(tools_json, "get_session_context")); + free(tools_json); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -2798,4 +3007,15 @@ SUITE(mcp) { RUN_TEST(snippet_auto_resolve_enabled); RUN_TEST(snippet_include_neighbors_default); RUN_TEST(snippet_include_neighbors_enabled); + + /* Session memory (Phase 7A) */ + RUN_TEST(session_create_free); + RUN_TEST(session_track_dedup); + 
RUN_TEST(session_track_all_types); + RUN_TEST(session_null_safety); + RUN_TEST(get_session_context_empty); + RUN_TEST(get_session_context_after_tools); + RUN_TEST(get_session_context_no_project); + RUN_TEST(session_accumulates_across_tools); + RUN_TEST(get_session_context_tools_list_includes_tool); } From a8f5d7d8743350b1c56f872e8a55101277c64e7b Mon Sep 17 00:00:00 2001 From: maplenk Date: Thu, 26 Mar 2026 10:46:27 +0530 Subject: [PATCH 12/14] Add proactive session hints to tool responses (Phase 7B) Enrich search_graph, explore, understand, prepare_change, and get_impact_analysis responses with session-aware hints when actionable: - Area re-exploration overlap with new/already-seen result counts - Symbol re-query detection with unexamined neighbor suggestions - Blast radius overlap with already-edited files warning - High-PageRank untouched symbol suggestion after 10+ queries Hints are budget-aware (skipped if response near max_tokens) and non-breaking (optional session_hint string field in JSON response). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp/mcp.c | 341 +++++++++++++++++++++++++++++++++++++++++++++- src/mcp/session.c | 4 + src/mcp/session.h | 1 + tests/test_mcp.c | 118 ++++++++++++++++ 4 files changed, 462 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index bfca1e4b..48194fd9 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1132,6 +1132,16 @@ static cbm_session_state_t *ensure_session(cbm_mcp_server_t *srv) { return srv->session; } +/* Forward declarations for Phase 7B session hint builders (defined later in file). + * Note: build_understand_session_hint is declared later, after connected_symbol_t typedef. 
*/ +static char *build_search_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, const cbm_search_output_t *out); +static char *build_impact_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, + const cbm_impact_analysis_t *impact); +static void maybe_add_session_hint(yyjson_mut_doc *doc, yyjson_mut_val *root, const char *hint, + size_t char_budget, size_t *used); + void cbm_mcp_server_set_project(cbm_mcp_server_t *srv, const char *project) { if (!srv) { return; @@ -1557,6 +1567,9 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { cbm_search_output_t out = {0}; cbm_store_search(store, &params, &out); + /* Session: compute hint BEFORE tracking (so we can detect "already seen") */ + char *session_hint = build_search_session_hint(srv, store, project, &out); + /* Session tracking: record top result names as queried symbols (cap 20) */ { cbm_session_state_t *ss = ensure_session(srv); @@ -1582,6 +1595,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_val(doc, root, "results", results); yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); + maybe_add_session_hint(doc, root, session_hint, 0, NULL); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -1622,12 +1636,14 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_val(doc, root, "results", results); yyjson_mut_obj_add_int(doc, root, "shown", shown); yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + shown); + maybe_add_session_hint(doc, root, session_hint, char_budget, &used); json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); } cbm_store_search_free(&out); + free(session_hint); free(project); free(label); free(name_pattern); @@ -2076,6 +2092,10 @@ static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) impact.risk_score ? 
impact.risk_score : ""); yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? summary_text : ""); + /* Session hint (Phase 7B) */ + char *impact_hint = build_impact_session_hint(srv, store, project, &impact); + maybe_add_session_hint(doc, root, impact_hint, 0, NULL); + char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -2136,11 +2156,13 @@ static char *handle_get_impact_analysis(cbm_mcp_server_t *srv, const char *args) impact.risk_score ? impact.risk_score : ""); yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? summary_text : ""); yyjson_mut_obj_add_int(doc, root, "shown", shown); + maybe_add_session_hint(doc, root, impact_hint, char_budget, &used); json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); } + free(impact_hint); free(summary_text); cbm_store_impact_analysis_free(&impact); free(project); @@ -3144,6 +3166,18 @@ typedef struct { int test_count; } review_scope_t; +/* Forward declarations for Phase 7B hint builders that depend on above typedefs. */ +static char *build_explore_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, const char *area, + bool was_area_explored, + const cbm_search_result_t *const *matches, + int match_count); +static char *build_understand_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, const char *symbol, + bool was_already_queried, + const connected_symbol_t *connected, + int connected_count); + static bool compound_is_symbol_label(const char *label) { return label && (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0 || strcmp(label, "Class") == 0); @@ -3922,7 +3956,8 @@ static char *handle_explore(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } - /* Session tracking */ + /* Session tracking: check area overlap BEFORE tracking */ + bool was_area_explored = cbm_session_has_area(ensure_session(srv), area); { cbm_session_state_t *ss = ensure_session(srv); cbm_session_bump_query_count(ss); @@ -3937,6 +3972,7 @@ static 
char *handle_explore(cbm_mcp_server_t *srv, const char *args) { const cbm_search_result_t **matches = NULL; int match_count = 0; int match_cap = 0; + char *explore_hint = NULL; explore_dependency_t *deps = NULL; int dep_count = 0; cbm_key_symbol_t *hotspots = NULL; @@ -4032,6 +4068,11 @@ static char *handle_explore(cbm_mcp_server_t *srv, const char *args) { } yyjson_mut_obj_add_val(doc, root, "entry_points", entry_arr); + /* Compute session hint (Phase 7B) */ + explore_hint = + build_explore_session_hint(srv, store, project, area, was_area_explored, matches, match_count); + maybe_add_session_hint(doc, root, explore_hint, 0, NULL); + char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4122,6 +4163,7 @@ static char *handle_explore(cbm_mcp_server_t *srv, const char *args) { } yyjson_mut_obj_add_val(doc, root, "entry_points", entry_arr); yyjson_mut_obj_add_int(doc, root, "shown", shown); + maybe_add_session_hint(doc, root, explore_hint, char_budget, &used); json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4131,6 +4173,7 @@ static char *handle_explore(cbm_mcp_server_t *srv, const char *args) { free(json); cleanup_explore: + free(explore_hint); free(contains_regex); free(file_glob); free(matches); @@ -4174,7 +4217,8 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } - /* Session tracking */ + /* Session tracking: check BEFORE tracking for hint */ + bool was_already_queried = cbm_session_has_symbol(ensure_session(srv), symbol); { cbm_session_state_t *ss = ensure_session(srv); cbm_session_bump_query_count(ss); @@ -4194,6 +4238,7 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { cbm_traverse_result_t callers = {0}; cbm_traverse_result_t callees = {0}; connected_symbol_t *connected = NULL; + char *understand_hint = NULL; int connected_count = 0; bool is_key_symbol = false; char *source = NULL; @@ -4314,6 +4359,11 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char 
*args) { } yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); + /* Compute session hint (Phase 7B) */ + understand_hint = build_understand_session_hint(srv, store, project, symbol, + was_already_queried, connected, connected_count); + maybe_add_session_hint(doc, root, understand_hint, 0, NULL); + char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4432,6 +4482,7 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { } yyjson_mut_obj_add_val(doc, root, "connected_symbols", connected_arr); yyjson_mut_obj_add_int(doc, root, "shown", shown); + maybe_add_session_hint(doc, root, understand_hint, char_budget, &used); json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4441,6 +4492,7 @@ static char *handle_understand(cbm_mcp_server_t *srv, const char *args) { free(json); cleanup_understand: + free(understand_hint); free(project); free(symbol); free(source); @@ -4512,6 +4564,7 @@ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { size_t char_budget = max_tokens_to_char_budget(max_tokens); cbm_store_t *store = resolve_store(srv, project); char *result = NULL; + char *pc_hint = NULL; if (!symbol || !symbol[0]) { free(project); @@ -4611,6 +4664,10 @@ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? summary_text : ""); + /* Session hint (Phase 7B) */ + pc_hint = build_impact_session_hint(srv, store, project, &impact); + maybe_add_session_hint(doc, root, pc_hint, 0, NULL); + char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4740,6 +4797,7 @@ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_str(doc, root, "risk_score", impact.risk_score ? impact.risk_score : ""); yyjson_mut_obj_add_str(doc, root, "summary", summary_text ? 
summary_text : ""); yyjson_mut_obj_add_int(doc, root, "shown", shown); + maybe_add_session_hint(doc, root, pc_hint, char_budget, &used); json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); @@ -4749,6 +4807,7 @@ static char *handle_prepare_change(cbm_mcp_server_t *srv, const char *args) { free(json); cleanup_prepare_change: + free(pc_hint); free(summary_text); review_scope_free(&scope); cbm_store_impact_analysis_free(&impact); @@ -5627,6 +5686,284 @@ static char *handle_ingest_traces(cbm_mcp_server_t *srv, const char *args) { return result; } +/* ── Session hints (Phase 7B) ──────────────────────────────────── */ + +#define SESSION_HINT_MAX_CHARS 800 +#define SESSION_HINT_QUERY_THRESHOLD 10 + +/* Safe snprintf offset update: clamp to buffer size to prevent overflow. */ +#define HINT_SNPRINTF(buf, bufsz, offvar, ...) \ + do { \ + if ((offvar) < (int)(bufsz)) { \ + int _ret = snprintf((buf) + (offvar), (bufsz) - (size_t)(offvar), __VA_ARGS__); \ + if (_ret > 0) (offvar) += _ret; \ + if ((offvar) >= (int)(bufsz)) (offvar) = (int)(bufsz) - 1; \ + } \ + } while (0) + +/* Append a global "high-PageRank untouched" suggestion to buf. + * Returns chars written (0 if nothing to say). 
*/
+static int append_global_pagerank_hint(cbm_mcp_server_t *srv, cbm_store_t *store,
+                                       const char *project, char *buf, int off, int bufsz) {
+    cbm_session_state_t *session = srv->session;
+    if (session == NULL || store == NULL || project == NULL || off >= bufsz) {
+        return 0;
+    }
+
+    /* Only speak up once the session is past the query threshold. */
+    const int queries = cbm_session_query_count(session);
+    if (queries <= SESSION_HINT_QUERY_THRESHOLD) {
+        return 0;
+    }
+
+    cbm_key_symbol_t *ranked = NULL;
+    int ranked_count = 0;
+    cbm_store_get_key_symbols(store, project, NULL, 20, &ranked, &ranked_count);
+
+    /* Pick the first returned symbol that was neither queried by name nor
+     * lives in a file already read this session. */
+    int pick = -1;
+    for (int idx = 0; idx < ranked_count && pick < 0; idx++) {
+        if (!ranked[idx].name) {
+            continue;
+        }
+        if (cbm_session_has_symbol(session, ranked[idx].name)) {
+            continue;
+        }
+        if (cbm_session_has_file_read(session, ranked[idx].file_path)) {
+            continue;
+        }
+        pick = idx;
+    }
+
+    const int initial_off = off;
+    if (pick >= 0) {
+        /* A leading space separates this sentence from text already in buf. */
+        HINT_SNPRINTF(buf, (size_t)bufsz, off,
+                      "%sYou've made %d queries but haven't examined %s"
+                      " (PageRank #%d, %d inbound calls).",
+                      initial_off > 0 ? " " : "", queries, ranked[pick].name, pick + 1,
+                      ranked[pick].in_degree);
+    }
+
+    cbm_store_key_symbols_free(ranked, ranked_count);
+    return off - initial_off;
+}
+
+/* Build hint for explore/search when an area was already explored.
+ * Returns heap string or NULL. Caller frees. 
*/
+static char *build_explore_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store,
+                                        const char *project, const char *area,
+                                        bool was_area_explored,
+                                        const cbm_search_result_t *const *matches,
+                                        int match_count) {
+    cbm_session_state_t *session = srv->session;
+    if (session == NULL) {
+        return NULL;
+    }
+
+    char hint[SESSION_HINT_MAX_CHARS];
+    int hint_len = 0;
+    hint[0] = '\0';
+
+    if (was_area_explored) {
+        /* Gather up to five match names that are not in the queried-symbol set. */
+        char fresh[400];
+        int fresh_len = 0;
+        int fresh_count = 0;
+        fresh[0] = '\0';
+
+        for (int idx = 0; idx < match_count && fresh_count < 5; idx++) {
+            const char *name = matches[idx]->node.name;
+            if (name == NULL || cbm_session_has_symbol(session, name)) {
+                continue;
+            }
+            if (fresh_len > 0) {
+                HINT_SNPRINTF(fresh, sizeof(fresh), fresh_len, ", ");
+            }
+            HINT_SNPRINTF(fresh, sizeof(fresh), fresh_len, "%s", name);
+            fresh_count++;
+        }
+
+        if (fresh_count == 0) {
+            HINT_SNPRINTF(hint, sizeof(hint), hint_len,
+                          "You previously explored '%s'. All results were already examined.",
+                          area);
+        } else {
+            HINT_SNPRINTF(hint, sizeof(hint), hint_len,
+                          "You previously explored '%s'. New results not seen before: %s.",
+                          area, fresh);
+        }
+    }
+
+    /* The global suggestion goes after any area text. */
+    hint_len +=
+        append_global_pagerank_hint(srv, store, project, hint, hint_len, (int)sizeof(hint));
+
+    /* Nothing written means no hint at all. */
+    return hint_len == 0 ? NULL : strdup(hint);
+}
+
+/* Build hint for understand when symbol was already queried.
+ * Returns heap string or NULL. Caller frees. 
*/ +static char *build_understand_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, const char *symbol, + bool was_already_queried, + const connected_symbol_t *connected, + int connected_count) { + cbm_session_state_t *ss = srv->session; + if (!ss) { + return NULL; + } + + char buf[SESSION_HINT_MAX_CHARS]; + buf[0] = '\0'; + int off = 0; + + if (was_already_queried) { + HINT_SNPRINTF(buf, sizeof(buf), off, + "You already queried '%s' earlier in this session.", symbol); + } + + /* Find connected symbols not yet examined */ + int unseen_count = 0; + char unseen_names[400]; + unseen_names[0] = '\0'; + int noff = 0; + for (int i = 0; i < connected_count && unseen_count < 5; i++) { + if (connected[i].node.name && !cbm_session_has_symbol(ss, connected[i].node.name)) { + if (noff > 0) { + HINT_SNPRINTF(unseen_names, sizeof(unseen_names), noff, ", "); + } + HINT_SNPRINTF(unseen_names, sizeof(unseen_names), noff, "%s", + connected[i].node.name); + unseen_count++; + } + } + if (unseen_count > 0) { + HINT_SNPRINTF(buf, sizeof(buf), off, + "%sRelated symbols not yet examined: %s.", off > 0 ? " " : "", + unseen_names); + } + + off += append_global_pagerank_hint(srv, store, project, buf, off, (int)sizeof(buf)); + + if (off == 0) { + return NULL; + } + return strdup(buf); +} + +/* Build hint for prepare_change / get_impact_analysis when blast radius + * overlaps with already-edited files. + * Returns heap string or NULL. Caller frees. */ +static char *build_impact_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, + const cbm_impact_analysis_t *impact) { + cbm_session_state_t *ss = srv->session; + if (!ss || !impact) { + return NULL; + } + + char buf[SESSION_HINT_MAX_CHARS]; + buf[0] = '\0'; + int off = 0; + + /* Check if any files in the blast radius were already edited. + * Seen files are kept as pointers and deduped via strcmp, since strstr could false-match.
*/ + const char *seen_files[5]; + int edited_count = 0; + char edited_files[400]; + edited_files[0] = '\0'; + int eoff = 0; + + /* Check the target symbol's file */ + if (impact->file && cbm_session_has_file_edited(ss, impact->file)) { + HINT_SNPRINTF(edited_files, sizeof(edited_files), eoff, "%s", impact->file); + seen_files[edited_count++] = impact->file; + } + + /* Check direct/indirect/transitive impact items */ + const struct { + const cbm_impact_item_t *items; + int count; + } groups[] = { + {impact->direct, impact->direct_count}, + {impact->indirect, impact->indirect_count}, + {impact->transitive, impact->transitive_count}, + }; + for (int g = 0; g < 3 && edited_count < 5; g++) { + for (int i = 0; i < groups[g].count && edited_count < 5; i++) { + const char *file = groups[g].items[i].file; + if (file && cbm_session_has_file_edited(ss, file)) { + /* Exact dedup against already-seen files */ + bool dup = false; + for (int s = 0; s < edited_count; s++) { + if (strcmp(seen_files[s], file) == 0) { + dup = true; + break; + } + } + if (!dup) { + if (eoff > 0) { + HINT_SNPRINTF(edited_files, sizeof(edited_files), eoff, ", "); + } + HINT_SNPRINTF(edited_files, sizeof(edited_files), eoff, "%s", file); + seen_files[edited_count++] = file; + } + } + } + } + + if (edited_count > 0) { + HINT_SNPRINTF(buf, sizeof(buf), off, + "Warning: %s %s already edited this session.", + edited_files, edited_count > 1 ? "were" : "was"); + } + + off += append_global_pagerank_hint(srv, store, project, buf, off, (int)sizeof(buf)); + + if (off == 0) { + return NULL; + } + return strdup(buf); +} + +/* Build hint for search_graph: note how many results were already examined. + * Returns heap string or NULL. Caller frees. 
*/ +static char *build_search_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store, + const char *project, const cbm_search_output_t *out) { + cbm_session_state_t *ss = srv->session; + if (!ss || !out || out->count == 0) { + return NULL; + } + + char buf[SESSION_HINT_MAX_CHARS]; + int off = 0; + + /* Count already-seen results */ + int seen = 0; + for (int i = 0; i < out->count; i++) { + if (out->results[i].node.name && cbm_session_has_symbol(ss, out->results[i].node.name)) { + seen++; + } + } + if (seen > 0 && seen < out->count) { + off += snprintf(buf, sizeof(buf), + "%d of %d results were already examined this session.", seen, out->count); + } else if (seen > 0 && seen == out->count) { + off += snprintf(buf, sizeof(buf), + "All %d results were already examined this session.", out->count); + } + + off += append_global_pagerank_hint(srv, store, project, buf, off, (int)sizeof(buf)); + + if (off == 0) { + return NULL; + } + return strdup(buf); +} + +/* Inject session_hint into JSON doc if hint is non-NULL and fits the budget. + * For full-build (no budget): pass char_budget=0 and used=NULL. + * For truncated build: pass the tracked budget. */ +static void maybe_add_session_hint(yyjson_mut_doc *doc, yyjson_mut_val *root, const char *hint, + size_t char_budget, size_t *used) { + if (!hint || !hint[0]) { + return; + } + size_t hint_len = strlen(hint); + if (used && char_budget > 0 && (*used + hint_len + 30 > char_budget)) { + return; /* skip — would blow the token budget */ + } + yyjson_mut_obj_add_strcpy(doc, root, "session_hint", hint); + if (used) { + *used += hint_len + 30; + } +} + /* ── Session context (Phase 7A) ────────────────────────────────── */ /* Callback: free a strdup'd hash table key (for temporary candidate sets). 
*/ diff --git a/src/mcp/session.c b/src/mcp/session.c index b389480f..eac74eb2 100644 --- a/src/mcp/session.c +++ b/src/mcp/session.c @@ -186,6 +186,10 @@ bool cbm_session_has_file_edited(const cbm_session_state_t *s, const char *path) return s && path && cbm_ht_has(s->files_edited, path); } +bool cbm_session_has_area(const cbm_session_state_t *s, const char *keyword) { + return s && keyword && cbm_ht_has(s->areas_explored, keyword); +} + bool cbm_session_has_symbol(const cbm_session_state_t *s, const char *name) { return s && name && cbm_ht_has(s->symbols_queried, name); } diff --git a/src/mcp/session.h b/src/mcp/session.h index a440739a..86ce3431 100644 --- a/src/mcp/session.h +++ b/src/mcp/session.h @@ -46,6 +46,7 @@ time_t cbm_session_start_time(const cbm_session_state_t *s); bool cbm_session_has_file_read(const cbm_session_state_t *s, const char *path); bool cbm_session_has_file_edited(const cbm_session_state_t *s, const char *path); bool cbm_session_has_symbol(const cbm_session_state_t *s, const char *name); +bool cbm_session_has_area(const cbm_session_state_t *s, const char *keyword); /* ── Iteration ─────────────────────────────────────────────────── */ diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 65277430..0bd48b92 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -2846,6 +2846,117 @@ TEST(get_session_context_tools_list_includes_tool) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * SESSION HINTS (Phase 7B) + * ══════════════════════════════════════════════════════════════════ */ + +TEST(session_hint_explore_area_overlap) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* First explore — should NOT have session_hint about prior exploration */ + char *r1 = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + char *t1 = extract_text_content(r1); + ASSERT_NOT_NULL(t1); + ASSERT_NULL(strstr(t1, "\"session_hint\"")); + free(t1); + free(r1); + 
+ /* Second explore same area — should have overlap hint */ + char *r2 = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + char *t2 = extract_text_content(r2); + ASSERT_NOT_NULL(t2); + ASSERT_NOT_NULL(strstr(t2, "\"session_hint\"")); + ASSERT_NOT_NULL(strstr(t2, "previously explored")); + free(t2); + free(r2); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_hint_understand_already_queried) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* First understand */ + char *r1 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r1); + + /* Second understand same symbol — should have "already queried" hint */ + char *r2 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + char *t2 = extract_text_content(r2); + ASSERT_NOT_NULL(t2); + ASSERT_NOT_NULL(strstr(t2, "\"session_hint\"")); + ASSERT_NOT_NULL(strstr(t2, "already queried")); + free(t2); + free(r2); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_hint_prepare_change_edited_file) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Trigger session creation via a tool call, then manually track file as edited */ + char *r0 = cbm_mcp_handle_tool(srv, "get_session_context", "{}"); + free(r0); + cbm_session_state_t *ss = cbm_mcp_server_session(srv); + ASSERT_NOT_NULL(ss); + cbm_session_track_file_edited(ss, "app/controllers/OrderController.php"); + + /* prepare_change for ProcessOrder — OrderController is in its blast radius */ + char *raw = cbm_mcp_handle_tool( + srv, "prepare_change", "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"session_hint\"")); + ASSERT_NOT_NULL(strstr(text, "already edited")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + 
+TEST(session_hint_not_present_first_call) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Fresh session: understand should NOT have session_hint (nothing to report) */ + char *raw = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + /* Might have "Related symbols not yet examined" which is valid, OR no hint at all. + * What we definitely should NOT have is "already queried" */ + ASSERT_NULL(strstr(text, "already queried")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_has_area_membership) { + cbm_session_state_t *s = cbm_session_create(); + ASSERT_NOT_NULL(s); + + ASSERT_FALSE(cbm_session_has_area(s, "payment")); + cbm_session_track_area(s, "payment"); + ASSERT_TRUE(cbm_session_has_area(s, "payment")); + ASSERT_FALSE(cbm_session_has_area(s, "inventory")); + + cbm_session_free(s); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -3018,4 +3129,11 @@ SUITE(mcp) { RUN_TEST(get_session_context_no_project); RUN_TEST(session_accumulates_across_tools); RUN_TEST(get_session_context_tools_list_includes_tool); + + /* Session hints (Phase 7B) */ + RUN_TEST(session_hint_explore_area_overlap); + RUN_TEST(session_hint_understand_already_queried); + RUN_TEST(session_hint_prepare_change_edited_file); + RUN_TEST(session_hint_not_present_first_call); + RUN_TEST(session_has_area_membership); } From 89caa5b6a8bdac0eb817c9d1ce0049b86a478cf6 Mon Sep 17 00:00:00 2001 From: maplenk Date: Thu, 26 Mar 2026 10:56:22 +0530 Subject: [PATCH 13/14] Fix build_search_session_hint: use HINT_SNPRINTF and init buffer Use HINT_SNPRINTF macro consistently across all hint builders and initialize buf[0] to match sibling functions (post-review P2 fixes). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp/mcp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 48194fd9..c13d108b 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -5921,6 +5921,7 @@ static char *build_search_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store } char buf[SESSION_HINT_MAX_CHARS]; + buf[0] = '\0'; int off = 0; /* Count already-seen results */ @@ -5931,11 +5932,11 @@ static char *build_search_session_hint(cbm_mcp_server_t *srv, cbm_store_t *store } } if (seen > 0 && seen < out->count) { - off += snprintf(buf, sizeof(buf), - "%d of %d results were already examined this session.", seen, out->count); + HINT_SNPRINTF(buf, sizeof(buf), off, + "%d of %d results were already examined this session.", seen, out->count); } else if (seen > 0 && seen == out->count) { - off += snprintf(buf, sizeof(buf), - "All %d results were already examined this session.", out->count); + HINT_SNPRINTF(buf, sizeof(buf), off, + "All %d results were already examined this session.", out->count); } off += append_global_pagerank_hint(srv, store, project, buf, off, (int)sizeof(buf)); From 8769d70359bb13775c2199906ad8a494c716ed0e Mon Sep 17 00:00:00 2001 From: maplenk Date: Thu, 26 Mar 2026 11:25:17 +0530 Subject: [PATCH 14/14] Add get_session_summary markdown tool for context recovery (Phase 7C) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New MCP tool returns a compact markdown summary of the session: files touched (read/edited), symbols investigated with PageRank enrichment, impact analyses run, areas explored, and suggested next steps (unexamined graph neighbors ranked by PageRank). Designed for context recovery after Claude Code compaction — call get_session_summary to instantly restore the full session picture. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp/mcp.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++- tests/test_mcp.c | 91 +++++++++++++++++++ 2 files changed, 315 insertions(+), 3 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index c13d108b..002f8bdc 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -885,6 +885,15 @@ static const tool_def_t TOOLS[] = { "\"description\":\"Include graph neighbors of touched symbols that have not been " "examined yet.\"},\"limit\":{\"type\":\"integer\",\"default\":10," "\"description\":\"Max related_untouched items.\"}},\"required\":[]}"}, + + {"get_session_summary", + "Compact markdown session summary for context recovery after compaction. " + "Shows files touched, symbols investigated with PageRank, areas explored, " + "and suggested next steps.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"," + "\"description\":\"Project name (needed for PageRank enrichment and next-step " + "suggestions).\"},\"max_tokens\":{\"type\":\"integer\",\"default\":2000," + "\"description\":\"Maximum output size.\"}},\"required\":[]}"}, }; static const int TOOL_COUNT = sizeof(TOOLS) / sizeof(TOOLS[0]); @@ -5965,7 +5974,7 @@ static void maybe_add_session_hint(yyjson_mut_doc *doc, yyjson_mut_val *root, co } } -/* ── Session context (Phase 7A) ────────────────────────────────── */ +/* ── Session helpers (shared by 7A + 7C) ──────────────────────── */ /* Callback: free a strdup'd hash table key (for temporary candidate sets). */ static void free_ht_key_cb(const char *key, void *value, void *userdata) { @@ -5974,7 +5983,7 @@ static void free_ht_key_cb(const char *key, void *value, void *userdata) { free((void *)key); } -/* Callback: append key to a yyjson array. */ +/* Callback: append key to a yyjson array (used by get_session_context). 
*/ typedef struct { yyjson_mut_doc *doc; yyjson_mut_val *arr; @@ -5985,7 +5994,7 @@ static void append_key_to_json_arr(const char *key, void *userdata) { yyjson_mut_arr_add_strcpy(ctx->doc, ctx->arr, key); } -/* Callback: collect symbol names into a list for related_untouched lookup. */ +/* Callback: collect symbol names into a list for neighbor lookup. */ typedef struct { const char **names; int count; @@ -5999,6 +6008,215 @@ static void collect_symbol_name(const char *key, void *userdata) { } } +/* ── Session summary (Phase 7C) ────────────────────────────────── */ + +/* Callback context for iterating session sets into markdown. */ +typedef struct { + markdown_builder_t *md; + int count; /* items emitted so far */ +} md_list_ctx_t; + +static void append_key_comma_separated(const char *key, void *userdata) { + md_list_ctx_t *ctx = (md_list_ctx_t *)userdata; + if (ctx->count > 0) { + (void)markdown_builder_append_raw(ctx->md, ", "); + } + (void)markdown_builder_append_raw(ctx->md, key); + ctx->count++; +} + +static void append_key_bullet(const char *key, void *userdata) { + md_list_ctx_t *ctx = (md_list_ctx_t *)userdata; + (void)markdown_builder_appendf(ctx->md, "- %s\n", key); + ctx->count++; +} + +static char *handle_get_session_summary(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + + cbm_session_state_t *ss = ensure_session(srv); + cbm_store_t *store = project ? 
resolve_store(srv, project) : NULL; + + size_t char_budget = max_tokens_to_char_budget(max_tokens); + markdown_builder_t md; + markdown_builder_init(&md, char_budget); + + /* ── Header ──────────────────────────────────────────────── */ + time_t start = cbm_session_start_time(ss); + time_t now = time(NULL); + int elapsed = (int)(now - start); + if (elapsed < 0) elapsed = 0; + int minutes = elapsed / 60; + int seconds = elapsed % 60; + int qc = cbm_session_query_count(ss); + + if (minutes > 0) { + (void)markdown_builder_appendf(&md, "## Session Summary (%d queries, %dm%ds)\n\n", + qc, minutes, seconds); + } else { + (void)markdown_builder_appendf(&md, "## Session Summary (%d queries, %ds)\n\n", + qc, seconds); + } + + /* ── Files touched ───────────────────────────────────────── */ + int read_count = cbm_session_files_read_count(ss); + int edited_count = cbm_session_files_edited_count(ss); + + if (read_count > 0 || edited_count > 0) { + (void)markdown_builder_append_raw(&md, "### Files touched\n"); + if (read_count > 0) { + (void)markdown_builder_append_raw(&md, "- **Read:** "); + md_list_ctx_t ctx = {.md = &md, .count = 0}; + cbm_session_foreach_file_read(ss, append_key_comma_separated, &ctx); + (void)markdown_builder_append_raw(&md, "\n"); + } + if (edited_count > 0) { + (void)markdown_builder_append_raw(&md, "- **Edited:** "); + md_list_ctx_t ctx = {.md = &md, .count = 0}; + cbm_session_foreach_file_edited(ss, append_key_comma_separated, &ctx); + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + /* ── Symbols investigated ────────────────────────────────── */ + int sym_count = cbm_session_symbols_count(ss); + int impact_count = cbm_session_impacts_count(ss); + + if (sym_count > 0 || impact_count > 0) { + (void)markdown_builder_append_raw(&md, "### Symbols investigated\n"); + + /* Collect queried symbol names */ + const char *sym_names[30]; + name_collector_t sc = {.names = sym_names, .count = 0, .cap = 30}; 
+ cbm_session_foreach_symbol(ss, collect_symbol_name, &sc); + + for (int i = 0; i < sc.count; i++) { + const char *name = sc.names[i]; + + /* Look up PageRank if store available */ + if (store) { + cbm_key_symbol_t *ks = NULL; + int ks_count = 0; + cbm_store_get_key_symbols(store, project, name, 1, &ks, &ks_count); + if (ks_count > 0 && ks[0].name && strcmp(ks[0].name, name) == 0) { + (void)markdown_builder_appendf(&md, "- %s (%d callers, PageRank %.4f)", + name, ks[0].in_degree, ks[0].pagerank); + } else { + (void)markdown_builder_appendf(&md, "- %s", name); + } + cbm_store_key_symbols_free(ks, ks_count); + } else { + (void)markdown_builder_appendf(&md, "- %s", name); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + /* ── Impact analyses ─────────────────────────────────────── */ + if (impact_count > 0) { + (void)markdown_builder_append_raw(&md, "### Impact analyses run\n"); + md_list_ctx_t ctx = {.md = &md, .count = 0}; + cbm_session_foreach_impact(ss, append_key_bullet, &ctx); + (void)markdown_builder_append_raw(&md, "\n"); + } + + /* ── Areas explored ──────────────────────────────────────── */ + int area_count = cbm_session_areas_count(ss); + if (area_count > 0) { + (void)markdown_builder_append_raw(&md, "### Areas explored\n"); + md_list_ctx_t ctx = {.md = &md, .count = 0}; + cbm_session_foreach_area(ss, append_key_bullet, &ctx); + (void)markdown_builder_append_raw(&md, "\n"); + } + + /* ── Suggested next steps ────────────────────────────────── */ + if (store && sym_count > 0) { + { + /* Collect symbols for neighbor lookup */ + const char *lookup_names[20]; + name_collector_t nc = {.names = lookup_names, .count = 0, .cap = 20}; + cbm_session_foreach_symbol(ss, collect_symbol_name, &nc); + cbm_session_foreach_impact(ss, collect_symbol_name, &nc); + + /* Temporary dedup set for candidates */ + CBMHashTable *candidates = cbm_ht_create(64); + for (int i = 0; i < nc.count; i++) { + cbm_node_t *nodes = 
NULL; + int ncount = 0; + cbm_store_find_nodes_by_name(store, project, lookup_names[i], &nodes, &ncount); + for (int j = 0; j < ncount; j++) { + char **callers = NULL; + char **callees = NULL; + int caller_count = 0, callee_count = 0; + cbm_store_node_neighbor_names(store, nodes[j].id, 10, &callers, &caller_count, + &callees, &callee_count); + for (int k = 0; k < caller_count; k++) { + if (callers[k] && !cbm_session_has_symbol(ss, callers[k]) && + !cbm_ht_has(candidates, callers[k])) { + char *key = strdup(callers[k]); + if (key) cbm_ht_set(candidates, key, (void *)lookup_names[i]); + } + } + for (int k = 0; k < callee_count; k++) { + if (callees[k] && !cbm_session_has_symbol(ss, callees[k]) && + !cbm_ht_has(candidates, callees[k])) { + char *key = strdup(callees[k]); + if (key) cbm_ht_set(candidates, key, (void *)lookup_names[i]); + } + } + for (int k = 0; k < caller_count; k++) free(callers[k]); + free(callers); + for (int k = 0; k < callee_count; k++) free(callees[k]); + free(callees); + } + cbm_store_free_nodes(nodes, ncount); + } + + if (cbm_ht_count(candidates) > 0) { + cbm_key_symbol_t *key_syms = NULL; + int ks_count = 0; + cbm_store_get_key_symbols(store, project, NULL, 200, &key_syms, &ks_count); + + bool header_emitted = false; + int emitted = 0; + for (int i = 0; i < ks_count && emitted < 5; i++) { + if (key_syms[i].name && cbm_ht_has(candidates, key_syms[i].name)) { + if (!header_emitted) { + (void)markdown_builder_append_raw(&md, "### Suggested next steps\n"); + header_emitted = true; + } + const char *reason = + (const char *)cbm_ht_get(candidates, key_syms[i].name); + (void)markdown_builder_appendf( + &md, "- Examine %s%s%s (neighbor of %s, not yet examined)\n", + key_syms[i].name, + key_syms[i].file_path ? " in " : "", + key_syms[i].file_path ? key_syms[i].file_path : "", + reason ? 
reason : "queried symbol"); + emitted++; + } + } + cbm_store_key_symbols_free(key_syms, ks_count); + } + + cbm_ht_foreach(candidates, free_ht_key_cb, NULL); + cbm_ht_free(candidates); + } + } + + char *markdown = markdown_builder_finish(&md); + free(project); + + char *result = cbm_mcp_text_result(markdown ? markdown : "", false); + free(markdown); + return result; +} + +/* ── Session context (Phase 7A) ────────────────────────────────── */ + static char *handle_get_session_context(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); bool include_related = cbm_mcp_get_bool_arg_default(args, "include_related", true); @@ -6225,6 +6443,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_session_context") == 0) { return handle_get_session_context(srv, args_json); } + if (strcmp(tool_name, "get_session_summary") == 0) { + return handle_get_session_summary(srv, args_json); + } char msg[256]; snprintf(msg, sizeof(msg), "unknown tool: %s", tool_name); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 0bd48b92..1449b511 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -2957,6 +2957,91 @@ TEST(session_has_area_membership) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * SESSION SUMMARY (Phase 7C) + * ══════════════════════════════════════════════════════════════════ */ + +TEST(session_summary_empty) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool(srv, "get_session_summary", "{}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "Session Summary")); + ASSERT_NOT_NULL(strstr(text, "0 queries")); + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_summary_after_tools) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Call 
explore + understand to populate session */ + char *r1 = cbm_mcp_handle_tool(srv, "explore", + "{\"project\":\"impact\",\"area\":\"Order\"}"); + free(r1); + char *r2 = cbm_mcp_handle_tool(srv, "understand", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r2); + + char *raw = cbm_mcp_handle_tool(srv, "get_session_summary", + "{\"project\":\"impact\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + + /* Should contain markdown structure */ + ASSERT_NOT_NULL(strstr(text, "Session Summary")); + /* Should mention areas explored */ + ASSERT_NOT_NULL(strstr(text, "Areas explored")); + ASSERT_NOT_NULL(strstr(text, "Order")); + /* Should mention symbols */ + ASSERT_NOT_NULL(strstr(text, "Symbols investigated")); + ASSERT_NOT_NULL(strstr(text, "ProcessOrder")); + + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_summary_with_impact) { + cbm_mcp_server_t *srv = setup_impact_server(); + ASSERT_NOT_NULL(srv); + + /* Run impact analysis to populate session */ + char *r1 = cbm_mcp_handle_tool(srv, "get_impact_analysis", + "{\"project\":\"impact\",\"symbol\":\"ProcessOrder\"}"); + free(r1); + + char *raw = cbm_mcp_handle_tool(srv, "get_session_summary", + "{\"project\":\"impact\"}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "Session Summary")); + ASSERT_NOT_NULL(strstr(text, "Impact analyses")); + ASSERT_NOT_NULL(strstr(text, "ProcessOrder")); + + free(text); + free(raw); + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(session_summary_tools_list) { + char *tools_json = cbm_mcp_tools_list(); + ASSERT_NOT_NULL(tools_json); + ASSERT_NOT_NULL(strstr(tools_json, "get_session_summary")); + free(tools_json); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -3136,4 +3221,10 @@ SUITE(mcp) { 
RUN_TEST(session_hint_prepare_change_edited_file); RUN_TEST(session_hint_not_present_first_call); RUN_TEST(session_has_area_membership); + + /* Session summary (Phase 7C) */ + RUN_TEST(session_summary_empty); + RUN_TEST(session_summary_after_tools); + RUN_TEST(session_summary_with_impact); + RUN_TEST(session_summary_tools_list); }