From ac9ce2183dfc42a05027a602372bc64a2d940b38 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 16:58:45 +0530 Subject: [PATCH 1/6] Add architecture summary MCP tool --- src/main.c | 3 +- src/mcp/mcp.c | 361 +++++++++++- src/store/store.c | 1136 +++++++++++++++++++++++++++++++++++++- src/store/store.h | 63 +++ tests/test_integration.c | 15 + tests/test_mcp.c | 144 ++++- tests/test_store_arch.c | 214 +++++++ tests/test_store_nodes.c | 30 + 8 files changed, 1960 insertions(+), 6 deletions(-) diff --git a/src/main.c b/src/main.c index 70eadcdc..f442aa0c 100644 --- a/src/main.c +++ b/src/main.c @@ -149,7 +149,8 @@ static void print_help(void) { printf("\nSupported agents (auto-detected):\n"); printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); - printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n"); + printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); + printf(" get_architecture_summary, search_code,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 3530acc3..47329a1c 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 14 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. * * Uses yyjson for fast JSON parsing/building. * Single-threaded event loop: read line → parse → dispatch → respond. 
@@ -34,6 +34,7 @@
 #endif
 #include
 #include // int64_t
+#include
 #include
 #include
 #include
@@ -79,6 +80,138 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) {
     return s;
 }
 
+/* Size-capped, growable string buffer used to assemble markdown output. */
+typedef struct {
+    char *buf;      /* NUL-terminated heap buffer; NULL when allocation failed */
+    size_t len;     /* bytes used, excluding the terminator */
+    size_t cap;     /* bytes allocated for buf */
+    size_t limit;   /* appends that would push len past this are rejected */
+    bool truncated; /* latched once an append was rejected for exceeding limit */
+} markdown_builder_t;
+
+/* Initialise *b with a 512-byte buffer, no content, and a content cap of `limit` bytes. */
+static void markdown_builder_init(markdown_builder_t *b, size_t limit) {
+    b->cap = 512;
+    b->buf = malloc(b->cap);
+    b->len = 0;
+    b->limit = limit;
+    b->truncated = false;
+    if (b->buf) {
+        b->buf[0] = '\0';
+    }
+}
+
+/*
+ * Grow the buffer (doubling cap) until `need` more bytes plus the terminator fit.
+ * Returns false when there is no buffer or a reallocation fails; b->buf is then
+ * NULL, which the !b->buf guards in the append functions treat as a dead builder.
+ * NOTE(review): assumes safe_realloc() either aborts or releases the old block on
+ * failure; if it behaves like plain realloc(), the old block leaks here - confirm.
+ */
+static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) {
+    if (!b->buf) {
+        return false;
+    }
+    while (b->len + need + 1 > b->cap) {
+        b->cap *= 2;
+        b->buf = safe_realloc(b->buf, b->cap);
+        if (!b->buf) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/*
+ * Append `text` verbatim. Returns false without writing when an argument is NULL,
+ * the builder is dead or already truncated, or the text would exceed limit (which
+ * also latches `truncated`).
+ */
+static bool markdown_builder_append_raw(markdown_builder_t *b, const char *text) {
+    if (!b || !b->buf || !text || b->truncated) {
+        return false;
+    }
+    size_t add = strlen(text);
+    if (b->len + add > b->limit) {
+        b->truncated = true;
+        return false;
+    }
+    if (!markdown_builder_reserve(b, add)) {
+        return false;
+    }
+    memcpy(b->buf + b->len, text, add);
+    b->len += add;
+    b->buf[b->len] = '\0';
+    return true;
+}
+
+/*
+ * printf-style append with the same acceptance rules as markdown_builder_append_raw().
+ * The output is measured on a copy of the va_list first, so nothing is written
+ * unless the whole formatted string fits.
+ */
+static bool markdown_builder_appendf(markdown_builder_t *b, const char *fmt, ...) {
+    if (!b || !b->buf || !fmt || b->truncated) {
+        return false;
+    }
+
+    va_list ap;
+    va_start(ap, fmt);
+    va_list ap_copy;
+    va_copy(ap_copy, ap);
+    int needed = vsnprintf(NULL, 0, fmt, ap_copy);
+    va_end(ap_copy);
+    if (needed < 0) {
+        va_end(ap);
+        return false;
+    }
+    if (b->len + (size_t)needed > b->limit) {
+        b->truncated = true;
+        va_end(ap);
+        return false;
+    }
+    if (!markdown_builder_reserve(b, (size_t)needed)) {
+        va_end(ap);
+        return false;
+    }
+    vsnprintf(b->buf + b->len, b->cap - b->len, fmt, ap);
+    va_end(ap);
+    b->len += (size_t)needed;
+    return true;
+}
+
+/*
+ * Return the accumulated text (the builder's own buffer, or NULL if it has none).
+ * When an append was rejected, "\n_Truncated at max_tokens._\n" is written at the
+ * end, overwriting trailing content if needed so the total stays within limit.
+ */
+static char *markdown_builder_finish(markdown_builder_t *b) {
+    const char *note = "\n_Truncated at max_tokens._\n";
+    if (!b || !b->buf) {
+        return NULL;
+    }
+    if (b->truncated) {
+        size_t note_len = strlen(note);
+        if (note_len <= b->limit) {
+            size_t keep_len = b->len;
+            size_t final_len = 0;
+            if (keep_len + note_len > b->limit) {
+                keep_len = b->limit - note_len;
+            }
+            final_len = keep_len + note_len;
+            if (final_len > b->len &&
+                !markdown_builder_reserve(b, final_len - b->len)) {
+                return b->buf;
+            }
+            b->len = keep_len;
+            memcpy(b->buf + b->len, note, note_len);
+            b->len = final_len;
+            b->buf[b->len] = '\0';
+        }
+    }
+    return b->buf;
+}
+
 /* ══════════════════════════════════════════════════════════════════
  * JSON-RPC PARSING
  * ══════════════════════════════════════════════════════════════════ */
@@ -281,6 +387,17 @@ static const tool_def_t TOOLS[] = {
      "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":"
      "\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"},
 
+    {"get_architecture_summary",
+     "Generate a structured markdown architecture summary from the existing SQLite graph, with "
+     "optional focus filtering and output size control.",
+     "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\",\"description\":"
+     "\"Indexed project name (from list_projects).\"},\"project_path\":{\"type\":\"string\","
+
"\"description\":\"Deprecated alias: path to the indexed project.\"},\"max_tokens\":{" + "\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output size. Controls " + "detail level.\"},\"focus\":{\"type\":\"string\",\"description\":\"Optional domain keyword " + "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" + "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -1218,6 +1335,245 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static bool same_project_path(const char *lhs, const char *rhs) { + if (!lhs || !rhs) { + return false; + } + + char lhs_real[4096]; + char rhs_real[4096]; +#ifdef _WIN32 + if (_fullpath(lhs_real, lhs, sizeof(lhs_real)) && _fullpath(rhs_real, rhs, sizeof(rhs_real))) { + return strcmp(lhs_real, rhs_real) == 0; + } +#else + if (realpath(lhs, lhs_real) && realpath(rhs, rhs_real)) { + return strcmp(lhs_real, rhs_real) == 0; + } +#endif + return strcmp(lhs, rhs) == 0; +} + +static char *handle_get_architecture_summary(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *project_path = cbm_mcp_get_string_arg(args, "project_path"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", 2000); + char *display_path = NULL; + + if (!project && !project_path) { + free(focus); + return cbm_mcp_text_result("project is required", true); + } + if (max_tokens <= 0) { + max_tokens = 2000; + } + + if (!project) { + project = cbm_project_name_from_path(project_path); + if (!project) { + free(project_path); + free(focus); + return cbm_mcp_text_result("unable to derive project name from project_path", true); + } + } + + cbm_store_t *store = 
resolve_store(srv, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(project_path); + free(focus); + return _res; + } + + cbm_project_t proj_info = {0}; + if (cbm_store_get_project(store, project, &proj_info) != CBM_STORE_OK) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project is not indexed", true); + } + if (project_path && proj_info.root_path && proj_info.root_path[0] && + !same_project_path(project_path, proj_info.root_path)) { + cbm_project_free_fields(&proj_info); + free(project); + free(project_path); + free(focus); + return cbm_mcp_text_result("project_path does not match project", true); + } + if (proj_info.root_path && proj_info.root_path[0]) { + display_path = heap_strdup(proj_info.root_path); + } else if (project_path && project_path[0]) { + display_path = heap_strdup(project_path); + } + cbm_project_free_fields(&proj_info); + + cbm_architecture_summary_t summary = {0}; + if (cbm_store_get_architecture_summary(store, project, focus, &summary) != CBM_STORE_OK) { + free(project); + free(project_path); + free(display_path); + free(focus); + return cbm_mcp_text_result("failed to build architecture summary", true); + } + + size_t char_budget = (size_t)max_tokens * 4U; + if (char_budget < 512) { + char_budget = 512; + } + markdown_builder_t md; + markdown_builder_init(&md, char_budget); + + const char *display_name = display_path ? cbm_path_base(display_path) : project; + (void)markdown_builder_appendf(&md, "## Project: %s\n", display_name ? 
display_name : project); + if (focus && focus[0]) { + (void)markdown_builder_appendf(&md, "Focus: %s\n", focus); + } + (void)markdown_builder_appendf(&md, "Files: %d | Functions: %d | Classes: %d | Routes: %d\n\n", + summary.total_files, summary.total_functions, + summary.total_classes, summary.total_routes); + + (void)markdown_builder_append_raw(&md, "## Key Files (by connectivity)\n"); + if (summary.file_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching files.\n\n"); + } else { + for (int i = 0; i < summary.file_count; i++) { + if (!markdown_builder_appendf(&md, "%d. %s - %d inbound calls, %d outbound\n", i + 1, + summary.files[i].file ? summary.files[i].file : "", + summary.files[i].inbound_calls, + summary.files[i].outbound_calls)) { + break; + } + if (summary.files[i].symbol_count > 0) { + (void)markdown_builder_append_raw(&md, " Key methods: "); + for (int j = 0; j < summary.files[i].symbol_count; j++) { + if (j > 0 && !markdown_builder_append_raw(&md, ", ")) { + break; + } + if (summary.files[i].symbols[j].span_lines > 0) { + (void)markdown_builder_appendf( + &md, "%s (%d lines)", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : "", + summary.files[i].symbols[j].span_lines); + } else { + (void)markdown_builder_appendf( + &md, "%s", + summary.files[i].symbols[j].name ? summary.files[i].symbols[j].name : ""); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Route Map\n"); + if (summary.route_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching routes.\n\n"); + } else { + for (int i = 0; i < summary.route_count; i++) { + (void)markdown_builder_appendf( + &md, "%s %s", summary.routes[i].method ? summary.routes[i].method : "", + summary.routes[i].path ? 
summary.routes[i].path : ""); + if (summary.routes[i].handler && summary.routes[i].handler[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].handler); + } + if (summary.routes[i].service && summary.routes[i].service[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].service); + } + if (summary.routes[i].next && summary.routes[i].next[0]) { + (void)markdown_builder_appendf(&md, " -> %s", summary.routes[i].next); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Module Clusters (Louvain communities)\n"); + if (summary.cluster_count == 0) { + (void)markdown_builder_append_raw(&md, "No multi-file clusters found.\n\n"); + } else { + for (int i = 0; i < summary.cluster_count; i++) { + (void)markdown_builder_appendf(&md, "Cluster %d (%d files)\n", summary.clusters[i].id, + summary.clusters[i].file_count); + if (summary.clusters[i].core_file_count > 0) { + (void)markdown_builder_append_raw(&md, "Core: "); + for (int j = 0; j < summary.clusters[i].core_file_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].core_files[j] + ? cbm_path_base(summary.clusters[i].core_files[j]) + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + if (summary.clusters[i].entry_point_count > 0) { + (void)markdown_builder_append_raw(&md, "Entry: "); + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (j > 0) { + (void)markdown_builder_append_raw(&md, ", "); + } + (void)markdown_builder_appendf( + &md, "%s", + summary.clusters[i].entry_points[j] + ? 
summary.clusters[i].entry_points[j] + : ""); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + } + + (void)markdown_builder_append_raw(&md, "## High-Connectivity Functions (in_degree >= 5)\n"); + if (summary.function_count == 0) { + (void)markdown_builder_append_raw(&md, "None above threshold.\n\n"); + } else { + for (int i = 0; i < summary.function_count; i++) { + (void)markdown_builder_appendf( + &md, "%s - called by %d functions", + summary.functions[i].name ? summary.functions[i].name : "", + summary.functions[i].in_degree); + if (summary.functions[i].file && summary.functions[i].file[0]) { + (void)markdown_builder_appendf(&md, " [%s]", summary.functions[i].file); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + (void)markdown_builder_append_raw(&md, "\n"); + } + + (void)markdown_builder_append_raw(&md, "## Entry Points\n"); + if (summary.entry_point_count == 0) { + (void)markdown_builder_append_raw(&md, "No matching entry points.\n"); + } else { + for (int i = 0; i < summary.entry_point_count; i++) { + (void)markdown_builder_appendf(&md, "%s: %d\n", + summary.entry_points[i].kind + ? summary.entry_points[i].kind + : "Other", + summary.entry_points[i].count); + } + } + + char *markdown = markdown_builder_finish(&md); + char *result = cbm_mcp_text_result(markdown ? 
markdown : "", false); + + free(markdown); + cbm_store_architecture_summary_free(&summary); + free(project); + free(project_path); + free(display_path); + free(focus); + return result; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2704,6 +3060,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_architecture_summary") == 0) { + return handle_get_architecture_summary(srv, args_json); + } /* Pipeline-dependent tools */ if (strcmp(tool_name, "index_repository") == 0) { diff --git a/src/store/store.c b/src/store/store.c index 88aa7078..ca196255 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -9,11 +9,14 @@ // for ISO timestamp #include "store/store.h" +#include "foundation/hash_table.h" #include "foundation/platform.h" #include "foundation/compat.h" #include "foundation/compat_regex.h" #include +#include +#include #include #include #include @@ -246,6 +249,27 @@ static int configure_pragmas(cbm_store_t *s, bool in_memory) { return rc; } +static int configure_query_pragmas(cbm_store_t *s) { + int rc; + rc = exec_sql(s, "PRAGMA foreign_keys = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA temp_store = MEMORY;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA busy_timeout = 10000;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA query_only = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + return exec_sql(s, "PRAGMA mmap_size = 67108864;"); /* 64 MB */ +} + /* ── REGEXP function for SQLite ──────────────────────────────────── */ static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) { @@ -375,8 +399,8 @@ cbm_store_t 
*cbm_store_open_path_query(const char *db_path) {
         return NULL;
     }
 
-    /* Open read-write but do NOT create — returns SQLITE_CANTOPEN if absent. */
-    int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READWRITE, NULL);
+    /* Open read-only and do NOT create — query tools should never need write access. */
+    int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL);
     if (rc != SQLITE_OK) {
         /* sqlite3_open_v2 allocates a handle even on failure — must close it. */
         sqlite3_close(s->db);
@@ -395,7 +419,7 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) {
     sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL,
                             sqlite_iregexp, NULL, NULL);
 
-    if (configure_pragmas(s, false) != CBM_STORE_OK) {
+    if (configure_query_pragmas(s) != CBM_STORE_OK) {
         sqlite3_close(s->db);
         free((void *)s->db_path);
         free(s);
@@ -4085,6 +4109,1182 @@ void cbm_store_architecture_free(cbm_architecture_info_t *out) {
     memset(out, 0, sizeof(*out));
 }
 
+/* One File node plus its cross-file call tallies. */
+typedef struct {
+    int64_t node_id;
+    char *path;
+    int inbound_calls;
+    int outbound_calls;
+} arch_summary_file_row_t;
+
+/* One community id with a growable list of member indices. */
+typedef struct {
+    int community;
+    int *member_indices;
+    int member_count;
+    int member_cap;
+} arch_summary_cluster_row_t;
+
+/* Heap-allocated lowercase copy of `focus`; NULL when focus is empty or malloc fails. */
+static char *summary_focus_term(const char *focus) {
+    if (!focus || !focus[0]) {
+        return NULL;
+    }
+    size_t len = strlen(focus);
+    char *term = malloc(len + 1);
+    if (!term) {
+        return NULL;
+    }
+    for (size_t i = 0; i < len; i++) {
+        term[i] = (char)tolower((unsigned char)focus[i]);
+    }
+    term[len] = '\0';
+    return term;
+}
+
+/*
+ * Heap-allocated "%<lowercased focus>%" pattern for SQL LIKE; NULL when focus is
+ * empty or allocation fails. LIKE wildcards inside focus are not escaped.
+ */
+static char *summary_focus_like(const char *focus) {
+    char *term = summary_focus_term(focus);
+    if (!term) {
+        return NULL;
+    }
+    size_t len = strlen(term);
+    char *like = malloc(len + 3);
+    if (!like) {
+        free(term);
+        return NULL;
+    }
+    like[0] = '%';
+    memcpy(like + 1, term, len);
+    like[len + 1] = '%';
+    like[len + 2] = '\0';
+    free(term);
+    return like;
+}
+
+/* True when focus_term is empty, or when it occurs in `text` per cbm_strcasestr(). */
+static bool summary_text_matches(const char *focus_term, const char *text) {
+    if (!focus_term || !focus_term[0]) {
+        return true;
+    }
+    if (!text || !text[0]) {
+        return false;
+    }
+    return cbm_strcasestr(text, focus_term) != NULL;
+}
+
+/*
+ * Count this project's nodes matching the SQL predicate `label_sql`, skipping nodes
+ * flagged is_test or whose file_path contains "test". With a non-empty focus_like,
+ * name, qualified_name, file_path or properties must also match it.
+ * label_sql is spliced into the statement text, so it must be a trusted fragment.
+ * Returns the count; a prepare failure returns CBM_STORE_ERR through the same int.
+ */
+static int summary_count_nodes(cbm_store_t *s, const char *project, const char *label_sql,
+                               const char *focus_like) {
+    char sql[2048];
+    if (focus_like && focus_like[0]) {
+        snprintf(sql, sizeof(sql),
+                 "SELECT COUNT(*) FROM nodes "
+                 "WHERE project=?1 AND %s "
+                 "AND (json_extract(properties, '$.is_test') IS NULL OR "
+                 "json_extract(properties, '$.is_test') != 1) "
+                 "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' "
+                 "AND (lower(COALESCE(name, '')) LIKE ?2 "
+                 "OR lower(COALESCE(qualified_name, '')) LIKE ?2 "
+                 "OR lower(COALESCE(file_path, '')) LIKE ?2 "
+                 "OR lower(COALESCE(properties, '')) LIKE ?2);",
+                 label_sql);
+    } else {
+        snprintf(sql, sizeof(sql),
+                 "SELECT COUNT(*) FROM nodes "
+                 "WHERE project=?1 AND %s "
+                 "AND (json_extract(properties, '$.is_test') IS NULL OR "
+                 "json_extract(properties, '$.is_test') != 1) "
+                 "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%';",
+                 label_sql);
+    }
+
+    sqlite3_stmt *stmt = NULL;
+    if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) {
+        store_set_error_sqlite(s, "summary_count_nodes");
+        return CBM_STORE_ERR;
+    }
+    bind_text(stmt, 1, project);
+    if (focus_like && focus_like[0]) {
+        bind_text(stmt, 2, focus_like);
+    }
+    int count = 0;
+    if (sqlite3_step(stmt) == SQLITE_ROW) {
+        count = sqlite3_column_int(stmt, 0);
+    }
+    sqlite3_finalize(stmt);
+    return count;
+}
+
+/*
+ * Comparator over arch_summary_file_row_t: highest inbound+outbound total first,
+ * then highest inbound, then path ascending. Rows without a path sort last.
+ */
+static int summary_file_row_cmp(const void *lhs, const void *rhs) {
+    const arch_summary_file_row_t *a = lhs;
+    const arch_summary_file_row_t *b = rhs;
+    int64_t a_score = (int64_t)a->inbound_calls + (int64_t)a->outbound_calls;
+    int64_t b_score = (int64_t)b->inbound_calls + (int64_t)b->outbound_calls;
+    if (a_score != b_score) {
+        return b_score > a_score ? 1 : -1;
+    }
+    if (a->inbound_calls != b->inbound_calls) {
+        return b->inbound_calls > a->inbound_calls ? 1 : -1;
+    }
+    if (!a->path || !b->path) {
+        /* Two missing paths must compare equal or the ordering is inconsistent. */
+        return (a->path == NULL) - (b->path == NULL);
+    }
+    return strcmp(a->path, b->path);
+}
+
+/*
+ * Render a JSON string, bool or number as text into out (truncating to out_sz).
+ * Leaves out empty for NULL and for any other value type.
+ */
+static void summary_copy_json_scalar(yyjson_val *val, char *out, size_t out_sz) {
+    if (out_sz == 0) {
+        return;
+    }
+    out[0] = '\0';
+    if (!val) {
+        return;
+    }
+
+    if (yyjson_is_str(val)) {
+        snprintf(out, out_sz, "%s", yyjson_get_str(val));
+        return;
+    }
+    if (yyjson_is_bool(val)) {
+        snprintf(out, out_sz, "%s", yyjson_get_bool(val) ? "true" : "false");
+        return;
+    }
+    /* Unsigned first so values above INT64_MAX are not squeezed through int64. */
+    if (yyjson_is_uint(val)) {
+        snprintf(out, out_sz, "%llu", (unsigned long long)yyjson_get_uint(val));
+        return;
+    }
+    if (yyjson_is_sint(val)) {
+        snprintf(out, out_sz, "%lld", (long long)yyjson_get_sint(val));
+        return;
+    }
+    if (yyjson_is_real(val)) {
+        snprintf(out, out_sz, "%.17g", yyjson_get_real(val));
+    }
+}
+
+/*
+ * Parse a node's properties JSON and copy its top-level "method", "path" and
+ * "handler" scalars into the caller's buffers. All three are set to "" first and
+ * stay empty when props is NULL, unparsable, or not an object.
+ */
+static void summary_extract_route_fields(const char *props, char *method, size_t method_sz,
+                                         char *path, size_t path_sz, char *handler,
+                                         size_t handler_sz) {
+    method[0] = '\0';
+    path[0] = '\0';
+    handler[0] = '\0';
+    if (!props) {
+        return;
+    }
+
+    yyjson_doc *doc = yyjson_read(props, strlen(props), 0);
+    if (!doc) {
+        return;
+    }
+
+    yyjson_val *root = yyjson_doc_get_root(doc);
+    if (yyjson_is_obj(root)) {
+        summary_copy_json_scalar(yyjson_obj_get(root, "method"), method, method_sz);
+        summary_copy_json_scalar(yyjson_obj_get(root, "path"), path, path_sz);
+        summary_copy_json_scalar(yyjson_obj_get(root, "handler"), handler, handler_sz);
+    }
+    yyjson_doc_free(doc);
+}
+
+/* Free the first `count` path strings of a row array and the array itself. */
+static void summary_file_rows_discard(arch_summary_file_row_t *rows, int count) {
+    for (int i = 0; i < count; i++) {
+        free(rows[i].path);
+    }
+    free(rows);
+}
+
+/*
+ * Load the project's File nodes whose path does not contain "test" (narrowed by
+ * focus_like when given) and tally cross-file CALLS edges between them.
+ * On CBM_STORE_OK the caller receives *out_rows / *out_count, one row per file with
+ * inbound/outbound call totals, and *out_edges / *out_edge_count, one entry per
+ * counted call holding the File node ids of caller and callee. Both arrays are
+ * heap-allocated. On CBM_STORE_ERR all four outputs are left NULL/0.
+ */
+static int summary_collect_file_rows(cbm_store_t *s, const char *project, const char *focus_like,
+                                     arch_summary_file_row_t **out_rows, int *out_count,
+                                     cbm_louvain_edge_t **out_edges, int *out_edge_count) {
+    *out_rows = NULL;
+    *out_count = 0;
+    *out_edges = NULL;
+    *out_edge_count = 0;
+
+    const char *files_sql_no_focus =
+        "SELECT id, COALESCE(NULLIF(file_path, ''), name) "
+        "FROM nodes "
+        "WHERE project=?1 AND label='File' "
+        "AND lower(COALESCE(file_path, '')) NOT LIKE '%test%' "
+        "ORDER BY 2;";
+    const char *files_sql_focus =
+        "SELECT n.id, COALESCE(NULLIF(n.file_path, ''), n.name) "
+        "FROM nodes n "
+        "WHERE n.project=?1 AND n.label='File' "
+        "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%test%' "
+        "AND (lower(COALESCE(n.file_path, '')) LIKE ?2 "
+        "OR lower(COALESCE(n.name, '')) LIKE ?2 "
+        "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 "
+        "OR EXISTS (SELECT 1 FROM nodes m "
+        " WHERE m.project = n.project "
+        " AND m.file_path = n.file_path "
+        " AND lower(COALESCE(m.file_path, '')) NOT LIKE '%test%' "
+        " AND (lower(COALESCE(m.name, '')) LIKE ?2 "
+        " OR lower(COALESCE(m.qualified_name, '')) LIKE ?2 "
+        " OR lower(COALESCE(m.file_path, '')) LIKE ?2 "
+        " OR lower(COALESCE(m.properties, '')) LIKE ?2))) "
+        "ORDER BY 2;";
+
+    sqlite3_stmt *stmt = NULL;
+    const char *files_sql = (focus_like && focus_like[0]) ? files_sql_focus : files_sql_no_focus;
+    if (sqlite3_prepare_v2(s->db, files_sql, -1, &stmt, NULL) != SQLITE_OK) {
+        store_set_error_sqlite(s, "summary_collect_files");
+        return CBM_STORE_ERR;
+    }
+    bind_text(stmt, 1, project);
+    if (focus_like && focus_like[0]) {
+        bind_text(stmt, 2, focus_like);
+    }
+
+    int cap = 16;
+    int count = 0;
+    arch_summary_file_row_t *rows = calloc(cap, sizeof(arch_summary_file_row_t));
+    if (!rows) {
+        sqlite3_finalize(stmt);
+        store_set_error(s, "summary_collect_file_rows: row alloc failed");
+        return CBM_STORE_ERR;
+    }
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+        if (count >= cap) {
+            int old_cap = cap;
+            cap *= 2;
+            rows = safe_realloc(rows, (size_t)cap * sizeof(arch_summary_file_row_t));
+            if (!rows) {
+                sqlite3_finalize(stmt);
+                store_set_error(s, "summary_collect_file_rows: row alloc failed");
+                return CBM_STORE_ERR;
+            }
+            memset(rows + old_cap, 0, (size_t)(cap - old_cap) * sizeof(arch_summary_file_row_t));
+        }
+        rows[count].node_id = sqlite3_column_int64(stmt, 0);
+        rows[count].path = heap_strdup((const char *)sqlite3_column_text(stmt, 1));
+        count++;
+    }
+    sqlite3_finalize(stmt);
+
+    CBMHashTable *file_rows = cbm_ht_create(count > 0 ? (uint32_t)count * 2U : 32U);
+    if (!file_rows) {
+        summary_file_rows_discard(rows, count);
+        store_set_error(s, "summary_collect_file_rows: file lookup alloc failed");
+        return CBM_STORE_ERR;
+    }
+    for (int i = 0; i < count; i++) {
+        if (rows[i].path && rows[i].path[0]) {
+            cbm_ht_set(file_rows, rows[i].path, (void *)((intptr_t)i + 1));
+        }
+    }
+
+    const char *edges_sql =
+        "SELECT src.file_path, dst.file_path "
+        "FROM edges e "
+        "JOIN nodes src ON src.id = e.source_id "
+        "JOIN nodes dst ON dst.id = e.target_id "
+        "WHERE e.project=?1 AND e.type='CALLS' "
+        "AND src.file_path <> '' AND dst.file_path <> '' "
+        "AND src.file_path <> dst.file_path "
+        "AND lower(src.file_path) NOT LIKE '%test%' "
+        "AND lower(dst.file_path) NOT LIKE '%test%';";
+
+    stmt = NULL;
+    if (sqlite3_prepare_v2(s->db, edges_sql, -1, &stmt, NULL) != SQLITE_OK) {
+        cbm_ht_free(file_rows);
+        summary_file_rows_discard(rows, count);
+        store_set_error_sqlite(s, "summary_collect_edges");
+        return CBM_STORE_ERR;
+    }
+    bind_text(stmt, 1, project);
+
+    int edge_cap = 64;
+    int edge_count = 0;
+    cbm_louvain_edge_t *edges = malloc((size_t)edge_cap * sizeof(cbm_louvain_edge_t));
+    if (!edges) {
+        sqlite3_finalize(stmt);
+        cbm_ht_free(file_rows);
+        summary_file_rows_discard(rows, count);
+        store_set_error(s, "summary_collect_file_rows: edge alloc failed");
+        return CBM_STORE_ERR;
+    }
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+        const char *src_file = (const char *)sqlite3_column_text(stmt, 0);
+        const char *dst_file = (const char *)sqlite3_column_text(stmt, 1);
+        intptr_t src_val = (intptr_t)(src_file ? cbm_ht_get(file_rows, src_file) : NULL);
+        intptr_t dst_val = (intptr_t)(dst_file ? cbm_ht_get(file_rows, dst_file) : NULL);
+        int src_idx = src_val ? (int)(src_val - 1) : -1;
+        int dst_idx = dst_val ? (int)(dst_val - 1) : -1;
+        if (src_idx < 0 || dst_idx < 0 || src_idx == dst_idx) {
+            continue;
+        }
+
+        rows[src_idx].outbound_calls++;
+        rows[dst_idx].inbound_calls++;
+
+        if (edge_count >= edge_cap) {
+            edge_cap *= 2;
+            edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_louvain_edge_t));
+            if (!edges) {
+                sqlite3_finalize(stmt);
+                cbm_ht_free(file_rows);
+                summary_file_rows_discard(rows, count);
+                store_set_error(s, "summary_collect_file_rows: edge alloc failed");
+                return CBM_STORE_ERR;
+            }
+        }
+        edges[edge_count].src = rows[src_idx].node_id;
+        edges[edge_count].dst = rows[dst_idx].node_id;
+        edge_count++;
+    }
+    sqlite3_finalize(stmt);
+    cbm_ht_free(file_rows);
+
+    *out_rows = rows;
+    *out_count = count;
+    *out_edges = edges;
+    *out_edge_count = edge_count;
+    return CBM_STORE_OK;
+}
+
+/*
+ * Attach to `file` up to three Function/Method nodes from file->file that are not
+ * flagged is_test, longest line span first (ties broken by name), storing them in
+ * file->symbols / file->symbol_count. Returns CBM_STORE_ERR without touching `file`.
+ */
+static int summary_fill_key_symbols(cbm_store_t *s, const char *project, cbm_arch_summary_file_t *file) {
+    const char *sql =
+        "SELECT name, "
+        "CASE WHEN end_line >= start_line AND start_line > 0 "
+        "THEN end_line - start_line + 1 ELSE 0 END AS span "
+        "FROM nodes "
+        "WHERE project=?1 AND file_path=?2 "
+        "AND label IN ('Function','Method') "
+        "AND (json_extract(properties, '$.is_test') IS NULL OR "
+        "json_extract(properties, '$.is_test') != 1) "
+        "ORDER BY span DESC, name "
+        "LIMIT 3;";
+
+    sqlite3_stmt *stmt = NULL;
+    if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) {
+        store_set_error_sqlite(s, "summary_fill_key_symbols");
+        return CBM_STORE_ERR;
+    }
+    bind_text(stmt, 1, project);
+    bind_text(stmt, 2, file->file);
+
+    int cap = 4;
+    int count = 0;
+    cbm_arch_summary_symbol_t *symbols = calloc(cap, sizeof(cbm_arch_summary_symbol_t));
+    if (!symbols) {
+        sqlite3_finalize(stmt);
+        store_set_error(s, "summary_fill_key_symbols: symbol alloc failed");
+        return CBM_STORE_ERR;
+    }
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+        if (count >= cap) {
+            cap *= 2;
+            symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_arch_summary_symbol_t));
+        }
+        symbols[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0));
+        symbols[count].span_lines = sqlite3_column_int(stmt, 1);
+        count++;
+    }
+    sqlite3_finalize(stmt);
+
+    file->symbols = symbols;
+    file->symbol_count = count;
+    return CBM_STORE_OK;
+}
+
+static int summary_find_handler_node(cbm_store_t *s, const char *project, const char
*handler, + int64_t *out_id, char **out_file_path) { + *out_id = 0; + *out_file_path = NULL; + if (!handler || !handler[0]) { + return CBM_STORE_NOT_FOUND; + } + + char owner[256] = ""; + char member[256] = ""; + const char *sep = strchr(handler, '@'); + int sep_len = 1; + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + sep = scope; + sep_len = 2; + } + + if (sep) { + size_t owner_len = (size_t)(sep - handler); + if (owner_len >= sizeof(owner)) { + owner_len = sizeof(owner) - 1; + } + memcpy(owner, handler, owner_len); + owner[owner_len] = '\0'; + snprintf(member, sizeof(member), "%s", sep + sep_len); + } else { + snprintf(member, sizeof(member), "%s", handler); + } + + cbm_node_t *nodes = NULL; + int count = 0; + if (cbm_store_find_nodes_by_name(s, project, member, &nodes, &count) != CBM_STORE_OK || + count == 0) { + cbm_store_free_nodes(nodes, count); + return CBM_STORE_NOT_FOUND; + } + + int pick = 0; + for (int i = 0; i < count; i++) { + if (owner[0] && + ((nodes[i].qualified_name && cbm_strcasestr(nodes[i].qualified_name, owner)) || + (nodes[i].file_path && cbm_strcasestr(nodes[i].file_path, owner)))) { + pick = i; + break; + } + if (!owner[0] && nodes[i].file_path && !cbm_is_test_file_path(nodes[i].file_path)) { + pick = i; + break; + } + } + + *out_id = nodes[pick].id; + if (nodes[pick].file_path && nodes[pick].file_path[0]) { + *out_file_path = heap_strdup(nodes[pick].file_path); + } + cbm_store_free_nodes(nodes, count); + return CBM_STORE_OK; +} + +static int summary_query_primary_callee(cbm_store_t *s, int64_t source_id, const char *source_file, + char **out_name, int64_t *out_id, char **out_file_path) { + *out_name = NULL; + if (out_id) { + *out_id = 0; + } + if (out_file_path) { + *out_file_path = NULL; + } + + const char *sql = + "SELECT n.id, n.name, COALESCE(n.file_path, '') " + "FROM edges e " + "JOIN nodes n ON n.id = e.target_id " + "WHERE e.source_id=?1 AND e.type='CALLS' " + "AND (json_extract(n.properties, 
'$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "ORDER BY CASE " + " WHEN ?2 <> '' AND COALESCE(n.file_path, '') <> '' " + " AND COALESCE(n.file_path, '') <> ?2 THEN 0 " + " ELSE 1 " + " END, " + " CASE WHEN n.label IN ('Method','Function') THEN 0 ELSE 1 END, " + " n.name " + "LIMIT 1;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_query_primary_callee"); + return CBM_STORE_ERR; + } + sqlite3_bind_int64(stmt, 1, source_id); + bind_text(stmt, 2, source_file ? source_file : ""); + + if (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 1); + const char *file_path = (const char *)sqlite3_column_text(stmt, 2); + if (out_id) { + *out_id = sqlite3_column_int64(stmt, 0); + } + *out_name = heap_strdup(name); + if (out_file_path && file_path && file_path[0]) { + *out_file_path = heap_strdup(file_path); + } + } + sqlite3_finalize(stmt); + return CBM_STORE_OK; +} + +static int summary_collect_routes(cbm_store_t *s, const char *project, const char *focus_term, + cbm_arch_summary_route_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + const char *sql = + "SELECT name, properties, COALESCE(file_path, '') " + "FROM nodes " + "WHERE project=?1 AND label='Route' " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "ORDER BY name;"; + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_routes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int count = 0; + cbm_arch_summary_route_t *routes = calloc(cap, sizeof(cbm_arch_summary_route_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 0); + const char *props = (const char *)sqlite3_column_text(stmt, 1); + 
const char *route_file = (const char *)sqlite3_column_text(stmt, 2); + if (cbm_is_test_file_path(route_file)) { + continue; + } + + char method[64]; + char path[512]; + char handler[256]; + summary_extract_route_fields(props, method, sizeof(method), path, sizeof(path), handler, + sizeof(handler)); + /* Use the node name as the path when the properties supplied none. */ + if (!path[0] && name) { + snprintf(path, sizeof(path), "%s", name); + } + /* Handler lookup is best effort: its result is ignored and a miss leaves handler_id at 0 and handler_file NULL. */ + int64_t handler_id = 0; + char *handler_file = NULL; + if (handler[0]) { + (void)summary_find_handler_node(s, project, handler, &handler_id, &handler_file); + } + /* First hop: the handler's primary callee (service). Second hop: that callee's own primary callee (next), compared against the service file, or the handler file when the service has none. */ + char *service = NULL; + char *next = NULL; + char *service_file = NULL; + int64_t service_id = 0; + if (handler_id > 0) { + (void)summary_query_primary_callee(s, handler_id, handler_file, &service, &service_id, + &service_file); + if (service_id > 0) { + (void)summary_query_primary_callee(s, service_id, + service_file ? service_file : handler_file, &next, + NULL, NULL); + } + } + /* Focus filter: drop the route, and free what was resolved for it, when nothing matches the focus term. */ + if (focus_term && focus_term[0] && + !summary_text_matches(focus_term, method) && + !summary_text_matches(focus_term, path) && + !summary_text_matches(focus_term, handler) && + !summary_text_matches(focus_term, service) && + !summary_text_matches(focus_term, next) && + !summary_text_matches(focus_term, handler_file)) { + free(service_file); + free(handler_file); + free(service); + free(next); + continue; + } + + if (count >= cap) { + cap *= 2; + routes = safe_realloc(routes, (size_t)cap * sizeof(cbm_arch_summary_route_t)); + } + + routes[count].method = heap_strdup(method[0] ? method : ""); + routes[count].path = heap_strdup(path[0] ? path : ""); + routes[count].handler = heap_strdup(handler[0] ? 
handler : ""); + routes[count].service = service; + routes[count].next = next; + routes[count].handler_file = handler_file; + free(service_file); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = routes; + *out_count = count; + return CBM_STORE_OK; +} + /* Appends member_index to the cluster's index array, growing the array (4 slots first, then doubling) through safe_realloc when it is full. */ +static void summary_cluster_add_member(arch_summary_cluster_row_t *cluster, int member_index) { + if (cluster->member_count >= cluster->member_cap) { + cluster->member_cap = cluster->member_cap ? cluster->member_cap * 2 : 4; + cluster->member_indices = + safe_realloc(cluster->member_indices, (size_t)cluster->member_cap * sizeof(int)); + } + cluster->member_indices[cluster->member_count++] = member_index; +} + /* Copies the part of `handler` that precedes the first '@' or "::" (whichever comes first; the whole string when neither is present) into buf, truncated to buf_size - 1 characters. Returns buf, or NULL when the input is NULL/empty, buf is unusable, or the owner part is empty. */ +static const char *summary_handler_owner_token(const char *handler, char *buf, size_t buf_size) { + if (!handler || !handler[0] || !buf || buf_size == 0) { + return NULL; + } + + const char *sep = strchr(handler, '@'); + size_t len = sep ? (size_t)(sep - handler) : strlen(handler); + const char *scope = strstr(handler, "::"); + if (scope && (!sep || scope < sep)) { + len = (size_t)(scope - handler); + } + if (len == 0) { + return NULL; + } + if (len >= buf_size) { + len = buf_size - 1; + } + memcpy(buf, handler, len); + buf[len] = '\0'; + return buf; +} + /* qsort comparator: clusters with more members sort first. */ +static int summary_cluster_cmp(const void *lhs, const void *rhs) { + const arch_summary_cluster_row_t *a = lhs; + const arch_summary_cluster_row_t *b = rhs; + return b->member_count - a->member_count; +} + /* Returns true when some member row of the cluster has a path equal to `path` (strcmp); false for NULL arguments. */ +static bool summary_cluster_has_file(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, const char *path) { + if (!cluster || !rows || !path) { + return false; + } + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && strcmp(candidate, path) == 0) { + return true; + } + } + return false; +} + /* Decides whether a route belongs to a cluster: first by an exact handler_file match against the member files, otherwise by finding the handler's owner token inside any member path via cbm_strcasestr. */ +static bool summary_cluster_matches_route(const arch_summary_cluster_row_t *cluster, + const arch_summary_file_row_t *rows, + const 
cbm_arch_summary_route_t *route) { + char owner_buf[256]; + const char *owner = NULL; + + if (!cluster || !rows || !route) { + return false; + } + if (summary_cluster_has_file(cluster, rows, route->handler_file)) { + return true; + } + /* Fallback when the handler file is unknown or not a member: match on the owner token of the handler string. */ + owner = summary_handler_owner_token(route->handler, owner_buf, sizeof(owner_buf)); + if (!owner || !owner[0]) { + return false; + } + + for (int i = 0; i < cluster->member_count; i++) { + const char *candidate = rows[cluster->member_indices[i]].path; + if (candidate && cbm_strcasestr(candidate, owner)) { + return true; + } + } + return false; +} + /* Runs cbm_louvain over the file rows and their edges, groups the rows by the community each one is assigned to, drops groups with fewer than two files, keeps the six largest, and emits for each its file count, up to three core files (highest inbound + outbound calls) and the labels of the routes that match it. Returns CBM_STORE_OK with an empty result when there are no rows or no edges; the caller owns *out_arr. */ +static int summary_build_clusters(const arch_summary_file_row_t *rows, int row_count, + const cbm_louvain_edge_t *edges, int edge_count, + const cbm_arch_summary_route_t *routes, int route_count, + cbm_arch_summary_cluster_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + if (row_count == 0 || edge_count == 0) { + return CBM_STORE_OK; + } + + int64_t *node_ids = malloc((size_t)row_count * sizeof(int64_t)); + if (!node_ids) { + /* Nothing else has been allocated yet, so failing here needs no cleanup. */ + return CBM_STORE_ERR; + } + for (int i = 0; i < row_count; i++) { + node_ids[i] = rows[i].node_id; + } + + cbm_louvain_result_t *results = NULL; + int result_count = 0; + int rc = cbm_louvain(node_ids, row_count, edges, edge_count, &results, &result_count); + free(node_ids); + if (rc != CBM_STORE_OK) { + return rc; + } + + CBMHashTable *row_by_id = cbm_ht_create(row_count > 0 ? 
(uint32_t)row_count * 2U : 32U); + char(*row_id_keys)[32] = NULL; + if (!row_by_id) { + free(results); + return CBM_STORE_ERR; + } + row_id_keys = calloc((size_t)row_count, sizeof(*row_id_keys)); + if (!row_id_keys) { + cbm_ht_free(row_by_id); + free(results); + return CBM_STORE_ERR; + } + /* Key each row by its node id printed as decimal text; the stored value is the row index + 1 so that a NULL lookup result can mean "not present". */ + for (int i = 0; i < row_count; i++) { + snprintf(row_id_keys[i], sizeof(row_id_keys[i]), "%lld", (long long)rows[i].node_id); + cbm_ht_set(row_by_id, row_id_keys[i], (void *)((intptr_t)i + 1)); + } + /* Group the results by community id, using a linear search over the clusters found so far. */ + int cap = 8; + int count = 0; + arch_summary_cluster_row_t *clusters = calloc(cap, sizeof(arch_summary_cluster_row_t)); + for (int i = 0; i < result_count; i++) { + char result_key[32]; + snprintf(result_key, sizeof(result_key), "%lld", (long long)results[i].node_id); + intptr_t row_val = (intptr_t)cbm_ht_get(row_by_id, result_key); + int member_index = row_val ? (int)(row_val - 1) : -1; + if (member_index < 0) { + continue; + } + + int slot = -1; + for (int j = 0; j < count; j++) { + if (clusters[j].community == results[i].community) { + slot = j; + break; + } + } + if (slot < 0) { + if (count >= cap) { + int old_cap = cap; + cap *= 2; + clusters = safe_realloc(clusters, (size_t)cap * sizeof(arch_summary_cluster_row_t)); + memset(clusters + old_cap, 0, + (size_t)(cap - old_cap) * sizeof(arch_summary_cluster_row_t)); + } + slot = count++; + clusters[slot].community = results[i].community; + } + summary_cluster_add_member(&clusters[slot], member_index); + } + free(row_id_keys); + cbm_ht_free(row_by_id); + free(results); + /* Compact in place, discarding clusters that have fewer than two member files. */ + int write_idx = 0; + for (int i = 0; i < count; i++) { + if (clusters[i].member_count < 2) { + free(clusters[i].member_indices); + continue; + } + if (write_idx != i) { + clusters[write_idx] = clusters[i]; + } + write_idx++; + } + count = write_idx; + if (count == 0) { + free(clusters); + return CBM_STORE_OK; + } + /* Largest clusters first; everything beyond the sixth is released. */ + qsort(clusters, (size_t)count, sizeof(arch_summary_cluster_row_t), summary_cluster_cmp); + if (count > 6) { + for (int i = 6; i < count; i++) { + 
free(clusters[i].member_indices); + } + count = 6; + } + /* Build the public cluster records; ids are 1-based positions in the sorted order. */ + cbm_arch_summary_cluster_t *out = calloc((size_t)count, sizeof(cbm_arch_summary_cluster_t)); + for (int i = 0; i < count; i++) { + out[i].id = i + 1; + out[i].file_count = clusters[i].member_count; + + int core_count = clusters[i].member_count < 3 ? clusters[i].member_count : 3; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + out[i].core_files = calloc((size_t)core_count, sizeof(char *)); + out[i].core_file_count = core_count; + for (int core = 0; core < core_count; core++) { + int best_idx = -1; + int best_score = -1; + for (int m = 0; m < clusters[i].member_count; m++) { + int row_idx = clusters[i].member_indices[m]; + int score = rows[row_idx].inbound_calls + rows[row_idx].outbound_calls; + bool already_used = false; + for (int prev = 0; prev < core; prev++) { + const char *used = out[i].core_files[prev]; + if (used && rows[row_idx].path && strcmp(used, rows[row_idx].path) == 0) { + already_used = true; + break; + } + } + if (!already_used && score > best_score) { + best_score = score; + best_idx = row_idx; + } + } + if (best_idx >= 0) { + ((char **)out[i].core_files)[core] = heap_strdup(rows[best_idx].path); + } + } + + int entry_cap = 4; + int entry_count = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **entries = calloc((size_t)entry_cap, sizeof(char *)); + for (int r = 0; r < route_count; r++) { + if (!summary_cluster_matches_route(&clusters[i], rows, &routes[r])) { + continue; + } + char label[768]; + if (routes[r].method && routes[r].method[0]) { + snprintf(label, sizeof(label), "%s %s", routes[r].method, + routes[r].path ? routes[r].path : ""); + } else { + snprintf(label, sizeof(label), "%s", routes[r].path ? 
routes[r].path : ""); + } + bool duplicate = false; + for (int e = 0; e < entry_count; e++) { + if (strcmp(entries[e], label) == 0) { + duplicate = true; + break; + } + } + if (duplicate) { + continue; + } + if (entry_count >= entry_cap) { + entry_cap *= 2; + entries = safe_realloc(entries, (size_t)entry_cap * sizeof(char *)); + } + entries[entry_count++] = heap_strdup(label); + } + out[i].entry_points = (const char **)entries; + out[i].entry_point_count = entry_count; + free(clusters[i].member_indices); + } + free(clusters); + + *out_arr = out; + *out_count = count; + return CBM_STORE_OK; +} + /* Selects up to 15 Function/Method nodes of `project` that have at least 5 incoming CALLS edges, highest fan-in first and then by name. Nodes flagged is_test and nodes whose lower-cased file path contains "test" are excluded. When focus_like is non-empty it is bound as a LIKE pattern against the lower-cased name, qualified name, file path and properties. The caller owns *out_arr and its strings. */ +static int summary_collect_hot_functions(cbm_store_t *s, const char *project, const char *focus_like, + cbm_arch_summary_function_t **out_arr, int *out_count) { + *out_arr = NULL; + *out_count = 0; + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(n.name, '')) LIKE ?2 " + "OR lower(COALESCE(n.qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(n.file_path, '')) LIKE ?2 " + "OR lower(COALESCE(n.properties, '')) LIKE ?2) " + "GROUP BY n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT n.name, COALESCE(n.file_path, ''), COUNT(*) AS fan_in " + "FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type='CALLS' " + "WHERE n.project=?1 " + "AND n.label IN ('Function','Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND lower(COALESCE(n.file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY 
n.id " + "HAVING fan_in >= 5 " + "ORDER BY fan_in DESC, n.name " + "LIMIT 15;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "summary_collect_hot_functions"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + int cap = 8; + int count = 0; + cbm_arch_summary_function_t *arr = calloc(cap, sizeof(cbm_arch_summary_function_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (count >= cap) { + cap *= 2; + arr = safe_realloc(arr, (size_t)cap * sizeof(cbm_arch_summary_function_t)); + } + arr[count].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[count].file = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[count].in_degree = sqlite3_column_int(stmt, 2); + count++; + } + sqlite3_finalize(stmt); + + *out_arr = arr; + *out_count = count; + return CBM_STORE_OK; +} + /* Maps a node label to an entry-point bucket name. Checks run in this order: exactly "Route" gives "Routes"; a label containing "command" gives "Commands"; "job" gives "Jobs"; "cron" or "schedule" gives "Crons" (substring checks use cbm_strcasestr); exactly "Function" or "Method" gives "Functions". Anything else, including NULL or an empty label, gives "Other". The returned strings are literals. */ +static const char *summary_entry_group_name(const char *label) { + if (!label || !label[0]) { + return "Other"; + } + if (strcmp(label, "Route") == 0) { + return "Routes"; + } + if (cbm_strcasestr(label, "command")) { + return "Commands"; + } + if (cbm_strcasestr(label, "job")) { + return "Jobs"; + } + if (cbm_strcasestr(label, "cron") || cbm_strcasestr(label, "schedule")) { + return "Crons"; + } + if (strcmp(label, "Function") == 0 || strcmp(label, "Method") == 0) { + return "Functions"; + } + return "Other"; +} + /* Adds add_count to the group named `kind`, creating the group (with a heap copy of the name) when it does not exist yet and growing the array as needed. A NULL kind or a non-positive add_count is a no-op. Always returns CBM_STORE_OK. */ +static int summary_add_entry_group(cbm_arch_summary_entry_group_t **groups, int *count, int *cap, + const char *kind, int add_count) { + if (!kind || add_count <= 0) { + return CBM_STORE_OK; + } + for (int i = 0; i < *count; i++) { + if (strcmp((*groups)[i].kind, kind) == 0) { + (*groups)[i].count += add_count; + return CBM_STORE_OK; + } + } + if (*count >= *cap) { + *cap = *cap ? 
*cap * 2 : 4; + *groups = safe_realloc(*groups, (size_t)*cap * sizeof(cbm_arch_summary_entry_group_t)); + } + (*groups)[*count].kind = heap_strdup(kind); + (*groups)[*count].count = add_count; + (*count)++; + return CBM_STORE_OK; +} + /* Builds the entry-point groups for a project. A "Routes" group is seeded from route_count when that is positive; the remaining groups come from nodes whose properties mark them is_entry_point, excluding nodes flagged is_test and file paths containing "test", counted per label and bucketed with summary_entry_group_name(). Labels that bucket to "Routes" are skipped so routes are not counted twice. When focus_like is non-empty it is bound as a LIKE pattern against name, qualified name, file path and properties. The caller owns *out_arr and each group's kind string. Returns CBM_STORE_ERR, with nothing leaked, when the statement cannot be prepared. */ +static int summary_collect_entry_points(cbm_store_t *s, const char *project, const char *focus_like, + int route_count, cbm_arch_summary_entry_group_t **out_arr, + int *out_count) { + *out_arr = NULL; + *out_count = 0; + + int cap = 4; + int count = 0; + cbm_arch_summary_entry_group_t *groups = + calloc((size_t)cap, sizeof(cbm_arch_summary_entry_group_t)); + if (route_count > 0) { + (void)summary_add_entry_group(&groups, &count, &cap, "Routes", route_count); + } + + char sql[2048]; + if (focus_like && focus_like[0]) { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "AND (lower(COALESCE(name, '')) LIKE ?2 " + "OR lower(COALESCE(qualified_name, '')) LIKE ?2 " + "OR lower(COALESCE(file_path, '')) LIKE ?2 " + "OR lower(COALESCE(properties, '')) LIKE ?2) " + "GROUP BY label;"); + } else { + snprintf(sql, sizeof(sql), + "SELECT label, COUNT(*) " + "FROM nodes " + "WHERE project=?1 " + "AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND lower(COALESCE(file_path, '')) NOT LIKE '%%test%%' " + "GROUP BY label;"); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + /* The "Routes" group may already own a heap-allocated kind string; release it before the array. */ + for (int i = 0; i < count; i++) { + free((void *)groups[i].kind); + } + free(groups); + store_set_error_sqlite(s, "summary_collect_entry_points"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like && focus_like[0]) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == 
SQLITE_ROW) { + const char *label = (const char *)sqlite3_column_text(stmt, 0); + int label_count = sqlite3_column_int(stmt, 1); + const char *kind = summary_entry_group_name(label); + if (strcmp(kind, "Routes") == 0) { + continue; + } + (void)summary_add_entry_group(&groups, &count, &cap, kind, label_count); + } + sqlite3_finalize(stmt); + + *out_arr = groups; + *out_count = count; + return CBM_STORE_OK; +} + +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !project) { + return CBM_STORE_ERR; + } + + char *focus_like = summary_focus_like(focus); + char *focus_term = summary_focus_term(focus); + arch_summary_file_row_t *rows = NULL; + cbm_louvain_edge_t *edges = NULL; + int row_count = 0; + int edge_count = 0; + int rc = summary_collect_file_rows(s, project, focus_like, &rows, &row_count, &edges, &edge_count); + if (rc != CBM_STORE_OK) { + free(focus_like); + free(focus_term); + return rc; + } + out->total_files = summary_count_nodes(s, project, "label='File'", NULL); + out->total_functions = summary_count_nodes(s, project, "label IN ('Function','Method')", NULL); + out->total_classes = summary_count_nodes(s, project, "label IN ('Class','Interface')", NULL); + out->total_routes = summary_count_nodes(s, project, "label='Route'", NULL); + + qsort(rows, (size_t)row_count, sizeof(arch_summary_file_row_t), summary_file_row_cmp); + int top_file_count = row_count < 15 ? 
row_count : 15; + if (top_file_count > 0) { + out->files = calloc((size_t)top_file_count, sizeof(cbm_arch_summary_file_t)); + out->file_count = top_file_count; + for (int i = 0; i < top_file_count; i++) { + out->files[i].file = heap_strdup(rows[i].path); + out->files[i].inbound_calls = rows[i].inbound_calls; + out->files[i].outbound_calls = rows[i].outbound_calls; + rc = summary_fill_key_symbols(s, project, &out->files[i]); + if (rc != CBM_STORE_OK) { + break; + } + } + } + /* Each later stage runs only while rc is still CBM_STORE_OK, so the first failure skips the rest. */ + if (rc == CBM_STORE_OK) { + rc = summary_collect_routes(s, project, focus_term, &out->routes, &out->route_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_build_clusters(rows, row_count, edges, edge_count, out->routes, out->route_count, + &out->clusters, &out->cluster_count); + } + if (rc == CBM_STORE_OK) { + rc = summary_collect_hot_functions(s, project, focus_like, &out->functions, + &out->function_count); + } + /* NOTE(review): the "Routes" group is seeded with total_routes (unfiltered), not the focus-filtered route_count; confirm that is intended. */ + if (rc == CBM_STORE_OK) { + rc = summary_collect_entry_points(s, project, focus_like, out->total_routes, + &out->entry_points, &out->entry_point_count); + } + + for (int i = 0; i < row_count; i++) { + free(rows[i].path); + } + free(rows); + free(edges); + free(focus_like); + free(focus_term); + /* On failure, hand back an empty struct rather than a partly filled one. */ + if (rc != CBM_STORE_OK) { + cbm_store_architecture_summary_free(out); + } + return rc; +} + /* Frees every array and string owned by *out (files and their symbols, routes, clusters, functions, entry-point groups) and then zeroes the struct. A NULL out is ignored. */ +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->file_count; i++) { + free((void *)out->files[i].file); + for (int j = 0; j < out->files[i].symbol_count; j++) { + free((void *)out->files[i].symbols[j].name); + } + free(out->files[i].symbols); + } + free(out->files); + + for (int i = 0; i < out->route_count; i++) { + free((void *)out->routes[i].method); + free((void *)out->routes[i].path); + free((void *)out->routes[i].handler); + free((void *)out->routes[i].service); + free((void *)out->routes[i].next); + free((void *)out->routes[i].handler_file); + } + free(out->routes); + + for (int i = 0; i < out->cluster_count; 
i++) { + for (int j = 0; j < out->clusters[i].core_file_count; j++) { + free((void *)out->clusters[i].core_files[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].core_files); + for (int j = 0; j < out->clusters[i].entry_point_count; j++) { + free((void *)out->clusters[i].entry_points[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].entry_points); + } + free(out->clusters); + + for (int i = 0; i < out->function_count; i++) { + free((void *)out->functions[i].name); + free((void *)out->functions[i].file); + } + free(out->functions); + + for (int i = 0; i < out->entry_point_count; i++) { + free((void *)out->entry_points[i].kind); + } + free(out->entry_points); + memset(out, 0, sizeof(*out)); +} + /* ── ADR (Architecture Decision Record) ────────────────────────── */ static const char *canonical_sections[] = {"PURPOSE", "STACK", "ARCHITECTURE", diff --git a/src/store/store.h b/src/store/store.h index 17b0df11..d02fec77 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -514,6 +514,69 @@ int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char * int aspect_count, cbm_architecture_info_t *out); void cbm_store_architecture_free(cbm_architecture_info_t *out); +typedef struct { /* Key symbol of a file: its name and a line-span figure. */ + const char *name; + int span_lines; +} cbm_arch_summary_symbol_t; + /* One file of the summary: path, inbound/outbound call counts and its key symbols. */ +typedef struct { + const char *file; + int inbound_calls; + int outbound_calls; + cbm_arch_summary_symbol_t *symbols; + int symbol_count; +} cbm_arch_summary_file_t; + /* One route: method and path, the handler string, the handler's primary callee (service), that callee's primary callee (next) and the handler's file. service, next and handler_file may be NULL. */ +typedef struct { + const char *method; + const char *path; + const char *handler; + const char *service; + const char *next; + const char *handler_file; +} cbm_arch_summary_route_t; + /* One file cluster: 1-based id, number of member files, its core files and the route labels that enter it. */ +typedef struct { + int id; + int file_count; + const char **core_files; + int core_file_count; + const char **entry_points; + int entry_point_count; +} cbm_arch_summary_cluster_t; + /* A frequently called function: name, file and number of incoming CALLS edges. */ +typedef struct { + const char *name; + const char *file; + int 
in_degree; +} cbm_arch_summary_function_t; + /* Entry-point bucket: a kind name such as "Routes" and the number of nodes counted into it. */ +typedef struct { + const char *kind; + int count; +} cbm_arch_summary_entry_group_t; + /* Complete result of cbm_store_get_architecture_summary(). The total_* fields are project-wide node counts; each *_count field is the length of the array of the same name. Release with cbm_store_architecture_summary_free(). */ +typedef struct { + cbm_arch_summary_file_t *files; + cbm_arch_summary_route_t *routes; + cbm_arch_summary_cluster_t *clusters; + cbm_arch_summary_function_t *functions; + cbm_arch_summary_entry_group_t *entry_points; + int total_files; + int total_functions; + int total_classes; + int total_routes; + int file_count; + int route_count; + int cluster_count; + int function_count; + int entry_point_count; +} cbm_architecture_summary_t; + /* Fills *out with the summary of `project`; `focus` may be NULL. Returns CBM_STORE_OK on success. */ +int cbm_store_get_architecture_summary(cbm_store_t *s, const char *project, const char *focus, + cbm_architecture_summary_t *out); +void cbm_store_architecture_summary_free(cbm_architecture_summary_t *out); + /* ── ADR (Architecture Decision Record) ────────────────────────── */ #define CBM_ADR_MAX_LENGTH 8000 diff --git a/tests/test_integration.c b/tests/test_integration.c index 046cb856..318bce3e 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -367,6 +367,20 @@ TEST(integ_mcp_get_architecture) { PASS(); } +TEST(integ_mcp_get_architecture_summary) { /* Calls the tool on the indexed fixture project with focus "main" and checks that the markdown has the project and key-files headings and mentions main.py. */ + char args[512]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"max_tokens\":1200,\"focus\":\"main\"}", g_project); + + char *resp = call_tool("get_architecture_summary", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "## Key Files")); + ASSERT_NOT_NULL(strstr(resp, "main.py")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -554,6 +568,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_query_graph_calls); RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); + RUN_TEST(integ_mcp_get_architecture_summary); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 
77ec9c99..dd3c0bb2 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -6,10 +6,12 @@ #include "../src/foundation/compat.h" #include "test_framework.h" #include +#include #include #include #include #include +#include /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING @@ -129,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 14 tools */ + /* Should contain all 15 tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -137,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); ASSERT_NOT_NULL(strstr(json, "delete_project")); @@ -490,6 +493,142 @@ TEST(tool_get_architecture_empty) { PASS(); } +TEST(tool_get_architecture_summary_missing_project) { /* Calling the tool with empty arguments must produce a response containing "project is required". */ + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + + char *resp = + cbm_mcp_server_handle(srv, "{\"jsonrpc\":\"2.0\",\"id\":25,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{}}}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "project is required")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Test fixture: creates a temp directory (its path is written to tmp_dir) and an MCP server whose store holds a project named after that directory, populated with 24 File nodes pkg/fileNN.go, one Function FnNN per file, and a CALLS edge from each function to the next. Returns NULL, after undoing what was created, if any setup step fails. */ +static cbm_mcp_server_t *setup_arch_summary_server(char *tmp_dir, size_t tmp_sz) { + snprintf(tmp_dir, tmp_sz, "/tmp/cbm_mcp_arch_XXXXXX"); + if (!cbm_mkdtemp(tmp_dir)) { + return NULL; + } + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + rmdir(tmp_dir); + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + char 
*proj_name = cbm_project_name_from_path(tmp_dir); + if (!proj_name) { + cbm_mcp_server_free(srv); + rmdir(tmp_dir); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj_name); + cbm_store_upsert_project(st, proj_name, tmp_dir); + /* Build the chain: every function after the first is called by the function created just before it. */ + int64_t prev_fn_id = 0; + for (int i = 0; i < 24; i++) { + char file_name[64]; + char file_qn[128]; + char fn_name[32]; + char fn_qn[160]; + + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "%s.pkg.file%02d", proj_name, i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "%s.pkg.file%02d.%s", proj_name, i, fn_name); + + cbm_node_t file = {.project = proj_name, + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(st, &file); + + cbm_node_t fn = {.project = proj_name, + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name, + .start_line = 1, + .end_line = 40 + i}; + int64_t fn_id = cbm_store_upsert_node(st, &fn); + if (prev_fn_id > 0) { + cbm_edge_t edge = { + .project = proj_name, .source_id = prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + prev_fn_id = fn_id; + } + + free(proj_name); + return srv; +} + /* Counterpart of setup_arch_summary_server(): frees the server and removes the temp directory when a path was recorded. */ +static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { + cbm_mcp_server_free(srv); + if (tmp_dir && tmp_dir[0]) { + rmdir(tmp_dir); + } +} + /* With max_tokens set to 1 the response must still carry the project heading and must end with the truncation marker. */ +TEST(tool_get_architecture_summary_truncated) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":26,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project\":\"%s\",\"max_tokens\":1}}}", + proj_name); + + char *resp = 
cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + ASSERT_NOT_NULL(strstr(resp, "_Truncated at max_tokens._")); + free(resp); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + /* The tool must also accept the project as a "project_path" argument (the temp directory path) in place of "project". */ +TEST(tool_get_architecture_summary_project_path_alias) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + + char req[1024]; + snprintf(req, sizeof(req), + "{\"jsonrpc\":\"2.0\",\"id\":27,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture_summary\"," + "\"arguments\":{\"project_path\":\"%s\",\"max_tokens\":64}}}", + tmp_dir); + + char *resp = cbm_mcp_server_handle(srv, req); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "## Project:")); + free(resp); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + TEST(tool_query_graph_missing_query) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -1703,6 +1842,9 @@ SUITE(mcp) { RUN_TEST(tool_trace_missing_function_name); RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); + RUN_TEST(tool_get_architecture_summary_missing_project); + RUN_TEST(tool_get_architecture_summary_truncated); + RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_store_arch.c b/tests/test_store_arch.c index 32663f3a..1427042a 100644 --- a/tests/test_store_arch.c +++ b/tests/test_store_arch.c @@ -379,6 +379,215 @@ TEST(arch_clusters) { PASS(); } +TEST(arch_summary_basic) { /* Unfocused summary of the shared fixture store: checks the totals and the single route's handler -> service -> next chain. */ + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_TRUE(summary.file_count > 
0); + ASSERT_NOT_NULL(summary.files[0].file); + ASSERT_EQ(summary.route_count, 1); + ASSERT_STR_EQ(summary.routes[0].handler, "HandleRequest"); + ASSERT_STR_EQ(summary.routes[0].service, "ProcessOrder"); + ASSERT_STR_EQ(summary.routes[0].next, "ValidateOrder"); + ASSERT_TRUE(summary.entry_point_count > 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* Focus "service": totals stay project-wide (5 files, 1 route) while the first listed file is service.go and no route survives the focus filter. */ +TEST(arch_summary_focus) { + cbm_store_t *s = setup_arch_test_store(); + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", "service", &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 5); + ASSERT_TRUE(summary.total_functions >= 5); + ASSERT_TRUE(summary.file_count >= 1); + ASSERT_NOT_NULL(strstr(summary.files[0].file, "service.go")); + ASSERT_EQ(summary.total_routes, 1); + ASSERT_EQ(summary.route_count, 0); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* 20 files, each with one function, chained by CALLS edges: the summary must succeed and report all 20 files in its totals. */ +TEST(arch_summary_many_files) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + int64_t prev_fn_id = 0; + for (int i = 0; i < 20; i++) { + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "pkg/file%02d.go", i); + snprintf(file_qn, sizeof(file_qn), "test.pkg.file%02d", i); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", i); + snprintf(fn_qn, sizeof(fn_qn), "test.pkg.file%02d.%s", i, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + int64_t fn_id = cbm_store_upsert_node(s, &fn); + if (prev_fn_id > 0) { + cbm_edge_t e = { + .project = "test", .source_id = 
prev_fn_id, .target_id = fn_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &e); + } + prev_fn_id = fn_id; + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 20); + ASSERT_TRUE(summary.file_count > 0); + ASSERT_TRUE(summary.total_functions >= 20); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* Nine independent pairs of files, each pair linked by one CALLS edge: at least one cluster must be reported and the first must hold two or more files. */ +TEST(arch_summary_cluster_growth) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + for (int pair = 0; pair < 9; pair++) { + int64_t pair_ids[2] = {0}; + for (int idx = 0; idx < 2; idx++) { + int file_no = pair * 2 + idx; + char file_name[64]; + char file_qn[96]; + char fn_name[32]; + char fn_qn[128]; + snprintf(file_name, sizeof(file_name), "cluster/file%02d.go", file_no); + snprintf(file_qn, sizeof(file_qn), "test.cluster.file%02d", file_no); + snprintf(fn_name, sizeof(fn_name), "Fn%02d", file_no); + snprintf(fn_qn, sizeof(fn_qn), "test.cluster.file%02d.%s", file_no, fn_name); + + cbm_node_t file = {.project = "test", + .label = "File", + .name = file_name, + .qualified_name = file_qn, + .file_path = file_name}; + cbm_store_upsert_node(s, &file); + + cbm_node_t fn = {.project = "test", + .label = "Function", + .name = fn_name, + .qualified_name = fn_qn, + .file_path = file_name}; + pair_ids[idx] = cbm_store_upsert_node(s, &fn); + } + + cbm_edge_t edge = { + .project = "test", .source_id = pair_ids[0], .target_id = pair_ids[1], .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + } + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + ASSERT_EQ(summary.total_files, 18); + ASSERT_TRUE(summary.cluster_count > 0); + ASSERT_TRUE(summary.clusters[0].file_count >= 2); + 
+ cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + +TEST(arch_summary_cluster_entry_fallback) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "test", "/tmp/test"), CBM_STORE_OK); + + cbm_node_t controller_file = {.project = "test", + .label = "File", + .name = "app/Http/Controllers/OrderController.php", + .qualified_name = "test.app.controllers.OrderController", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_file = {.project = "test", + .label = "File", + .name = "app/Services/OrderService.php", + .qualified_name = "test.app.services.OrderService", + .file_path = "app/Services/OrderService.php"}; + cbm_store_upsert_node(s, &controller_file); + cbm_store_upsert_node(s, &service_file); + + cbm_node_t controller_fn = {.project = "test", + .label = "Method", + .name = "handle", + .qualified_name = "test.app.controllers.OrderController.handle", + .file_path = "app/Http/Controllers/OrderController.php"}; + cbm_node_t service_fn = {.project = "test", + .label = "Method", + .name = "processOrder", + .qualified_name = "test.app.services.OrderService.processOrder", + .file_path = "app/Services/OrderService.php"}; + int64_t controller_id = cbm_store_upsert_node(s, &controller_fn); + int64_t service_id = cbm_store_upsert_node(s, &service_fn); + + cbm_edge_t edge = { + .project = "test", .source_id = controller_id, .target_id = service_id, .type = "CALLS"}; + cbm_store_insert_edge(s, &edge); + + cbm_node_t route = {.project = "test", + .label = "Route", + .name = "/orders", + .qualified_name = "test.routes.orders", + .properties_json = + "{\"method\":\"POST\",\"path\":\"/orders\",\"handler\":" + "\"OrderController@store\"}"}; + cbm_store_upsert_node(s, &route); + + cbm_architecture_summary_t summary; + memset(&summary, 0, sizeof(summary)); + ASSERT_EQ(cbm_store_get_architecture_summary(s, "test", NULL, &summary), CBM_STORE_OK); + 
ASSERT_EQ(summary.route_count, 1); + ASSERT_TRUE(summary.routes[0].handler_file == NULL); + ASSERT_TRUE(summary.cluster_count > 0); + + bool found_entry = false; + for (int i = 0; i < summary.cluster_count; i++) { + for (int j = 0; j < summary.clusters[i].entry_point_count; j++) { + if (strcmp(summary.clusters[i].entry_points[j], "POST /orders") == 0) { + found_entry = true; + break; + } + } + if (found_entry) { + break; + } + } + ASSERT_TRUE(found_entry); + + cbm_store_architecture_summary_free(&summary); + cbm_store_close(s); + PASS(); +} + /* ── ADR tests ──────────────────────────────────────────────────── */ TEST(adr_store_and_retrieve) { @@ -978,6 +1187,11 @@ SUITE(store_arch) { RUN_TEST(arch_layers); RUN_TEST(arch_file_tree); RUN_TEST(arch_clusters); + RUN_TEST(arch_summary_basic); + RUN_TEST(arch_summary_focus); + RUN_TEST(arch_summary_many_files); + RUN_TEST(arch_summary_cluster_growth); + RUN_TEST(arch_summary_cluster_entry_fallback); /* ADR */ RUN_TEST(adr_store_and_retrieve); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index b433ff2a..6cfc93f3 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include /* ── Schema / Open / Close ──────────────────────────────────────── */ @@ -111,6 +113,33 @@ TEST(store_project_delete) { PASS(); } +TEST(store_open_path_query_readonly_db) { + char path[] = "/tmp/cbm_store_query_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_project(writer, "readonly-proj", "/tmp/readonly-proj"), CBM_STORE_OK); + cbm_store_close(writer); + + ASSERT_EQ(chmod(path, 0444), 0); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "readonly-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, 
"/tmp/readonly-proj"); + cbm_project_free_fields(&proj); + cbm_store_close(reader); + + chmod(path, 0644); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1511,6 +1540,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_crud); RUN_TEST(store_project_update); RUN_TEST(store_project_delete); + RUN_TEST(store_open_path_query_readonly_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From b6f16cf472ba80a2539f167ac8cb4957e25619ce Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:27:28 +0530 Subject: [PATCH 2/6] Add PageRank ranking to graph tools --- src/main.c | 1 + src/mcp/mcp.c | 127 ++++++++- src/mcp/mcp.h | 2 +- src/pipeline/pipeline.c | 9 + src/pipeline/pipeline_incremental.c | 63 +++-- src/store/store.c | 419 +++++++++++++++++++++++++++- src/store/store.h | 23 +- tests/test_integration.c | 31 +- tests/test_mcp.c | 113 +++++++- tests/test_pipeline.c | 40 +++ tests/test_store_search.c | 77 +++++ 11 files changed, 865 insertions(+), 40 deletions(-) diff --git a/src/main.c b/src/main.c index f442aa0c..46c13e6f 100644 --- a/src/main.c +++ b/src/main.c @@ -151,6 +151,7 @@ static void print_help(void) { printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); printf(" get_code_snippet, get_graph_schema, get_architecture,\n"); printf(" get_architecture_summary, search_code,\n"); + printf(" get_key_symbols,\n"); printf(" list_projects, delete_project, index_status, detect_changes,\n"); printf(" manage_adr, ingest_traces\n"); } diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 47329a1c..8b313dbb 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,5 +1,5 @@ /* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 15 graph tools. + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 16 graph tools. * * Uses yyjson for fast JSON parsing/building. 
* Single-threaded event loop: read line → parse → dispatch → respond. @@ -348,7 +348,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"integer\"},\"max_degree\":{\"type\":\"integer\"},\"exclude_entry_points\":{" "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " - "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0}},\"required\":[\"project\"]}"}, + "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"required\":[\"project\"]}"}, {"query_graph", "Execute a Cypher query against the knowledge graph for complex multi-hop patterns, " @@ -365,8 +367,9 @@ static const tool_def_t TOOLS[] = { "{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{" "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" - "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_" - "name\",\"project\"]}"}, + "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", "Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the " @@ -398,6 +401,14 @@ static const tool_def_t TOOLS[] = { "to zoom into (for example payment or inventory).\"}},\"anyOf\":[{\"required\":[" "\"project\"]},{\"required\":[\"project_path\"]}]}"}, + {"get_key_symbols", + "Human-readable ranked symbol list: top functions/classes by PageRank importance. 
Use this " + "for fast first-session orientation and central entry-point discovery.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"limit\":{\"type\":" + "\"integer\",\"default\":20},\"focus\":{\"type\":\"string\",\"description\":\"Optional " + "keyword to narrow symbols by name, qualified name, or file path.\"}},\"required\":[" + "\"project\"]}"}, + {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " "the knowledge graph: deduplicates matches into containing functions, ranks by structural " @@ -611,6 +622,21 @@ bool cbm_mcp_get_bool_arg(const char *args_json, const char *key) { return result; } +static bool cbm_mcp_get_bool_arg_default(const char *args_json, const char *key, bool default_val) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return default_val; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + bool result = default_val; + if (val && yyjson_is_bool(val)) { + result = yyjson_get_bool(val); + } + yyjson_doc_free(doc); + return result; +} + /* ══════════════════════════════════════════════════════════════════ * MCP SERVER * ══════════════════════════════════════════════════════════════════ */ @@ -1061,6 +1087,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); cbm_search_params_t params = { .project = project, @@ -1071,6 +1098,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .offset = offset, .min_degree = min_degree, .max_degree = max_degree, + .sort_by = ranked ? 
"relevance" : "name", }; cbm_search_output_t out = {0}; @@ -1094,6 +1122,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { sr->node.file_path ? sr->node.file_path : ""); yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); yyjson_mut_arr_add_val(results, item); } yyjson_mut_obj_add_val(doc, root, "results", results); @@ -1335,6 +1364,82 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return result; } +static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *focus = cbm_mcp_get_string_arg(args, "focus"); + int limit = cbm_mcp_get_int_arg(args, "limit", 20); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(focus); + return not_indexed; + } + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + if (cbm_store_get_key_symbols(store, project, focus, limit, &symbols, &count) != + CBM_STORE_OK) { + free(project); + free(focus); + return cbm_mcp_text_result("failed to load key symbols", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "project", project ? project : ""); + yyjson_mut_obj_add_int(doc, root, "count", count); + yyjson_mut_val *results = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", symbols[i].name ? symbols[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + symbols[i].qualified_name ? 
symbols[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", symbols[i].label ? symbols[i].label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + symbols[i].file_path ? symbols[i].file_path : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", symbols[i].in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", symbols[i].out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", symbols[i].pagerank); + yyjson_mut_arr_add_val(results, item); + } + yyjson_mut_obj_add_val(doc, root, "results", results); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_key_symbols_free(symbols, count); + free(project); + free(focus); + + { + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; + } +} + +static int node_hop_rank_cmp(const void *lhs, const void *rhs) { + const cbm_node_hop_t *a = lhs; + const cbm_node_hop_t *b = rhs; + if (a->pagerank < b->pagerank) { + return 1; + } + if (a->pagerank > b->pagerank) { + return -1; + } + if (a->hop != b->hop) { + return a->hop - b->hop; + } + if (!a->node.name || !b->node.name) { + return 0; + } + return strcmp(a->node.name, b->node.name); +} + static bool same_project_path(const char *lhs, const char *rhs) { if (!lhs || !rhs) { return false; @@ -1580,6 +1685,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); if (!func_name) { free(project); @@ -1645,6 +1751,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_outbound) { cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, 100, &tr_out); + if (ranked && tr_out.visited_count > 1) { + qsort(tr_out.visited, (size_t)tr_out.visited_count, sizeof(cbm_node_hop_t), + 
node_hop_rank_cmp); + } yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { @@ -1655,6 +1765,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_out.visited[i].node.qualified_name ? tr_out.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); yyjson_mut_arr_add_val(callees, item); } yyjson_mut_obj_add_val(doc, root, "callees", callees); @@ -1663,6 +1774,10 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (do_inbound) { cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, 100, &tr_in); + if (ranked && tr_in.visited_count > 1) { + qsort(tr_in.visited, (size_t)tr_in.visited_count, sizeof(cbm_node_hop_t), + node_hop_rank_cmp); + } yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { @@ -1673,6 +1788,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { doc, item, "qualified_name", tr_in.visited[i].node.qualified_name ? 
tr_in.visited[i].node.qualified_name : ""); yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); + yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); yyjson_mut_arr_add_val(callers, item); } yyjson_mut_obj_add_val(doc, root, "callers", callers); @@ -3060,6 +3176,9 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch if (strcmp(tool_name, "get_architecture") == 0) { return handle_get_architecture(srv, args_json); } + if (strcmp(tool_name, "get_key_symbols") == 0) { + return handle_get_key_symbols(srv, args_json); + } if (strcmp(tool_name, "get_architecture_summary") == 0) { return handle_get_architecture_summary(srv, args_json); } diff --git a/src/mcp/mcp.h b/src/mcp/mcp.h index 7e65912e..1f24dd8c 100644 --- a/src/mcp/mcp.h +++ b/src/mcp/mcp.h @@ -2,7 +2,7 @@ * mcp.h — MCP (Model Context Protocol) server for codebase-memory-mcp. * * Implements JSON-RPC 2.0 over stdio with the MCP tool calling protocol. - * Provides 14 graph analysis tools (search, trace, query, index, etc.) + * Provides 16 graph analysis tools (search, trace, query, index, etc.) 
*/ #ifndef CBM_MCP_H #define CBM_MCP_H diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 66f47eac..41671775 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -816,8 +816,17 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { mtime_ns, fst.st_size); } } + if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != + CBM_STORE_OK) { + cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); + cbm_store_close(hash_store); + rc = -1; + goto cleanup; + } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); + cbm_log_info("pass.timing", "pass", "pagerank", "project", p->project_name); } } diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 1799f838..795f32d2 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -159,10 +159,11 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf /* ── Incremental pipeline entry point ────────────────────────────── */ -int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, - int file_count) { - struct timespec t0; - cbm_clock_gettime(CLOCK_MONOTONIC, &t0); +int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_file_info_t *files, + int file_count) { + struct timespec t0; + struct timespec t; + cbm_clock_gettime(CLOCK_MONOTONIC, &t0); const char *project = cbm_pipeline_project_name(p); @@ -191,14 +192,25 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.classify", "changed", itoa_buf(n_changed), "unchanged", itoa_buf(n_unchanged), "deleted", itoa_buf(deleted_count)); - /* Fast path: nothing changed → skip */ - if (n_changed == 0 && deleted_count == 0) { - cbm_log_info("incremental.noop", "reason", "no_changes"); - free(is_changed); - 
free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return 0; + /* Fast path: nothing changed → skip */ + if (n_changed == 0 && deleted_count == 0) { + cbm_log_info("incremental.noop", "reason", "no_changes"); + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + free(is_changed); + free(deleted); + cbm_store_free_file_hashes(stored, stored_count); + cbm_store_close(store); + return 0; } cbm_store_free_file_hashes(stored, stored_count); @@ -245,9 +257,8 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil .cancelled = cbm_pipeline_cancelled_ptr(p), }; - /* Run passes on changed files only */ - struct timespec t; - cbm_clock_gettime(CLOCK_MONOTONIC, &t); + /* Run passes on changed files only */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); cbm_pipeline_pass_definitions(&ctx, changed_files, ci); cbm_log_info("pass.timing", "pass", "incr_definitions", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); @@ -284,11 +295,23 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.merged", "nodes", itoa_buf(new_nodes), "edges", itoa_buf(new_edges)); - /* Persist updated file hashes for ALL files */ - persist_hashes(store, project, files, file_count); - - /* Cleanup */ - cbm_gbuf_free(gbuf); + /* Persist updated file hashes for ALL files */ + persist_hashes(store, project, files, file_count); + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { + cbm_log_error("incremental.err", 
"msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + cbm_gbuf_free(gbuf); + cbm_registry_free(registry); + free(changed_files); + cbm_store_close(store); + return -1; + } + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + + /* Cleanup */ + cbm_gbuf_free(gbuf); cbm_registry_free(registry); free(changed_files); cbm_store_close(store); diff --git a/src/store/store.c b/src/store/store.c index ca196255..90ebc7d0 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -41,6 +41,8 @@ struct cbm_store { sqlite3 *db; const char *db_path; /* heap-allocated, or NULL for :memory: */ char errbuf[512]; + bool node_scores_checked; + bool node_scores_exists; /* Prepared statements (lazily initialized, cached for lifetime) */ sqlite3_stmt *stmt_upsert_node; @@ -125,6 +127,31 @@ static char *heap_strdup(const char *s) { return d; } +static bool store_has_node_scores_table(cbm_store_t *s) { + if (!s || !s->db) { + return false; + } + if (s->node_scores_checked) { + return s->node_scores_exists; + } + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2( + s->db, + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='node_scores' LIMIT 1;", -1, + &stmt, NULL); + if (rc != SQLITE_OK) { + s->node_scores_checked = true; + s->node_scores_exists = false; + return false; + } + + s->node_scores_exists = (sqlite3_step(stmt) == SQLITE_ROW); + s->node_scores_checked = true; + sqlite3_finalize(stmt); + return s->node_scores_exists; +} + /* Prepare a statement (cached). If already prepared, reset+clear. 
*/ static sqlite3_stmt *prepare_cached(cbm_store_t *s, sqlite3_stmt **slot, const char *sql) { if (!s || !s->db) { @@ -200,6 +227,12 @@ static int init_schema(cbm_store_t *s) { " source_hash TEXT NOT NULL," " created_at TEXT NOT NULL," " updated_at TEXT NOT NULL" + ");" + "CREATE TABLE IF NOT EXISTS node_scores (" + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," + " pagerank REAL NOT NULL," + " PRIMARY KEY (project, node_id)" ");"; return exec_sql(s, ddl); @@ -214,7 +247,8 @@ static int create_user_indexes(cbm_store_t *s) { "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id, type);" "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);" "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);"; + "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);" + "CREATE INDEX IF NOT EXISTS idx_node_scores_rank ON node_scores(project, pagerank DESC);"; return exec_sql(s, sql); } @@ -375,6 +409,9 @@ static cbm_store_t *store_open_internal(const char *path, bool in_memory) { return NULL; } + s->node_scores_checked = true; + s->node_scores_exists = true; + return s; } @@ -409,6 +446,8 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { } s->db_path = heap_strdup(db_path); + s->node_scores_checked = false; + s->node_scores_exists = false; /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. 
*/ sqlite3_set_authorizer(s->db, store_authorizer, NULL); @@ -599,7 +638,8 @@ int cbm_store_drop_indexes(cbm_store_t *s) { "DROP INDEX IF EXISTS idx_edges_target;" "DROP INDEX IF EXISTS idx_edges_type;" "DROP INDEX IF EXISTS idx_edges_target_type;" - "DROP INDEX IF EXISTS idx_edges_source_type;"); + "DROP INDEX IF EXISTS idx_edges_source_type;" + "DROP INDEX IF EXISTS idx_node_scores_rank;"); } int cbm_store_create_indexes(cbm_store_t *s) { @@ -1834,6 +1874,335 @@ int cbm_store_restore_from(cbm_store_t *dst, cbm_store_t *src) { return CBM_STORE_OK; } +/* ── PageRank ───────────────────────────────────────────────────── */ + +typedef struct { + int src_idx; + int dst_idx; +} cbm_pagerank_edge_ref_t; + +static int pagerank_find_node_index(const int64_t *node_ids, int count, int64_t node_id) { + int lo = 0; + int hi = count - 1; + while (lo <= hi) { + int mid = lo + ((hi - lo) / 2); + if (node_ids[mid] == node_id) { + return mid; + } + if (node_ids[mid] < node_id) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return -1; +} + +int cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping) { + int rc = CBM_STORE_OK; + sqlite3_stmt *stmt = NULL; + sqlite3_stmt *insert_stmt = NULL; + int64_t *node_ids = NULL; + int node_cap = 0; + int node_count = 0; + cbm_pagerank_edge_ref_t *edges = NULL; + int edge_cap = 0; + int edge_count = 0; + int *out_degree = NULL; + double *scores = NULL; + double *next_scores = NULL; + + if (!s || !s->db || !project) { + return CBM_STORE_ERR; + } + if (!store_has_node_scores_table(s)) { + store_set_error(s, "node_scores table is unavailable"); + return CBM_STORE_ERR; + } + if (iterations <= 0) { + iterations = 20; + } + if (damping <= 0.0 || damping >= 1.0) { + damping = 0.85; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT id FROM nodes " + "WHERE project = ?1 AND label IN ('Function','Method','Class') " + "ORDER BY id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, 
"pagerank.nodes"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (node_count >= node_cap) { + node_cap = node_cap > 0 ? node_cap * 2 : 128; + node_ids = safe_realloc(node_ids, (size_t)node_cap * sizeof(int64_t)); + } + node_ids[node_count++] = sqlite3_column_int64(stmt, 0); + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count > 0) { + out_degree = calloc((size_t)node_count, sizeof(int)); + scores = malloc((size_t)node_count * sizeof(double)); + next_scores = malloc((size_t)node_count * sizeof(double)); + if (!out_degree || !scores || !next_scores) { + store_set_error(s, "pagerank allocation failed"); + rc = CBM_STORE_ERR; + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, + "SELECT source_id, target_id FROM edges WHERE project = ?1 AND type = 'CALLS' " + "ORDER BY source_id, target_id;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.edges"); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW) { + int src_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 0)); + int dst_idx = + pagerank_find_node_index(node_ids, node_count, sqlite3_column_int64(stmt, 1)); + if (src_idx < 0 || dst_idx < 0) { + continue; + } + if (edge_count >= edge_cap) { + edge_cap = edge_cap > 0 ? 
edge_cap * 2 : 256; + edges = safe_realloc(edges, (size_t)edge_cap * sizeof(cbm_pagerank_edge_ref_t)); + } + edges[edge_count].src_idx = src_idx; + edges[edge_count].dst_idx = dst_idx; + out_degree[src_idx]++; + edge_count++; + } + sqlite3_finalize(stmt); + stmt = NULL; + + for (int i = 0; i < node_count; i++) { + scores[i] = 1.0 / (double)node_count; + } + + for (int iter = 0; iter < iterations; iter++) { + double dangling_mass = 0.0; + double base = 0.0; + + for (int i = 0; i < node_count; i++) { + if (out_degree[i] == 0) { + dangling_mass += scores[i]; + } + } + + base = ((1.0 - damping) + (damping * dangling_mass)) / (double)node_count; + for (int i = 0; i < node_count; i++) { + next_scores[i] = base; + } + + for (int i = 0; i < edge_count; i++) { + int src_idx = edges[i].src_idx; + int dst_idx = edges[i].dst_idx; + if (out_degree[src_idx] > 0) { + next_scores[dst_idx] += + damping * (scores[src_idx] / (double)out_degree[src_idx]); + } + } + + { + double *tmp = scores; + scores = next_scores; + next_scores = tmp; + } + } + } + + rc = cbm_store_begin(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = sqlite3_prepare_v2(s->db, "DELETE FROM node_scores WHERE project = ?1;", -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.delete"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.delete"); + sqlite3_finalize(stmt); + stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (node_count == 0) { + rc = cbm_store_commit(s); + goto cleanup; + } + + rc = sqlite3_prepare_v2( + s->db, "INSERT INTO node_scores (project, node_id, pagerank) VALUES (?1, ?2, ?3);", -1, + &insert_stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "pagerank.insert"); + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + + for 
(int i = 0; i < node_count; i++) { + sqlite3_reset(insert_stmt); + sqlite3_clear_bindings(insert_stmt); + bind_text(insert_stmt, 1, project); + sqlite3_bind_int64(insert_stmt, 2, node_ids[i]); + sqlite3_bind_double(insert_stmt, 3, scores[i]); + if (sqlite3_step(insert_stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "pagerank.insert"); + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + cbm_store_rollback(s); + rc = CBM_STORE_ERR; + goto cleanup; + } + } + + sqlite3_finalize(insert_stmt); + insert_stmt = NULL; + rc = cbm_store_commit(s); + if (rc != CBM_STORE_OK) { + goto cleanup; + } + + rc = CBM_STORE_OK; + +cleanup: + if (stmt) { + sqlite3_finalize(stmt); + } + if (insert_stmt) { + sqlite3_finalize(insert_stmt); + } + free(node_ids); + free(edges); + free(out_degree); + free(scores); + free(next_scores); + return rc; +} + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count) { + sqlite3_stmt *stmt = NULL; + cbm_key_symbol_t *symbols = NULL; + int cap = 0; + int n = 0; + char *focus_like = NULL; + bool has_scores = false; + char sql[2048]; + + if (out) { + *out = NULL; + } + if (count) { + *count = 0; + } + if (!s || !s->db || !project || !out || !count) { + return CBM_STORE_ERR; + } + + if (limit <= 0) { + limit = 20; + } + has_scores = store_has_node_scores_table(s); + if (focus && focus[0]) { + size_t len = strlen(focus); + focus_like = malloc(len + 3); + if (!focus_like) { + return CBM_STORE_ERR; + } + focus_like[0] = '%'; + memcpy(focus_like + 1, focus, len); + focus_like[len + 1] = '%'; + focus_like[len + 2] = '\0'; + } + + snprintf( + sql, sizeof(sql), + "SELECT n.name, n.qualified_name, n.label, n.file_path, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, " + "%s " + "FROM nodes n %s " + "WHERE n.project = ?1 AND n.label IN 
('Function','Method','Class') %s" + "ORDER BY pagerank DESC, in_deg DESC, out_deg DESC, n.name " + "LIMIT %d;", + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + focus_like ? "AND (n.name LIKE ?2 OR n.qualified_name LIKE ?2 OR n.file_path LIKE ?2) " + : "", + limit); + + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "key_symbols.prepare"); + free(focus_like); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + if (focus_like) { + bind_text(stmt, 2, focus_like); + } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap = cap > 0 ? cap * 2 : 16; + symbols = safe_realloc(symbols, (size_t)cap * sizeof(cbm_key_symbol_t)); + } + memset(&symbols[n], 0, sizeof(cbm_key_symbol_t)); + symbols[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + symbols[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + symbols[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + symbols[n].file_path = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); + symbols[n].in_degree = sqlite3_column_int(stmt, 4); + symbols[n].out_degree = sqlite3_column_int(stmt, 5); + symbols[n].pagerank = sqlite3_column_double(stmt, 6); + n++; + } + + sqlite3_finalize(stmt); + free(focus_like); + *out = symbols; + *count = n; + return CBM_STORE_OK; +} + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count) { + if (!symbols) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)symbols[i].name); + free((void *)symbols[i].qualified_name); + free((void *)symbols[i].label); + free((void *)symbols[i].file_path); + } + free(symbols); +} + /* ── Search ─────────────────────────────────────────────────────── */ /* Convert a glob pattern to SQL LIKE pattern. 
*/ @@ -1978,13 +2347,14 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear char sql[4096]; char count_sql[4096]; int bind_idx = 0; + bool has_scores = store_has_node_scores_table(s); /* We build a query that selects nodes with optional degree subqueries */ const char *select_cols = "SELECT n.id, n.project, n.label, n.name, n.qualified_name, " "n.file_path, n.start_line, n.end_line, n.properties, " "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " - "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg "; + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, "; /* Start building WHERE */ char where[2048] = ""; @@ -2067,9 +2437,18 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear /* Build full SQL */ if (nparams > 0) { - snprintf(sql, sizeof(sql), "%s FROM nodes n WHERE %s", select_cols, where); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s WHERE %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + where); } else { - snprintf(sql, sizeof(sql), "%s FROM nodes n", select_cols); + snprintf(sql, sizeof(sql), "%s%s FROM nodes n %s", select_cols, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores + ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : ""); } /* Degree filters: -1 = no filter, 0+ = active filter. @@ -2100,12 +2479,20 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear * When degree filter wraps in subquery, column refs lose the "n." prefix. */ int limit = params->limit > 0 ? 
params->limit : 500000; int offset = params->offset; - bool has_degree_wrap = has_degree_filter; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *name_col = has_degree_wrap ? "name" : "n.name"; + const char *sort_by = (params->sort_by && params->sort_by[0]) ? params->sort_by : "name"; char order_limit[128]; - snprintf(order_limit, sizeof(order_limit), " ORDER BY %s LIMIT %d OFFSET %d", name_col, limit, - offset); + if (strcmp(sort_by, "degree") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY (in_deg + out_deg) DESC, pagerank DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else if (strcmp(sort_by, "relevance") == 0) { + snprintf(order_limit, sizeof(order_limit), + " ORDER BY pagerank DESC, (in_deg + out_deg) DESC, name LIMIT %d OFFSET %d", + limit, offset); + } else { + snprintf(order_limit, sizeof(order_limit), " ORDER BY name LIMIT %d OFFSET %d", limit, + offset); + } strncat(sql, order_limit, sizeof(sql) - strlen(sql) - 1); /* Execute count query */ @@ -2147,6 +2534,7 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear scan_node(main_stmt, &results[n].node); results[n].in_degree = sqlite3_column_int(main_stmt, 9); results[n].out_degree = sqlite3_column_int(main_stmt, 10); + results[n].pagerank = sqlite3_column_double(main_stmt, 11); n++; } @@ -2219,6 +2607,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const char sql[4096]; const char *join_cond; const char *next_id; + bool has_scores = store_has_node_scores_table(s); // NOLINTNEXTLINE(readability-implicit-bool-conversion) bool is_inbound = direction && strcmp(direction, "inbound") == 0; @@ -2240,13 +2629,18 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const " WHERE e.type IN (%s) AND bfs.hop < %d" ")" "SELECT DISTINCT n.id, n.project, n.label, n.name, n.qualified_name, " - "n.file_path, n.start_line, n.end_line, n.properties, bfs.hop " + "n.file_path, n.start_line, 
n.end_line, n.properties, bfs.hop, %s " "FROM bfs " "JOIN nodes n ON n.id = bfs.node_id " + "%s " "WHERE bfs.hop > 0 " /* exclude root */ "ORDER BY bfs.hop " "LIMIT %d;", - (long long)start_id, next_id, join_cond, types_clause, max_depth, max_results); + (long long)start_id, next_id, join_cond, types_clause, max_depth, + has_scores ? "COALESCE(ns.pagerank, 0.0) AS pagerank" : "0.0 AS pagerank", + has_scores ? "LEFT JOIN node_scores ns ON ns.project = n.project AND ns.node_id = n.id" + : "", + max_results); sqlite3_stmt *stmt = NULL; rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); @@ -2275,6 +2669,7 @@ int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const } scan_node(stmt, &visited[n].node); visited[n].hop = sqlite3_column_int(stmt, 9); + visited[n].pagerank = sqlite3_column_double(stmt, 10); n++; } diff --git a/src/store/store.h b/src/store/store.h index d02fec77..99c240e3 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -116,7 +116,7 @@ typedef struct { int offset; bool exclude_entry_points; bool include_connected; - const char *sort_by; /* "relevance" / "name" / "degree", NULL = relevance */ + const char *sort_by; /* "relevance" / "name" / "degree", NULL = name */ bool case_sensitive; const char **exclude_labels; /* NULL-terminated array, or NULL */ } cbm_search_params_t; @@ -125,6 +125,7 @@ typedef struct { cbm_node_t node; int in_degree; int out_degree; + double pagerank; /* connected_names: allocated array of strings, count in connected_count */ const char **connected_names; int connected_count; @@ -141,6 +142,7 @@ typedef struct { typedef struct { cbm_node_t node; int hop; /* BFS depth from root */ + double pagerank; } cbm_node_hop_t; typedef struct { @@ -360,6 +362,25 @@ int cbm_store_delete_file_hash(cbm_store_t *s, const char *project, const char * int cbm_store_delete_file_hashes(cbm_store_t *s, const char *project); +/* ── PageRank ───────────────────────────────────────────────────── */ + +int 
cbm_store_compute_pagerank(cbm_store_t *s, const char *project, int iterations, double damping); + +typedef struct { + const char *name; + const char *qualified_name; + const char *label; + const char *file_path; + int in_degree; + int out_degree; + double pagerank; +} cbm_key_symbol_t; + +int cbm_store_get_key_symbols(cbm_store_t *s, const char *project, const char *focus, int limit, + cbm_key_symbol_t **out, int *count); + +void cbm_store_key_symbols_free(cbm_key_symbol_t *symbols, int count); + /* ── Search ─────────────────────────────────────────────────────── */ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_search_output_t *out); diff --git a/tests/test_integration.c b/tests/test_integration.c index 318bce3e..d0a14dde 100644 --- a/tests/test_integration.c +++ b/tests/test_integration.c @@ -381,6 +381,18 @@ TEST(integ_mcp_get_architecture_summary) { PASS(); } +TEST(integ_mcp_get_key_symbols) { + char args[256]; + snprintf(args, sizeof(args), "{\"project\":\"%s\",\"limit\":5}", g_project); + + char *resp = call_tool("get_key_symbols", args); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "pagerank")); + ASSERT_TRUE(strstr(resp, "Add") || strstr(resp, "greet") || strstr(resp, "Multiply")); + free(resp); + PASS(); +} + TEST(integ_mcp_trace_call_path) { /* Trace outbound calls from Compute → should reach Add and Multiply */ char args[256]; @@ -539,6 +551,21 @@ TEST(integ_store_bfs_traversal) { PASS(); } +TEST(integ_store_key_symbols_ranked) { + cbm_store_t *store = cbm_store_open_path(g_dbpath); + ASSERT_NOT_NULL(store); + + cbm_key_symbol_t *symbols = NULL; + int count = 0; + ASSERT_EQ(cbm_store_get_key_symbols(store, g_project, NULL, 5, &symbols, &count), CBM_STORE_OK); + ASSERT_GT(count, 0); + ASSERT_TRUE(symbols[0].pagerank > 0.0); + + cbm_store_key_symbols_free(symbols, count); + cbm_store_close(store); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * 
══════════════════════════════════════════════════════════════════ */ @@ -548,7 +575,7 @@ SUITE(integration) { if (integration_setup() != 0) { printf(" %-50s", "integration_setup"); printf("SKIP (setup failed)\n"); - tf_skip_count += 16; /* skip all integration tests */ + tf_skip_count += 25; /* skip all integration tests */ integration_teardown(); return; } @@ -569,6 +596,7 @@ SUITE(integration) { RUN_TEST(integ_mcp_get_graph_schema); RUN_TEST(integ_mcp_get_architecture); RUN_TEST(integ_mcp_get_architecture_summary); + RUN_TEST(integ_mcp_get_key_symbols); RUN_TEST(integ_mcp_trace_call_path); RUN_TEST(integ_mcp_index_status); @@ -576,6 +604,7 @@ SUITE(integration) { RUN_TEST(integ_store_search_by_degree); RUN_TEST(integ_store_find_by_file); RUN_TEST(integ_store_bfs_traversal); + RUN_TEST(integ_store_key_symbols_ranked); /* Pipeline API tests (no db needed) */ RUN_TEST(integ_pipeline_fqn_compute); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index dd3c0bb2..8aeeb096 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -131,7 +131,7 @@ TEST(mcp_initialize_response) { TEST(mcp_tools_list) { char *json = cbm_mcp_tools_list(); ASSERT_NOT_NULL(json); - /* Should contain all 15 tools */ + /* Should contain all public tools */ ASSERT_NOT_NULL(strstr(json, "index_repository")); ASSERT_NOT_NULL(strstr(json, "search_graph")); ASSERT_NOT_NULL(strstr(json, "query_graph")); @@ -139,6 +139,7 @@ TEST(mcp_tools_list) { ASSERT_NOT_NULL(strstr(json, "get_code_snippet")); ASSERT_NOT_NULL(strstr(json, "get_graph_schema")); ASSERT_NOT_NULL(strstr(json, "get_architecture")); + ASSERT_NOT_NULL(strstr(json, "get_key_symbols")); ASSERT_NOT_NULL(strstr(json, "get_architecture_summary")); ASSERT_NOT_NULL(strstr(json, "search_code")); ASSERT_NOT_NULL(strstr(json, "list_projects")); @@ -583,6 +584,51 @@ static void cleanup_arch_summary_server(char *tmp_dir, cbm_mcp_server_t *srv) { } } +static cbm_mcp_server_t *setup_pagerank_server(void) { + cbm_mcp_server_t *srv = 
cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-rank", "/tmp/test-rank"); + cbm_mcp_server_set_project(srv, "test-rank"); + + cbm_node_t nodes[] = { + {.project = "test-rank", .label = "Function", .name = "Root", .qualified_name = "test-rank.Root"}, + {.project = "test-rank", .label = "Function", .name = "Small", .qualified_name = "test-rank.Small"}, + {.project = "test-rank", .label = "Function", .name = "Hub", .qualified_name = "test-rank.Hub"}, + {.project = "test-rank", .label = "Function", .name = "Leaf", .qualified_name = "test-rank.Leaf"}, + {.project = "test-rank", .label = "Function", .name = "CallerB", .qualified_name = "test-rank.CallerB"}, + {.project = "test-rank", .label = "Function", .name = "CallerC", .qualified_name = "test-rank.CallerC"}, + }; + int64_t ids[6]; + for (int i = 0; i < 6; i++) { + ids[i] = cbm_store_upsert_node(st, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = "test-rank", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test-rank", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + for (int i = 0; i < 4; i++) { + cbm_store_insert_edge(st, &edges[i]); + } + + if (cbm_store_compute_pagerank(st, "test-rank", 20, 0.85) != CBM_STORE_OK) { + cbm_mcp_server_free(srv); + return NULL; + } + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1120,6 +1166,68 @@ static char *extract_text_content(const char *mcp_result) { return result; } +TEST(tool_search_graph_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); 
+ + char *raw = cbm_mcp_handle_tool(srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_get_key_symbols_ranked) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = + cbm_mcp_handle_tool(srv, "get_key_symbols", "{\"project\":\"test-rank\",\"limit\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"results\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_ranked_pagerank) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\",\"depth\":3}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + ASSERT_NOT_NULL(strstr(text, "\"pagerank\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Small\"")); + ASSERT_TRUE(strstr(text, "\"name\":\"Hub\"") < strstr(text, "\"name\":\"Small\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1834,6 +1942,7 @@ SUITE(mcp) { RUN_TEST(tool_get_graph_schema_empty); RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); + RUN_TEST(tool_search_graph_ranked_pagerank); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1845,6 +1954,8 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_missing_project); RUN_TEST(tool_get_architecture_summary_truncated); RUN_TEST(tool_get_architecture_summary_project_path_alias); + RUN_TEST(tool_get_key_symbols_ranked); + RUN_TEST(tool_trace_call_path_ranked_pagerank); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 569fa04e..8578bdc5 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -19,6 +19,7 @@ #include #include "graph_buffer/graph_buffer.h" #include "yyjson/yyjson.h" +#include /* ── Helper: create temp test repo with known layout ───────────── */ @@ -4971,6 +4972,44 @@ TEST(incremental_full_then_noop) { PASS(); } +TEST(incremental_noop_backfills_pagerank) { + if (setup_incremental_repo() != 0) { SKIP("setup failed"); } + + cbm_pipeline_t *p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + cbm_store_t *s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + ASSERT_EQ(sqlite3_exec(cbm_store_get_db(s), "DELETE FROM node_scores;", NULL, NULL, NULL), + SQLITE_OK); + cbm_store_close(s); + + p = cbm_pipeline_new(g_incr_tmpdir, g_incr_dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(g_incr_dbpath); + ASSERT_NOT_NULL(s); + sqlite3_stmt *stmt = NULL; + ASSERT_EQ(sqlite3_prepare_v2(cbm_store_get_db(s), + "SELECT COUNT(*) FROM node_scores WHERE project = 
?1;", -1, + &stmt, NULL), + SQLITE_OK); + ASSERT_EQ(sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC), SQLITE_OK); + ASSERT_EQ(sqlite3_step(stmt), SQLITE_ROW); + ASSERT_GT(sqlite3_column_int(stmt, 0), 0); + sqlite3_finalize(stmt); + cbm_store_close(s); + free(project); + + cleanup_incremental_repo(); + PASS(); +} + TEST(incremental_detects_changed_file) { /* Full index, modify one file, re-index → changed file re-parsed */ if (setup_incremental_repo() != 0) { SKIP("setup failed"); } @@ -5914,6 +5953,7 @@ SUITE(pipeline) { RUN_TEST(pipeline_fastapi_depends_edges); /* Incremental */ RUN_TEST(incremental_full_then_noop); + RUN_TEST(incremental_noop_backfills_pagerank); RUN_TEST(incremental_detects_changed_file); RUN_TEST(incremental_detects_deleted_file); RUN_TEST(incremental_new_file_added); diff --git a/tests/test_store_search.c b/tests/test_store_search.c index 05df8680..4a5b7b3e 100644 --- a/tests/test_store_search.c +++ b/tests/test_store_search.c @@ -47,6 +47,41 @@ static cbm_store_t *setup_search_store(int64_t *ids) { return s; } +static cbm_store_t *setup_pagerank_store(int64_t *ids) { + cbm_store_t *s = cbm_store_open_memory(); + cbm_store_upsert_project(s, "test", "/tmp/test"); + + cbm_node_t nodes[] = { + {.project = "test", .label = "Function", .name = "Root", .qualified_name = "test.Root"}, + {.project = "test", .label = "Function", .name = "Small", .qualified_name = "test.Small"}, + {.project = "test", .label = "Function", .name = "Hub", .qualified_name = "test.Hub"}, + {.project = "test", .label = "Function", .name = "Leaf", .qualified_name = "test.Leaf"}, + {.project = "test", .label = "Function", .name = "CallerB", .qualified_name = "test.CallerB"}, + {.project = "test", .label = "Function", .name = "CallerC", .qualified_name = "test.CallerC"}, + }; + const int node_count = (int)(sizeof(nodes) / sizeof(nodes[0])); + for (int i = 0; i < node_count; i++) { + ids[i] = cbm_store_upsert_node(s, &nodes[i]); + } + + cbm_edge_t edges[] = { + {.project = 
"test", .source_id = ids[0], .target_id = ids[1], .type = "CALLS"}, + {.project = "test", .source_id = ids[1], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[4], .target_id = ids[2], .type = "CALLS"}, + {.project = "test", .source_id = ids[5], .target_id = ids[2], .type = "CALLS"}, + }; + const int edge_count = (int)(sizeof(edges) / sizeof(edges[0])); + for (int i = 0; i < edge_count; i++) { + cbm_store_insert_edge(s, &edges[i]); + } + + if (cbm_store_compute_pagerank(s, "test", 20, 0.85) != CBM_STORE_OK) { + cbm_store_close(s); + return NULL; + } + return s; +} + /* ── Search by label ────────────────────────────────────────────── */ TEST(store_search_by_label) { @@ -595,6 +630,46 @@ TEST(store_search_case_insensitive) { PASS(); } +TEST(store_search_ranked_by_pagerank) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + + cbm_search_params_t params = {.project = "test", + .label = "Function", + .limit = 10, + .min_degree = -1, + .max_degree = -1, + .sort_by = "relevance"}; + cbm_search_output_t out = {0}; + int rc = cbm_store_search(s, ¶ms, &out); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_GTE(out.count, 4); + ASSERT_STR_EQ(out.results[0].node.name, "Hub"); + ASSERT_TRUE(out.results[0].pagerank > out.results[1].pagerank); + cbm_store_search_free(&out); + + cbm_store_close(s); + PASS(); +} + +TEST(store_get_key_symbols_ranked) { + int64_t ids[6]; + cbm_store_t *s = setup_pagerank_store(ids); + ASSERT_NOT_NULL(s); + cbm_key_symbol_t *symbols = NULL; + int count = 0; + + ASSERT_EQ(cbm_store_get_key_symbols(s, "test", NULL, 3, &symbols, &count), CBM_STORE_OK); + ASSERT_EQ(count, 3); + ASSERT_STR_EQ(symbols[0].name, "Hub"); + ASSERT_TRUE(symbols[0].pagerank > symbols[1].pagerank); + cbm_store_key_symbols_free(symbols, count); + + cbm_store_close(s); + PASS(); +} + /* ── Impact: HopToRisk ─────────────────────────────────────────── */ TEST(store_hop_to_risk) { @@ -1217,6 +1292,8 @@ SUITE(store_search) { 
RUN_TEST(store_search_all); RUN_TEST(store_search_exclude_labels); RUN_TEST(store_search_case_insensitive); + RUN_TEST(store_search_ranked_by_pagerank); + RUN_TEST(store_get_key_symbols_ranked); RUN_TEST(store_bfs_outbound); RUN_TEST(store_bfs_inbound); RUN_TEST(store_bfs_cross_service); From 0af23ec9d2f3f0c9cc4e00faa0bd024c788dd71d Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:35:23 +0530 Subject: [PATCH 3/6] Make PageRank failures non-fatal during indexing --- src/pipeline/pipeline.c | 9 ++++----- src/pipeline/pipeline_incremental.c | 27 +++++++++++---------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 41671775..5cddbcb1 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -818,11 +818,10 @@ int cbm_pipeline_run(cbm_pipeline_t *p) { } if (cbm_store_compute_pagerank(hash_store, p->project_name, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("pipeline.err", "phase", "pagerank", "project", p->project_name, - "error", cbm_store_error(hash_store)); - cbm_store_close(hash_store); - rc = -1; - goto cleanup; + /* PageRank is a ranking enhancement; keep a successful index even if + * score computation fails on this pass. 
*/ + cbm_log_warn("pipeline.warn", "phase", "pagerank", "project", p->project_name, + "error", cbm_store_error(hash_store)); } cbm_store_close(hash_store); cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index 795f32d2..fa802ba9 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -197,15 +197,13 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_log_info("incremental.noop", "reason", "no_changes"); cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - free(is_changed); - free(deleted); - cbm_store_free_file_hashes(stored, stored_count); - cbm_store_close(store); - return -1; + /* Preserve the successful no-op index result even if ranking refresh fails. 
*/ + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, + "error", cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); free(is_changed); free(deleted); cbm_store_free_file_hashes(stored, stored_count); @@ -300,15 +298,12 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_clock_gettime(CLOCK_MONOTONIC, &t); if (cbm_store_compute_pagerank(store, project, 20, 0.85) != CBM_STORE_OK) { - cbm_log_error("incremental.err", "msg", "pagerank_failed", "project", project, "error", - cbm_store_error(store)); - cbm_gbuf_free(gbuf); - cbm_registry_free(registry); - free(changed_files); - cbm_store_close(store); - return -1; + cbm_log_warn("incremental.warn", "msg", "pagerank_failed", "project", project, "error", + cbm_store_error(store)); + } else { + cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); } - cbm_log_info("pass.timing", "pass", "incr_pagerank", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); /* Cleanup */ cbm_gbuf_free(gbuf); From 408be5154a0041a1e8b84694225854bb6867d367 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 17:53:03 +0530 Subject: [PATCH 4/6] Fix read-only query opens for snapshot DBs --- src/store/store.c | 51 ++++++++++++++++++++++++++++++++++++++-- tests/test_store_nodes.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/store/store.c b/src/store/store.c index 90ebc7d0..00b857ac 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -127,6 +127,43 @@ static char *heap_strdup(const char *s) { return d; } +static bool sqlite_uri_path_safe_char(unsigned char c) { + return isalnum(c) || c == '/' || c == '.' 
|| c == '_' || c == '-' || c == '~' || c == ':'; +} + +static char *sqlite_readonly_immutable_uri(const char *db_path) { + if (!db_path) { + return NULL; + } + + static const char suffix[] = "?mode=ro&immutable=1"; + size_t path_len = strlen(db_path); + size_t cap = strlen("file:") + (path_len * 3) + sizeof(suffix); + char *uri = malloc(cap); + if (!uri) { + return NULL; + } + + char *dst = uri; + memcpy(dst, "file:", strlen("file:")); + dst += strlen("file:"); + + static const char hex[] = "0123456789ABCDEF"; + for (size_t i = 0; i < path_len; i++) { + unsigned char c = (unsigned char)db_path[i]; + if (sqlite_uri_path_safe_char(c)) { + *dst++ = (char)c; + } else { + *dst++ = '%'; + *dst++ = hex[(c >> 4) & 0x0F]; + *dst++ = hex[c & 0x0F]; + } + } + + memcpy(dst, suffix, sizeof(suffix)); + return uri; +} + static bool store_has_node_scores_table(cbm_store_t *s) { if (!s || !s->db) { return false; @@ -436,8 +473,18 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) { return NULL; } - /* Open read-only and do NOT create — query tools should never need write access. */ - int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READONLY, NULL); + /* Query tools read atomically-written snapshot DBs. Open them via an + * immutable URI so SQLite does not try to create WAL/SHM sidecars when the + * file was produced by the direct page writer and later reopened in WAL + * mode by the indexing pipeline. */ + char *uri = sqlite_readonly_immutable_uri(db_path); + if (!uri) { + free(s); + return NULL; + } + + int rc = sqlite3_open_v2(uri, &s->db, SQLITE_OPEN_READONLY | SQLITE_OPEN_URI, NULL); + free(uri); if (rc != SQLITE_OK) { /* sqlite3_open_v2 allocates a handle even on failure — must close it. */ sqlite3_close(s->db); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index 6cfc93f3..cef33bdb 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -5,6 +5,7 @@ * TestNodeDedup, TestProjectCRUD, TestUpsertNodeBatch, etc.) 
*/ #include "test_framework.h" +#include "sqlite_writer.h" #include #include #include @@ -140,6 +141,54 @@ TEST(store_open_path_query_readonly_db) { PASS(); } +TEST(store_open_path_query_direct_writer_db) { + char path[] = "/tmp/cbm_store_query_writer_XXXXXX"; + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + CBMDumpNode node = { + .id = 1, + .project = "writer-proj", + .label = "Function", + .name = "Hello", + .qualified_name = "writer-proj.main.Hello", + .file_path = "main.go", + .start_line = 1, + .end_line = 3, + .properties = "{}", + }; + ASSERT_EQ(cbm_write_db(path, "writer-proj", "/tmp/writer-proj", "2026-03-25T00:00:00Z", + &node, 1, NULL, 0), + 0); + + /* Reopen in the same way the pipeline does to add post-dump metadata. */ + cbm_store_t *writer = cbm_store_open_path(path); + ASSERT_NOT_NULL(writer); + ASSERT_EQ(cbm_store_upsert_file_hash(writer, "writer-proj", "main.go", "abc123", 1, 64), + CBM_STORE_OK); + cbm_store_close(writer); + + cbm_store_t *reader = cbm_store_open_path_query(path); + ASSERT_NOT_NULL(reader); + ASSERT_TRUE(cbm_store_check_integrity(reader)); + + cbm_project_t proj = {0}; + ASSERT_EQ(cbm_store_get_project(reader, "writer-proj", &proj), CBM_STORE_OK); + ASSERT_STR_EQ(proj.root_path, "/tmp/writer-proj"); + cbm_project_free_fields(&proj); + + cbm_node_t found = {0}; + ASSERT_EQ(cbm_store_find_node_by_qn(reader, "writer-proj", "writer-proj.main.Hello", &found), + CBM_STORE_OK); + ASSERT_STR_EQ(found.name, "Hello"); + cbm_node_free_fields(&found); + + cbm_store_close(reader); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1541,6 +1590,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_update); RUN_TEST(store_project_delete); RUN_TEST(store_open_path_query_readonly_db); + RUN_TEST(store_open_path_query_direct_writer_db); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label); From 
0559f34ca6460274e1afb82f821982f8005af5c4 Mon Sep 17 00:00:00 2001 From: Naman Khator Date: Wed, 25 Mar 2026 18:42:36 +0530 Subject: [PATCH 5/6] Refine MCP token-budget truncation Account for optional signatures in the search_graph and trace_call_path size estimators, and improve compact trace chains to report omitted-node counts. This also documents the normal-path output enrichment introduced with Task 4: search_graph results now include file_path, start_line, end_line, and signature, and trace_call_path hop items now include file_path, start_line, and signature. --- src/mcp/mcp.c | 491 +++++++++++++++++++++++++++++++++++++++++++---- tests/test_mcp.c | 163 ++++++++++++++++ 2 files changed, 616 insertions(+), 38 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 8b313dbb..05bc6a26 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -46,6 +46,12 @@ /* Default snippet fallback line count */ #define SNIPPET_DEFAULT_LINES 50 +/* Approximate output budget: 1 token ~= 4 chars. */ +#define DEFAULT_MAX_TOKENS 2000 +#define MIN_JSON_CHAR_BUDGET 128 +#define MAX_FULL_BUDGET_ITEMS 5 +#define MAX_COMPACT_QUERY_CELL_CHARS 96 + /* Idle store eviction: close cached project store after this many seconds * of inactivity to free SQLite memory during idle periods. */ #define STORE_IDLE_TIMEOUT_S 60 @@ -80,6 +86,243 @@ static char *yy_doc_to_str(yyjson_mut_doc *doc) { return s; } +static size_t max_tokens_to_char_budget(int max_tokens) { + if (max_tokens <= 0) { + max_tokens = DEFAULT_MAX_TOKENS; + } + size_t budget = (size_t)max_tokens * 4U; + if (budget < MIN_JSON_CHAR_BUDGET) { + budget = MIN_JSON_CHAR_BUDGET; + } + return budget; +} + +static char *json_string_field_dup(const char *json, const char *key) { + if (!json || !key) { + return NULL; + } + + yyjson_doc *doc = yyjson_read(json, strlen(json), 0); + if (!doc) { + return NULL; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *value = root ? 
yyjson_obj_get(root, key) : NULL; + const char *str = value ? yyjson_get_str(value) : NULL; + char *dup = str ? heap_strdup(str) : NULL; + yyjson_doc_free(doc); + return dup; +} + +static char *node_signature_dup(const cbm_node_t *node) { + if (!node) { + return NULL; + } + return json_string_field_dup(node->properties_json, "signature"); +} + +static size_t estimate_signature_field_chars(const cbm_node_t *node) { + char *signature = node_signature_dup(node); + if (!signature || !signature[0]) { + free(signature); + return 0; + } + + size_t size = strlen(signature) + 24; + free(signature); + return size; +} + +static char *truncate_text_copy(const char *text, size_t max_chars) { + if (!text) { + return heap_strdup(""); + } + + size_t len = strlen(text); + if (len <= max_chars) { + return heap_strdup(text); + } + if (max_chars <= 3) { + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + for (size_t i = 0; i < max_chars; i++) { + out[i] = '.'; + } + out[max_chars] = '\0'; + return out; + } + + char *out = malloc(max_chars + 1); + if (!out) { + return NULL; + } + size_t keep = max_chars - 3; + memcpy(out, text, keep); + memcpy(out + keep, "...", 4); + return out; +} + +static char *build_compact_hop_chain(const cbm_node_hop_t *hops, int count) { + if (!hops || count <= 0) { + return NULL; + } + + const char *first = NULL; + const char *last = NULL; + int named_count = 0; + for (int i = 0; i < count; i++) { + if (hops[i].node.name && hops[i].node.name[0]) { + if (!first) { + first = hops[i].node.name; + } + last = hops[i].node.name; + named_count++; + } + } + if (!first) { + return NULL; + } + if (named_count <= 1 || !last || strcmp(first, last) == 0) { + return heap_strdup(first); + } + if (named_count == 2) { + size_t len = strlen(first) + strlen(last) + strlen(" -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> %s", first, last); + return chain; + } + + int omitted = named_count - 2; + char 
omitted_buf[32]; + snprintf(omitted_buf, sizeof(omitted_buf), "%d", omitted); + + size_t len = + strlen(first) + strlen(last) + strlen(omitted_buf) + strlen(" -> ... ( more) -> ") + 1; + char *chain = malloc(len); + if (!chain) { + return NULL; + } + snprintf(chain, len, "%s -> ... (%d more) -> %s", first, omitted, last); + return chain; +} + +static size_t estimate_search_result_chars(const cbm_search_result_t *sr, bool compact) { + size_t size = 96; + size += strlen(sr->node.name ? sr->node.name : ""); + size += strlen(sr->node.file_path ? sr->node.file_path : ""); + size += estimate_signature_field_chars(&sr->node); + if (!compact) { + size += strlen(sr->node.qualified_name ? sr->node.qualified_name : ""); + size += strlen(sr->node.label ? sr->node.label : ""); + size += 48; + } else { + size += 24; + } + return size; +} + +static void add_search_result_item(yyjson_mut_doc *doc, yyjson_mut_val *results, + const cbm_search_result_t *sr, bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", sr->node.file_path ? sr->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", sr->node.start_line); + + char *signature = node_signature_dup(&sr->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? 
sr->node.label : ""); + yyjson_mut_obj_add_int(doc, item, "end_line", sr->node.end_line); + yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); + } + + yyjson_mut_arr_add_val(results, item); +} + +static size_t estimate_node_hop_chars(const cbm_node_hop_t *hop, bool compact) { + size_t size = 80; + size += strlen(hop->node.name ? hop->node.name : ""); + size += strlen(hop->node.file_path ? hop->node.file_path : ""); + size += estimate_signature_field_chars(&hop->node); + if (!compact) { + size += strlen(hop->node.qualified_name ? hop->node.qualified_name : ""); + size += 40; + } else { + size += 20; + } + return size; +} + +static void add_node_hop_item(yyjson_mut_doc *doc, yyjson_mut_val *items, const cbm_node_hop_t *hop, + bool compact) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", hop->node.name ? hop->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", hop->node.file_path ? hop->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "start_line", hop->node.start_line); + yyjson_mut_obj_add_int(doc, item, "hop", hop->hop); + + char *signature = node_signature_dup(&hop->node); + if (signature && signature[0]) { + yyjson_mut_obj_add_strcpy(doc, item, "signature", signature); + } + free(signature); + + if (compact) { + yyjson_mut_obj_add_bool(doc, item, "compact", true); + } else { + yyjson_mut_obj_add_str(doc, item, "qualified_name", + hop->node.qualified_name ? hop->node.qualified_name : ""); + yyjson_mut_obj_add_real(doc, item, "pagerank", hop->pagerank); + } + + yyjson_mut_arr_add_val(items, item); +} + +static size_t estimate_query_row_chars(const char *const *row, int col_count, bool compact) { + size_t size = 8; + for (int c = 0; c < col_count; c++) { + size += 4; + if (!row[c]) { + continue; + } + size += compact ? 
strnlen(row[c], MAX_COMPACT_QUERY_CELL_CHARS) : strlen(row[c]); + } + return size; +} + +static void add_query_row(yyjson_mut_doc *doc, yyjson_mut_val *rows, const char *const *row, + int col_count, + bool compact) { + yyjson_mut_val *out_row = yyjson_mut_arr(doc); + for (int c = 0; c < col_count; c++) { + const char *cell = row[c] ? row[c] : ""; + if (compact) { + char *clipped = truncate_text_copy(cell, MAX_COMPACT_QUERY_CELL_CHARS); + yyjson_mut_arr_add_strcpy(doc, out_row, clipped ? clipped : ""); + free(clipped); + } else { + yyjson_mut_arr_add_str(doc, out_row, cell); + } + } + yyjson_mut_arr_add_val(rows, out_row); +} + typedef struct { char *buf; size_t len; @@ -349,7 +592,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" "\"integer\",\"description\":\"Max results. Default: " "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}}," + "\"default\":true,\"description\":\"Sort results by PageRank importance when available.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked results when needed.\"}}," "\"required\":[\"project\"]}"}, {"query_graph", @@ -359,7 +604,9 @@ static const tool_def_t TOOLS[] = { "query\"},\"project\":{\"type\":\"string\"},\"max_rows\":{\"type\":\"integer\"," "\"description\":" "\"Optional row limit. Default: unlimited (100k " - "ceiling)\"}},\"required\":[\"query\",\"project\"]}"}, + "ceiling)\"},\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":" + "\"Maximum output size. Compacts lower-priority rows when needed.\"}}," + "\"required\":[\"query\",\"project\"]}"}, {"trace_call_path", "Trace function call paths — who calls a function and what it calls. 
Use INSTEAD OF grep when " @@ -368,7 +615,9 @@ static const tool_def_t TOOLS[] = { "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"ranked\":{\"type\":\"boolean\"," - "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}}," + "\"default\":true,\"description\":\"Sort callers/callees by PageRank importance.\"}," + "\"max_tokens\":{\"type\":\"integer\",\"default\":2000,\"description\":\"Maximum output " + "size. Truncates lower-ranked path results when needed.\"}}," "\"required\":[\"function_name\",\"project\"]}"}, {"get_code_snippet", @@ -1087,7 +1336,9 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { int offset = cbm_mcp_get_int_arg(args, "offset", 0); int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); cbm_search_params_t params = { .project = project, @@ -1112,24 +1363,55 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *results = yyjson_mut_arr(doc); for (int i = 0; i < out.count; i++) { - cbm_search_result_t *sr = &out.results[i]; - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); - yyjson_mut_obj_add_str(doc, item, "qualified_name", - sr->node.qualified_name ? sr->node.qualified_name : ""); - yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); - yyjson_mut_obj_add_str(doc, item, "file_path", - sr->node.file_path ? 
sr->node.file_path : ""); - yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); - yyjson_mut_obj_add_real(doc, item, "pagerank", sr->pagerank); - yyjson_mut_arr_add_val(results, item); + add_search_result_item(doc, results, &out.results[i], false); } yyjson_mut_obj_add_val(doc, root, "results", results); yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_int(doc, root, "total", out.total); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", out.total); + + results = yyjson_mut_arr(doc); + size_t used = 96; + int shown = 0; + int full_items = 0; + for (int i = 0; i < out.count; i++) { + bool compact = full_items >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_search_result_chars(&out.results[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_search_result_chars(&out.results[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_search_result_item(doc, results, &out.results[i], compact); + used += estimate; + shown++; + if (!compact) { + full_items++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "results", results); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_store_search_free(&out); free(project); @@ -1147,6 +1429,8 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); cbm_store_t 
*store = resolve_store(srv, project); int max_rows = cbm_mcp_get_int_arg(args, "max_rows", 0); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!query) { free(project); @@ -1194,17 +1478,63 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { /* rows */ yyjson_mut_val *rows = yyjson_mut_arr(doc); for (int r = 0; r < result.row_count; r++) { - yyjson_mut_val *row = yyjson_mut_arr(doc); - for (int c = 0; c < result.col_count; c++) { - yyjson_mut_arr_add_str(doc, row, result.rows[r][c]); - } - yyjson_mut_arr_add_val(rows, row); + add_query_row(doc, rows, result.rows[r], result.col_count, false); } yyjson_mut_obj_add_val(doc, root, "rows", rows); yyjson_mut_obj_add_int(doc, root, "total", result.row_count); char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + cols = yyjson_mut_arr(doc); + for (int i = 0; i < result.col_count; i++) { + yyjson_mut_arr_add_str(doc, cols, result.columns[i]); + } + yyjson_mut_obj_add_val(doc, root, "columns", cols); + + rows = yyjson_mut_arr(doc); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + yyjson_mut_obj_add_int(doc, root, "total_results", result.row_count); + + size_t used = 96; + for (int i = 0; i < result.col_count; i++) { + used += strlen(result.columns[i] ? 
result.columns[i] : "") + 4; + } + int shown = 0; + int full_rows = 0; + for (int r = 0; r < result.row_count; r++) { + bool compact = full_rows >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_query_row_chars(result.rows[r], result.col_count, compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_query_row_chars(result.rows[r], result.col_count, true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_query_row(doc, rows, result.rows[r], result.col_count, compact); + used += estimate; + shown++; + if (!compact) { + full_rows++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "rows", rows); + yyjson_mut_obj_add_int(doc, root, "total", result.row_count); + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + cbm_cypher_result_free(&result); free(query); free(project); @@ -1685,7 +2015,9 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); char *direction = cbm_mcp_get_string_arg(args, "direction"); int depth = cbm_mcp_get_int_arg(args, "depth", 3); + int max_tokens = cbm_mcp_get_int_arg(args, "max_tokens", DEFAULT_MAX_TOKENS); bool ranked = cbm_mcp_get_bool_arg_default(args, "ranked", true); + size_t char_budget = max_tokens_to_char_budget(max_tokens); if (!func_name) { free(project); @@ -1758,15 +2090,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callees = yyjson_mut_arr(doc); for (int i = 0; i < tr_out.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_out.visited[i].node.name ? tr_out.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_out.visited[i].node.qualified_name ? 
tr_out.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_out.visited[i].pagerank); - yyjson_mut_arr_add_val(callees, item); + add_node_hop_item(doc, callees, &tr_out.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callees", callees); } @@ -1781,15 +2105,7 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_val *callers = yyjson_mut_arr(doc); for (int i = 0; i < tr_in.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_in.visited[i].node.name ? tr_in.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_in.visited[i].node.qualified_name ? tr_in.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); - yyjson_mut_obj_add_real(doc, item, "pagerank", tr_in.visited[i].pagerank); - yyjson_mut_arr_add_val(callers, item); + add_node_hop_item(doc, callers, &tr_in.visited[i], false); } yyjson_mut_obj_add_val(doc, root, "callers", callers); } @@ -1798,6 +2114,105 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *json = yy_doc_to_str(doc); yyjson_mut_doc_free(doc); + if (json && strlen(json) > char_budget) { + free(json); + + doc = yyjson_mut_doc_new(NULL); + root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "function", func_name); + yyjson_mut_obj_add_str(doc, root, "direction", direction); + yyjson_mut_obj_add_bool(doc, root, "truncated", true); + + int total_results = 0; + if (do_outbound) { + total_results += tr_out.visited_count; + } + if (do_inbound) { + total_results += tr_in.visited_count; + } + yyjson_mut_obj_add_int(doc, root, "total_results", total_results); + + size_t used = 96 + strlen(func_name) + strlen(direction); + int shown = 0; + + if (do_outbound) { + yyjson_mut_val 
*callees = yyjson_mut_arr(doc); + int shown_callees = 0; + int full_callees = 0; + for (int i = 0; i < tr_out.visited_count; i++) { + bool compact = full_callees >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_out.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_out.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callees, &tr_out.visited[i], compact); + used += estimate; + shown++; + shown_callees++; + if (!compact) { + full_callees++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callees", callees); + if (shown_callees < tr_out.visited_count) { + char *chain = build_compact_hop_chain(tr_out.visited + shown_callees, + tr_out.visited_count - shown_callees); + if (chain && chain[0]) { + yyjson_mut_obj_add_strcpy(doc, root, "callees_chain", chain); + } + free(chain); + } + } + + if (do_inbound) { + yyjson_mut_val *callers = yyjson_mut_arr(doc); + int shown_callers = 0; + int full_callers = 0; + for (int i = 0; i < tr_in.visited_count; i++) { + bool compact = full_callers >= MAX_FULL_BUDGET_ITEMS; + size_t estimate = estimate_node_hop_chars(&tr_in.visited[i], compact); + if (used + estimate > char_budget && !compact) { + compact = true; + estimate = estimate_node_hop_chars(&tr_in.visited[i], true); + } + if (used + estimate > char_budget && shown > 0) { + break; + } + if (used + estimate <= char_budget || shown == 0) { + add_node_hop_item(doc, callers, &tr_in.visited[i], compact); + used += estimate; + shown++; + shown_callers++; + if (!compact) { + full_callers++; + } + } + } + yyjson_mut_obj_add_val(doc, root, "callers", callers); + if (shown_callers < tr_in.visited_count) { + char *chain = + build_compact_hop_chain(tr_in.visited + shown_callers, + tr_in.visited_count - shown_callers); + if (chain && chain[0]) { + 
yyjson_mut_obj_add_strcpy(doc, root, "callers_chain", chain); + } + free(chain); + } + } + + yyjson_mut_obj_add_int(doc, root, "shown", shown); + json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + } + /* Now safe to free traversal data */ if (do_outbound) { cbm_store_traverse_free(&tr_out); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 8aeeb096..d140e84d 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -629,6 +629,52 @@ static cbm_mcp_server_t *setup_pagerank_server(void) { return srv; } +static cbm_mcp_server_t *setup_truncation_server(void) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_store_upsert_project(st, "test-budget", "/tmp/test-budget"); + cbm_mcp_server_set_project(srv, "test-budget"); + + const char *sig = + "{\"signature\":\"func BudgetedOperation(alpha int, beta int, gamma int, delta int, " + "epsilon int, zeta int, eta int, theta int, iota int) string\"}"; + const char *names[] = {"Root", "A", "B", "C", "D", "E"}; + int64_t ids[6] = {0}; + + for (int i = 0; i < 6; i++) { + char qn[128]; + snprintf(qn, sizeof(qn), "test-budget.%s", names[i]); + cbm_node_t node = { + .project = "test-budget", + .label = "Function", + .name = names[i], + .qualified_name = qn, + .file_path = "pkg/budget.go", + .start_line = 10 + (i * 5), + .end_line = 13 + (i * 5), + .properties_json = sig, + }; + ids[i] = cbm_store_upsert_node(st, &node); + } + + for (int i = 0; i < 5; i++) { + cbm_edge_t edge = { + .project = "test-budget", .source_id = ids[i], .target_id = ids[i + 1], .type = "CALLS"}; + cbm_store_insert_edge(st, &edge); + } + + return srv; +} + TEST(tool_get_architecture_summary_truncated) { char tmp_dir[256]; cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); @@ -1228,6 +1274,118 @@ TEST(tool_trace_call_path_ranked_pagerank) { PASS(); } 
+TEST(tool_search_graph_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-rank\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"name\":\"Hub\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_max_tokens_truncates) { + cbm_mcp_server_t *srv = setup_pagerank_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-rank\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":3,\"max_tokens\":1}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"callees\"")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_search_graph_long_signature_budget_respected) { + cbm_mcp_server_t *srv = setup_truncation_server(); + ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "search_graph", + "{\"project\":\"test-budget\",\"label\":\"Function\",\"limit\":10,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\":1")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_call_path_chain_shows_omitted_count) { + cbm_mcp_server_t *srv = setup_truncation_server(); + 
ASSERT_NOT_NULL(srv); + + char *raw = cbm_mcp_handle_tool( + srv, "trace_call_path", + "{\"project\":\"test-budget\",\"function_name\":\"Root\",\"direction\":\"outbound\"," + "\"depth\":5,\"max_tokens\":100}"); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"callees_chain\":\"")); + ASSERT_NOT_NULL(strstr(text, "more) ->")); + free(text); + free(raw); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_query_graph_max_tokens_truncates) { + char tmp_dir[256]; + cbm_mcp_server_t *srv = setup_arch_summary_server(tmp_dir, sizeof(tmp_dir)); + ASSERT_NOT_NULL(srv); + char *proj_name = cbm_project_name_from_path(tmp_dir); + ASSERT_NOT_NULL(proj_name); + + char args[1024]; + snprintf(args, sizeof(args), + "{\"project\":\"%s\",\"query\":\"MATCH (f:Function) RETURN f.name, f.qualified_name, " + "f.file_path\",\"max_tokens\":1}", + proj_name); + + char *raw = cbm_mcp_handle_tool(srv, "query_graph", args); + ASSERT_NOT_NULL(raw); + char *text = extract_text_content(raw); + ASSERT_NOT_NULL(text); + ASSERT_NOT_NULL(strstr(text, "\"truncated\":true")); + ASSERT_NOT_NULL(strstr(text, "\"shown\"")); + ASSERT_NOT_NULL(strstr(text, "\"total_results\"")); + ASSERT_NOT_NULL(strstr(text, "\"columns\"")); + free(text); + free(raw); + free(proj_name); + + cleanup_arch_summary_server(tmp_dir, srv); + PASS(); +} + /* Call get_code_snippet and extract inner text content. * Caller must free returned string. 
*/ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) { @@ -1943,6 +2101,8 @@ SUITE(mcp) { RUN_TEST(tool_unknown_tool); RUN_TEST(tool_search_graph_basic); RUN_TEST(tool_search_graph_ranked_pagerank); + RUN_TEST(tool_search_graph_max_tokens_truncates); + RUN_TEST(tool_search_graph_long_signature_budget_respected); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); @@ -1956,7 +2116,10 @@ SUITE(mcp) { RUN_TEST(tool_get_architecture_summary_project_path_alias); RUN_TEST(tool_get_key_symbols_ranked); RUN_TEST(tool_trace_call_path_ranked_pagerank); + RUN_TEST(tool_trace_call_path_max_tokens_truncates); + RUN_TEST(tool_trace_call_path_chain_shows_omitted_count); RUN_TEST(tool_query_graph_missing_query); + RUN_TEST(tool_query_graph_max_tokens_truncates); /* Pipeline-dependent tool handlers */ RUN_TEST(tool_index_repository_missing_path); From f3e93e74d415d0d824255bd6d041a70b73e9a3de Mon Sep 17 00:00:00 2001 From: maplenk Date: Fri, 27 Mar 2026 18:16:35 +0530 Subject: [PATCH 6/6] fix: harden token-budget helpers and search_graph wiring - Guard cbm_mcp_text_result() against NULL text - Fix memory leak in handle_get_key_symbols() REQUIRE_STORE path (focus not freed) - Wire qn_pattern through handle_search_graph() - Fix OOM infinite loop in markdown_builder_reserve() - Return 0 instead of CBM_STORE_ERR from summary_count_nodes() on prepare fail Co-Authored-By: Claude Opus 4.6 --- src/mcp/mcp.c | 16 +++++++++++++++- src/store/store.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 05bc6a26..36f8fecf 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -349,6 +349,9 @@ static bool markdown_builder_reserve(markdown_builder_t *b, size_t need) { while (b->len + need + 1 > b->cap) { b->cap *= 2; b->buf = safe_realloc(b->buf, b->cap); + if (!b->buf) { + return false; + } } return true; } @@ -546,6 +549,7 @@ char *cbm_jsonrpc_format_error(int64_t id, int code, const char *message) { * 
══════════════════════════════════════════════════════════════════ */ char *cbm_mcp_text_result(const char *text, bool is_error) { + if (!text) text = ""; yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -1331,6 +1335,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { char *label = cbm_mcp_get_string_arg(args, "label"); char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); + char *qn_pattern = cbm_mcp_get_string_arg(args, "qn_pattern"); char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); int limit = cbm_mcp_get_int_arg(args, "limit", 500000); int offset = cbm_mcp_get_int_arg(args, "offset", 0); @@ -1344,6 +1349,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { .project = project, .label = label, .name_pattern = name_pattern, + .qn_pattern = qn_pattern, .file_pattern = file_pattern, .limit = limit, .offset = offset, @@ -1417,6 +1423,7 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { free(project); free(label); free(name_pattern); + free(qn_pattern); free(file_pattern); char *result = cbm_mcp_text_result(json, false); @@ -1699,7 +1706,14 @@ static char *handle_get_key_symbols(cbm_mcp_server_t *srv, const char *args) { char *focus = cbm_mcp_get_string_arg(args, "focus"); int limit = cbm_mcp_get_int_arg(args, "limit", 20); cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); + if (!store) { + char *_err = build_project_list_error("project not found or not indexed"); + char *_res = cbm_mcp_text_result(_err, true); + free(_err); + free(project); + free(focus); + return _res; + } char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { diff --git a/src/store/store.c b/src/store/store.c index 00b857ac..f3186cec 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -4638,7 +4638,7 @@ static int 
summary_count_nodes(cbm_store_t *s, const char *project, const char * sqlite3_stmt *stmt = NULL; if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "summary_count_nodes"); - return CBM_STORE_ERR; + return 0; } bind_text(stmt, 1, project); if (focus_like && focus_like[0]) {