From 7c4cbe12ea22abe7977858aad35a42c2cf7723a8 Mon Sep 17 00:00:00 2001
From: Brian Love
Date: Sat, 16 May 2026 09:16:49 -0700
Subject: [PATCH] fix(c-generative-ui): tighten plan_tools to call ONE tool for filter/scope
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"Filter to cancelled flights only" was calling all 4 tools (kpis +
trend + airlines + disruptions) and dumping the filtered result as
plaintext instead of letting the data_grid component re-render.

The per-turn plan_tools context just said "decide which tools to
call" — too permissive; gpt-5-mini defaults to refreshing everything.

Tighter rules now in the per-turn system context (not just the static
prompt file, which the model demonstrably ignores for tool-selection):

1) FILTER / SCOPE → exactly ONE tool, the one backing the affected
   component, with new parameters. No spec regen.
2) STRUCTURAL → regen spec, then call only tools for NEW components.
3) QUESTION → no tools, no JSON, just prose.

Calling all four is now explicitly reserved for "refresh" / "reload" /
"update everything".

Applied to both umbrella backend and standalone.

Co-Authored-By: Claude Opus 4.7 --- .../chat/generative-ui/python/src/graph.py | 23 ++++++++++++++++--- .../streaming/python/src/dashboard_graph.py | 23 ++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/cockpit/chat/generative-ui/python/src/graph.py b/cockpit/chat/generative-ui/python/src/graph.py index af02264d6..850bf8b35 100644 --- a/cockpit/chat/generative-ui/python/src/graph.py +++ b/cockpit/chat/generative-ui/python/src/graph.py @@ -47,11 +47,28 @@ async def generate_shell(state: DashboardState) -> DashboardState: async def plan_tools(state: DashboardState) -> DashboardState: - """On follow-up turns, let the LLM decide which tools to call.""" + """On follow-up turns, pick the MINIMAL set of tools — usually one.""" context = ( f"The current dashboard spec is:\n{state['dashboard_spec']}\n\n" - "Based on the user's message, decide which tools to call to update the dashboard data. " - "If the user asks a question about the data that doesn't need fresh data, just respond conversationally." + "Classify the user's message and act ONCE — do not refetch everything.\n" + "\n" + "1) FILTER / SCOPE existing data (e.g. 'filter to cancelled flights only',\n" + " 'show last 6 months', 'limit to enterprise', 'sort by date',\n" + " 'only show delayed', 'top 3'): call EXACTLY ONE tool — the one that\n" + " backs the affected component — with the new parameters. Do NOT call\n" + " the other tools. Do NOT regenerate the spec.\n" + "\n" + "2) STRUCTURAL change (e.g. 'add a card for X', 'remove the table',\n" + " 'split this into two columns'): regenerate the spec, then call only\n" + " the tools needed to populate NEW components.\n" + "\n" + "3) QUESTION about existing data (e.g. 'why', 'how', 'explain',\n" + " 'what does this mean'): respond conversationally in plain prose.\n" + " Call NO tools. 
Output NO JSON.\n" + "\n" + "If none of these fit, call only the smallest set of tools you need.\n" + "Calling all four tools is reserved for an explicit 'refresh' /\n" + "'reload' / 'update everything' request." ) messages = [SystemMessage(content=_PROMPT + "\n\n" + context)] + state["messages"] response = await _llm_with_tools.ainvoke(messages) diff --git a/cockpit/langgraph/streaming/python/src/dashboard_graph.py b/cockpit/langgraph/streaming/python/src/dashboard_graph.py index d9e849f54..56e18e715 100644 --- a/cockpit/langgraph/streaming/python/src/dashboard_graph.py +++ b/cockpit/langgraph/streaming/python/src/dashboard_graph.py @@ -47,11 +47,28 @@ async def generate_shell(state: DashboardState) -> DashboardState: async def plan_tools(state: DashboardState) -> DashboardState: - """On follow-up turns, let the LLM decide which tools to call.""" + """On follow-up turns, pick the MINIMAL set of tools — usually one.""" context = ( f"The current dashboard spec is:\n{state['dashboard_spec']}\n\n" - "Based on the user's message, decide which tools to call to update the dashboard data. " - "If the user asks a question about the data that doesn't need fresh data, just respond conversationally." + "Classify the user's message and act ONCE — do not refetch everything.\n" + "\n" + "1) FILTER / SCOPE existing data (e.g. 'filter to cancelled flights only',\n" + " 'show last 6 months', 'limit to enterprise', 'sort by date',\n" + " 'only show delayed', 'top 3'): call EXACTLY ONE tool — the one that\n" + " backs the affected component — with the new parameters. Do NOT call\n" + " the other tools. Do NOT regenerate the spec.\n" + "\n" + "2) STRUCTURAL change (e.g. 'add a card for X', 'remove the table',\n" + " 'split this into two columns'): regenerate the spec, then call only\n" + " the tools needed to populate NEW components.\n" + "\n" + "3) QUESTION about existing data (e.g. 
'why', 'how', 'explain',\n" + " 'what does this mean'): respond conversationally in plain prose.\n" + " Call NO tools. Output NO JSON.\n" + "\n" + "If none of these fit, call only the smallest set of tools you need.\n" + "Calling all four tools is reserved for an explicit 'refresh' /\n" + "'reload' / 'update everything' request." ) messages = [SystemMessage(content=_PROMPT + "\n\n" + context)] + state["messages"] response = await _llm_with_tools.ainvoke(messages)