From c7cca5c2efd9a9622b3ebaeb2967d02cd43fcf7a Mon Sep 17 00:00:00 2001 From: Brian Love Date: Sat, 16 May 2026 10:50:23 -0700 Subject: [PATCH] fix(c-generative-ui): use gpt-5 + minimal reasoning for planner LLM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gpt-5-mini ignored the "EXACTLY ONE tool" directive added in PR #363 and kept calling all four data tools on every filter follow-up. Verified live: my prompt rewrite was strict and explicit ("call EXACTLY ONE tool", "Do NOT call the other tools"), but gpt-5-mini still fanned out — the model's default reasoning prefers thoroughness over literal directive-following. Split the LLMs: - `_llm` (gpt-5-mini) — unchanged for shell-gen + respond. Cheap and good enough for prose + JSON-spec emission. - `_planner_llm` (gpt-5, reasoning_effort='minimal') — bound to tools, used in plan_tools. gpt-5 follows directives more precisely; reasoning_effort='minimal' suppresses the "let me be thorough" deliberation that drives the fan-out. Standalone smoke test (separate from Chrome): prompt: "Filter to cancelled flights only" result: ['query_recent_disruptions'] ← exactly one Chrome MCP end-to-end: backend log confirms AI tool_calls: ['query_recent_disruptions'], data grid updates to 3 cancelled rows.
Co-Authored-By: Claude Opus 4.7 --- cockpit/chat/generative-ui/python/src/graph.py | 16 +++++++++++++++- .../streaming/python/src/dashboard_graph.py | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/cockpit/chat/generative-ui/python/src/graph.py b/cockpit/chat/generative-ui/python/src/graph.py index 850bf8b35..b64d8b5cf 100644 --- a/cockpit/chat/generative-ui/python/src/graph.py +++ b/cockpit/chat/generative-ui/python/src/graph.py @@ -20,7 +20,21 @@ _PROMPT = (Path(__file__).parent.parent / "prompts" / "dashboard.md").read_text() _llm = ChatOpenAI(model="gpt-5-mini", temperature=0, streaming=True) -_llm_with_tools = _llm.bind_tools(ALL_TOOLS) + +# Dedicated planner: full gpt-5 with minimal reasoning effort. +# gpt-5-mini at default reasoning ignores the "EXACTLY ONE tool" directive +# in plan_tools and reflexively calls all four data tools on every +# follow-up — verified in chrome MCP after PR #363 tightened the prompt +# but the model still over-called. Bumping the planner to gpt-5 sharpens +# instruction-following, and reasoning_effort='minimal' suppresses the +# "let me be thorough" deliberation that drives the fan-out. +_planner_llm = ChatOpenAI( + model="gpt-5", + temperature=0, + streaming=True, + reasoning_effort="minimal", +) +_llm_with_tools = _planner_llm.bind_tools(ALL_TOOLS) class DashboardState(MessagesState): diff --git a/cockpit/langgraph/streaming/python/src/dashboard_graph.py b/cockpit/langgraph/streaming/python/src/dashboard_graph.py index 56e18e715..652690b2d 100644 --- a/cockpit/langgraph/streaming/python/src/dashboard_graph.py +++ b/cockpit/langgraph/streaming/python/src/dashboard_graph.py @@ -20,7 +20,21 @@ _PROMPT = (Path(__file__).parent.parent / "prompts" / "dashboard.md").read_text() _llm = ChatOpenAI(model="gpt-5-mini", temperature=0, streaming=True) -_llm_with_tools = _llm.bind_tools(ALL_TOOLS) + +# Dedicated planner: full gpt-5 with minimal reasoning effort. 
+# gpt-5-mini at default reasoning ignores the "EXACTLY ONE tool" directive +# in plan_tools and reflexively calls all four data tools on every +# follow-up — verified in chrome MCP after PR #363 tightened the prompt +# but the model still over-called. Bumping the planner to gpt-5 sharpens +# instruction-following, and reasoning_effort='minimal' suppresses the +# "let me be thorough" deliberation that drives the fan-out. +_planner_llm = ChatOpenAI( + model="gpt-5", + temperature=0, + streaming=True, + reasoning_effort="minimal", +) +_llm_with_tools = _planner_llm.bind_tools(ALL_TOOLS) class DashboardState(MessagesState):