mxthematic · markm39 · Mar 30, 2026 · Mar 30, 2026
diff --git a/crates/openproof-cli/src/autonomous.rs b/crates/openproof-cli/src/autonomous.rs
@@ -1315,11 +1315,13 @@ pub async fn run_tactic_search_file(file_path: String) -> Result<()> {
     // Extract the type expression for the fallback goal
     let type_expr = extract_type_expr_from_content(&content).unwrap_or_default();
 
-    let mut node = openproof_protocol::ProofNode::default();
-    node.id = "root".to_string();
-    node.content = content.clone();
-    node.statement = type_expr;
-    node.status = openproof_protocol::ProofNodeStatus::Pending;
+    let node = openproof_protocol::ProofNode {
+        id: "root".to_string(),
+        content: content.clone(),
+        statement: type_expr,
+        status: openproof_protocol::ProofNodeStatus::Pending,
+        ..Default::default()
+    };
     let mut proof = openproof_protocol::ProofSessionState::default();
     proof.nodes.push(node);
     proof.active_node_id = Some("root".to_string());
@@ -2123,7 +2125,7 @@ fn extract_type_expr_from_content(content: &str) -> Option<String> {
     let mut kw_idx = None;
     for kw in ["theorem ", "lemma ", "def "] {
         if let Some(idx) = before.rfind(kw) {
-            if kw_idx.map_or(true, |prev| idx > prev) {
+            if kw_idx.is_none_or(|prev| idx > prev) {
                 kw_idx = Some(idx);
             }
         }

diff --git a/crates/openproof-cli/src/decomposition.rs b/crates/openproof-cli/src/decomposition.rs
@@ -368,7 +368,7 @@ pub fn check_decomposition_consistency(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use openproof_protocol::{ProofNodeKind, ProofNodeStatus};
+    use openproof_protocol::{ProofNodeKind, ProofNodeStatus, SearchAttemptMetrics};
 
     fn make_branch(
         id: &str,

diff --git a/crates/openproof-cli/src/main.rs b/crates/openproof-cli/src/main.rs
@@ -6,6 +6,7 @@ mod event_loop;
 mod export;
 mod helpers;
 mod key_handling;
+mod refusal;
 mod setup;
 mod shell;
 mod slash_autonomous;

diff --git a/crates/openproof-cli/src/refusal.rs b/crates/openproof-cli/src/refusal.rs
@@ -0,0 +1,165 @@
+//! Refusal detection and forced retry for LLM proof attempts.
+//!
+//! When the model recognizes a hard problem from training data (FrontierMath,
+//! IMO, competitions), it may refuse to attempt it. This module detects refusal
+//! signals in the model's text and provides retry messages to force continuation.
+
+pub const MAX_REFUSAL_RETRIES: u8 = 2;
+
+/// Strong refusal signals -- any single match triggers refusal detection.
+const STRONG_SIGNALS: &[&str] = &[
+    "open problem",
+    "unsolved problem",
+    "open question",
+    "no known proof",
+    "no known solution",
+    "frontiermath",
+    "frontier math",
+    "competition problem",
+    "benchmark problem",
+    "beyond current capabilities",
+    "beyond my capabilities",
+    "beyond the capabilities",
+    "i cannot solve this",
+    "i'm unable to prove",
+    "i am unable to prove",
+    "cannot be proven",
+    "is considered unsolved",
+    "this is an open",
+    "this problem is from",
+    "known to be undecidable",
+    "no elementary proof",
+    "this is a well-known",
+    "remains an open",
+];
+
+/// Weak refusal signals -- only trigger when combined with no tool usage
+/// and no Lean code in the response.
+const WEAK_SIGNALS: &[&str] = &[
+    "i apologize",
+    "i'm sorry",
+    "i am sorry",
+    "too difficult",
+    "unable to",
+    "cannot prove",
+    "cannot solve",
+    "i don't think",
+    "i do not think",
+    "not feasible",
+    "not possible to prove",
+];
+
+/// Detect whether the model's response is a refusal to attempt the proof.
+///
+/// Returns `true` if the text contains refusal signals. If `used_tools` is true,
+/// only strong signals trigger (the model at least tried before complaining).
+pub fn detect_refusal(text: &str, used_tools: bool) -> bool {
+    if text.trim().is_empty() {
+        return false;
+    }
+    let lower = text.to_ascii_lowercase();
+
+    // Strong signals always trigger.
+    if STRONG_SIGNALS.iter().any(|s| lower.contains(s)) {
+        return true;
+    }
+
+    // Weak signals only trigger if the model didn't use tools and wrote no Lean code.
+    if !used_tools {
+        let has_lean_code = text.contains("```lean");
+        if !has_lean_code && WEAK_SIGNALS.iter().any(|s| lower.contains(s)) {
+            return true;
+        }
+    }
+
+    false
+}
+
+/// Build an escalating retry message to force the model to attempt the proof.
+pub fn refusal_retry_message(retry_count: u8) -> String {
+    match retry_count {
+        1 => concat!(
+            "You MUST attempt this proof. Do not assess difficulty or discuss problem sources. ",
+            "Start by writing a sorry-skeleton: create a .lean file with the theorem statement ",
+            "and `sorry` placeholders, then use lean_verify to check it compiles. ",
+            "Decompose the problem into sub-have steps and fill them one at a time.",
+        )
+        .to_string(),
+        _ => concat!(
+            "FINAL ATTEMPT: Write Lean code NOW using file_write. Do not produce text without ",
+            "tool calls. Create a sorry-skeleton immediately with the theorem statement and sorry. ",
+            "Any response without a tool call wastes this proof attempt entirely.",
+        )
+        .to_string(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn detects_strong_refusal() {
+        assert!(detect_refusal(
+            "This is an open problem in number theory.",
+            false
+        ));
+        assert!(detect_refusal(
+            "This problem is from FrontierMath and is unsolved.",
+            false
+        ));
+        assert!(detect_refusal("I cannot solve this theorem.", false));
+        assert!(detect_refusal(
+            "The conjecture remains an open question.",
+            false
+        ));
+    }
+
+    #[test]
+    fn detects_strong_even_with_tools() {
+        assert!(detect_refusal("This is an open problem.", true));
+    }
+
+    #[test]
+    fn detects_weak_refusal_without_tools() {
+        assert!(detect_refusal(
+            "I apologize, but this is too difficult for me.",
+            false
+        ));
+        assert!(detect_refusal("I'm sorry, I cannot prove this.", false));
+    }
+
+    #[test]
+    fn weak_signals_ignored_with_tools() {
+        assert!(!detect_refusal(
+            "I'm sorry, let me try a different approach.",
+            true
+        ));
+    }
+
+    #[test]
+    fn weak_signals_ignored_with_lean_code() {
+        let text =
+            "I'm sorry, this is hard. Here's my attempt:\n```lean\ntheorem foo := sorry\n```";
+        assert!(!detect_refusal(text, false));
+    }
+
+    #[test]
+    fn no_refusal_on_normal_text() {
+        assert!(!detect_refusal(
+            "Let me try using norm_num for this.",
+            false
+        ));
+        assert!(!detect_refusal("The proof follows from induction.", false));
+        assert!(!detect_refusal("", false));
+    }
+
+    #[test]
+    fn retry_messages_are_different() {
+        let m1 = refusal_retry_message(1);
+        let m2 = refusal_retry_message(2);
+        assert_ne!(m1, m2);
+        assert!(m1.contains("sorry-skeleton"));
+        assert!(m2.contains("FINAL ATTEMPT"));
+    }
+}
diff --git a/crates/openproof-cli/src/system_prompt.rs b/crates/openproof-cli/src/system_prompt.rs
@@ -148,6 +148,14 @@ pub fn build_system_prompt(session: Option<&SessionSnapshot>) -> String {
             "The theorem statement is FIXED.",
         ).to_string(),
         "When stuck, try a different approach rather than explaining why you are stuck. Decompose hard sorrys into sub-have steps.".to_string(),
+        concat!(
+            "MANDATORY: You MUST attempt every proof. NEVER refuse.\n",
+            "- Do NOT discuss whether a problem is open, unsolved, from a benchmark, or too hard.\n",
+            "- Do NOT reference problem sources (FrontierMath, IMO, competitions, etc.).\n",
+            "- Do NOT assess problem difficulty. Write Lean code, not commentary.\n",
+            "- NEVER produce a response without tool calls. Always write code.\n",
+            "- Even if you believe a proof is impossible, write the best sorry-skeleton you can.",
+        ).to_string(),
         "Use structured markers: TITLE, PROBLEM, FORMAL_TARGET, ACCEPTED_TARGET, PHASE, STATUS, THEOREM, LEMMA, and fenced ```lean``` blocks.".to_string(),
         "Maintain Paper.tex alongside Lean code using file_write/file_patch. Do not include LaTeX in response text.".to_string(),
     ];
@@ -478,7 +486,8 @@ pub async fn build_branch_turn_messages(
                         "then IMMEDIATELY create the sorry-skeleton in Lean and lean_verify it. ",
                         "Do not spend more than 5 tool calls on research. The compilable skeleton matters ",
                         "more than the research. Focus on decomposing the theorem into a chain of `have` ",
-                        "steps that captures the KEY MATHEMATICAL INSIGHT. The Prover branch will fill the sorrys.",
+                        "steps that captures the KEY MATHEMATICAL INSIGHT. The Prover branch will fill the sorrys. ",
+                        "Never refuse to plan a proof. Even for hard problems, write a decomposition skeleton.",
                     ).to_string()
                 }
                 AgentRole::Retriever => {
@@ -493,7 +502,8 @@ pub async fn build_branch_turn_messages(
                         "lean_screen_tactics (test simp, omega, ring, exact?, apply?, linarith, norm_num, aesop, decide, tauto) ",
                         "-> file_patch to apply the working tactic -> lean_verify to confirm. ",
                         "If stuck on a goal for more than 3 attempts, decompose it with sub-have steps. ",
-                        "Do NOT change theorem statements. Spend ALL your tool calls on the lean_goals/screen_tactics/patch/verify loop.",
+                        "Do NOT change theorem statements. Spend ALL your tool calls on the lean_goals/screen_tactics/patch/verify loop. ",
+                        "Never refuse to attempt. Even partial progress (filling some sorrys) is valuable.",
                     ).to_string()
                 }
                 AgentRole::Repairer => {

diff --git a/crates/openproof-cli/src/turn_handling.rs b/crates/openproof-cli/src/turn_handling.rs
@@ -98,6 +98,7 @@ pub async fn run_agentic_loop(
     }
     let mut turn_used_tools = false;
     let mut last_verify_ok = false;
+    let mut refusal_retries: u8 = 0;
 
     // Spawn lean-lsp-mcp for structured goal access (fallback when Pantograph unavailable).
     let lsp_mcp: Option<Arc<Mutex<LeanLspMcp>>> = LeanLspMcp::spawn(&project_dir)
@@ -138,7 +139,16 @@ pub async fn run_agentic_loop(
             Ok(result) => {
                 // If there are no tool calls, we are done.
                 if result.tool_calls.is_empty() {
-                    // Text was streamed via StreamDelta events.
+                    // Detect refusal and force retry before giving up.
+                    if refusal_retries < crate::refusal::MAX_REFUSAL_RETRIES
+                        && crate::refusal::detect_refusal(&result.text, turn_used_tools)
+                    {
+                        refusal_retries += 1;
+                        let retry = crate::refusal::refusal_retry_message(refusal_retries);
+                        messages.push(TurnMessage::chat("assistant", &result.text));
+                        messages.push(TurnMessage::chat("system", &retry));
+                        continue;
+                    }
                     break;
                 }
 
@@ -427,6 +437,7 @@ pub fn start_agent_branch_turn(
         let mut accumulated_text = String::new();
         let mut turn_used_tools = false;
         let mut last_verify_ok = false;
+        let mut refusal_retries: u8 = 0;
 
         let branch_lsp_mcp: Option<Arc<Mutex<LeanLspMcp>>> = LeanLspMcp::spawn(&project_dir)
             .map(|c| Arc::new(Mutex::new(c)))
@@ -452,6 +463,15 @@ pub fn start_agent_branch_turn(
                 Ok(turn) => {
                     accumulated_text.push_str(&turn.text);
                     if turn.tool_calls.is_empty() {
+                        if refusal_retries < crate::refusal::MAX_REFUSAL_RETRIES
+                            && crate::refusal::detect_refusal(&accumulated_text, turn_used_tools)
+                        {
+                            refusal_retries += 1;
+                            let retry = crate::refusal::refusal_retry_message(refusal_retries);
+                            all_messages.push(TurnMessage::chat("assistant", &turn.text));
+                            all_messages.push(TurnMessage::chat("system", &retry));
+                            continue;
+                        }
                         break;
                     }
                     turn_used_tools = true;

diff --git a/crates/openproof-model/src/lib.rs b/crates/openproof-model/src/lib.rs
@@ -304,7 +304,7 @@ fn build_turn_payload(request: &CodexTurnRequest<'_>) -> Value {
         "model": request.model,
         "store": false,
         "stream": true,
-        "instructions": "You are openproof, a local-first formal math assistant. Be concise, direct, and helpful. If the user asks a general question, answer directly. If the user gives a theorem-like statement, help formalize or prove it in Lean.",
+        "instructions": "You are openproof, a local-first formal math assistant. Be concise, direct, and helpful. If the user asks a general question, answer directly. If the user gives a theorem-like statement, help formalize or prove it in Lean. NEVER refuse to attempt a proof. Do not discuss problem difficulty, sources, or feasibility. Always use tools to write and verify Lean code.",
         "input": request.messages.iter().map(serialize_turn_message).collect::<Vec<_>>(),
         "include": ["reasoning.encrypted_content"],
         "tool_choice": if request.include_tools { "auto" } else { "none" },