Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions crates/openproof-cli/src/autonomous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ enum TacticProposerBackend {
Standard,
Ollama,
Codex,
Mlx,
}

impl TacticProposerBackend {
Expand All @@ -50,6 +51,7 @@ impl TacticProposerBackend {
Self::Standard => "standard",
Self::Ollama => "ollama",
Self::Codex => "codex",
Self::Mlx => "mlx",
}
}
}
Expand Down Expand Up @@ -1345,6 +1347,7 @@ fn tactic_proposer_backend(config: Option<&crate::setup::SetupResult>) -> Tactic
return match normalized.as_str() {
"codex" => TacticProposerBackend::Codex,
"ollama" => TacticProposerBackend::Ollama,
"mlx" => TacticProposerBackend::Mlx,
"standard" | "fallback" | "none" => TacticProposerBackend::Standard,
_ => {
eprintln!(
Expand All @@ -1361,6 +1364,10 @@ fn tactic_proposer_backend(config: Option<&crate::setup::SetupResult>) -> Tactic
fn tactic_proposer_backend_from_config(
config: Option<&crate::setup::SetupResult>,
) -> TacticProposerBackend {
// Auto-detect MLX on macOS if model is installed.
if cfg!(target_os = "macos") && openproof_search::mlx::mlx_model_exists() {
return TacticProposerBackend::Mlx;
}
match config {
Some(cfg) if cfg.model_provider == "codex" => TacticProposerBackend::Codex,
Some(cfg) if cfg.prover_model.is_some() => TacticProposerBackend::Ollama,
Expand Down Expand Up @@ -1789,6 +1796,60 @@ fn spawn_tactic_search_for_sorrys(
None
};

// Check if MLX tactic model is available (macOS Apple Silicon).
let mlx_proposer = if proposer_backend == TacticProposerBackend::Mlx {
let proposer = openproof_search::mlx::MlxProposer::new();
if proposer.is_available() {
eprintln!("[tactic-search] Using MLX tactic proposer (already running)");
Some(Arc::new(proposer))
} else {
// Auto-spawn mlx_lm.server.
eprintln!("[tactic-search] Spawning MLX server...");
let port = proposer.port();
let model_path = proposer.model_path().to_string();
match std::process::Command::new("python3")
.args([
"-m",
"mlx_lm.server",
"--model",
&model_path,
"--port",
&port.to_string(),
])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.spawn()
{
Ok(_child) => {
// Poll for readiness (up to 30s).
let mut ready = false;
for _ in 0..60 {
std::thread::sleep(std::time::Duration::from_millis(500));
if proposer.is_available() {
ready = true;
break;
}
}
if ready {
eprintln!("[tactic-search] MLX server ready");
Some(Arc::new(proposer))
} else {
eprintln!(
"[tactic-search] MLX server failed to start, using fallback tactics"
);
None
}
}
Err(e) => {
eprintln!("[tactic-search] Failed to spawn MLX server: {e}");
None
}
}
}
} else {
None
};

if proposer_backend == TacticProposerBackend::Codex {
eprintln!(
"[tactic-search] Using Codex tactic proposer ({})",
Expand Down Expand Up @@ -1869,6 +1930,7 @@ fn spawn_tactic_search_for_sorrys(
let tactics = standard_tactics.clone();
let store_for_propose = store.clone();
let ollama = ollama_proposer.clone();
let mlx = mlx_proposer.clone();
let codex_model = codex_model.clone();
let codex_cache = codex_cache.clone();
let export = ExpertExportContext {
Expand Down Expand Up @@ -1936,6 +1998,13 @@ fn spawn_tactic_search_for_sorrys(
}
}
}
TacticProposerBackend::Mlx => {
if let Some(ref proposer) = mlx {
if let Ok(model_tactics) = proposer.propose_tactics(goal, k) {
candidates.extend(model_tactics);
}
}
}
TacticProposerBackend::Standard => {}
}

Expand Down
157 changes: 157 additions & 0 deletions crates/openproof-cli/src/expert_gen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
//! Batch generation of tactic proposals using Codex.
//!
//! Reads goal states from stdin (one per line), sends each to Codex asking for
//! smart goal-specific tactics, writes unverified pairs to stdout as JSONL.
//!
//! Usage:
//! openproof expert-gen < goals.txt > unverified_pairs.jsonl

use anyhow::Result;
use openproof_model::{CodexTurnRequest, TurnMessage};
use serde::{Deserialize, Serialize};
use std::io::{self, BufRead, Write};
use std::time::Duration;

const SYSTEM_PROMPT: &str = "\
You are an expert Lean 4 tactic advisor. Given a goal state, propose 1-5 specific tactics \
that would CLOSE the goal entirely (not just simplify it). Think carefully about the goal \
structure. Use specific Mathlib lemma names when appropriate. Prefer concrete tactics over \
generic ones -- e.g. 'nlinarith [sq_nonneg (a - b)]' over just 'nlinarith'. \
For algebraic goals, use ring/linarith with specific witnesses. \
For logical goals, use exact/apply with specific lemma names. \
Reply with ONLY a JSON object: {\"tactics\":[\"tactic1\",\"tactic2\"]}. \
No markdown, no explanation. Never use sorry, admit, or native_decide.";

const DEFAULT_MODEL: &str = "gpt-5.4";

#[derive(Serialize)]
struct UnverifiedPair {
goal_state: String,
proposed_tactic: String,
}

#[derive(Deserialize)]
struct TacticResponse {
tactics: Vec<String>,
}

fn parse_tactics(response: &str) -> Vec<String> {
// Try direct parse
if let Ok(parsed) = serde_json::from_str::<TacticResponse>(response) {
return parsed.tactics;
}

// Try stripping markdown
let trimmed = response
.trim()
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim();
if let Ok(parsed) = serde_json::from_str::<TacticResponse>(trimmed) {
return parsed.tactics;
}

// Try finding JSON in text
if let Some(start) = trimmed.find('{') {
if let Some(end) = trimmed.rfind('}') {
if let Ok(parsed) = serde_json::from_str::<TacticResponse>(&trimmed[start..=end]) {
return parsed.tactics;
}
}
}

Vec::new()
}

pub async fn run_expert_gen() -> Result<()> {
let model =
std::env::var("OPENPROOF_TACTIC_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.to_string());

// Sync auth
openproof_model::sync_auth_from_codex_cli()?;

let stdin = io::stdin();
let stdout = io::stdout();
let mut out = io::BufWriter::new(stdout.lock());

let mut total = 0usize;
let mut generated = 0usize;
let mut errors = 0usize;

for line in stdin.lock().lines() {
let goal = match line {
Ok(l) => l.trim().to_string(),
Err(_) => break,
};
if goal.is_empty() {
continue;
}
total += 1;

let prompt = format!("Goal state:\n{goal}");
let messages = vec![
TurnMessage::chat("system", SYSTEM_PROMPT),
TurnMessage::chat("user", prompt),
];
let session_id = format!("expert-gen-{}", chrono::Utc::now().timestamp_millis());
let request = CodexTurnRequest {
session_id: &session_id,
messages: &messages,
model: &model,
reasoning_effort: "low",
include_tools: false,
};

match tokio::time::timeout(
Duration::from_secs(300),
openproof_model::run_codex_turn(request),
)
.await
{
Ok(Ok(response)) => {
let tactics = parse_tactics(&response);
for tactic in &tactics {
let tactic = tactic.trim();
if tactic.is_empty() {
continue;
}
let lower = tactic.to_lowercase();
if lower == "sorry" || lower == "admit" || lower == "native_decide" {
continue;
}
let pair = UnverifiedPair {
goal_state: goal.clone(),
proposed_tactic: tactic.to_string(),
};
if let Ok(json) = serde_json::to_string(&pair) {
let _ = writeln!(out, "{json}");
}
generated += 1;
}
if tactics.is_empty() {
errors += 1;
}
}
Ok(Err(e)) => {
eprintln!("[expert-gen] Error on goal {total}: {e}");
errors += 1;
}
Err(_) => {
eprintln!("[expert-gen] Timeout on goal {total}");
errors += 1;
}
}

#[allow(clippy::manual_is_multiple_of)]
if total % 50 == 0 {
eprintln!(
"[expert-gen] {total} goals processed, {generated} tactics generated, {errors} errors"
);
}
}

let _ = out.flush();
eprintln!("[expert-gen] Done: {total} goals, {generated} tactics, {errors} errors");
Ok(())
}
1 change: 1 addition & 0 deletions crates/openproof-search/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ license.workspace = true

[dependencies]
anyhow.workspace = true
directories.workspace = true
reqwest = { workspace = true, features = ["blocking"] }
serde.workspace = true
serde_json.workspace = true
Expand Down
1 change: 1 addition & 0 deletions crates/openproof-search/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
pub mod cache;
pub mod config;
pub mod lsp_search;
pub mod mlx;
pub mod ollama;
pub mod search;
Loading
Loading