diff --git a/src/arxiv_explorer/cli/main.py b/src/arxiv_explorer/cli/main.py index 9bad0a5..5e93c84 100644 --- a/src/arxiv_explorer/cli/main.py +++ b/src/arxiv_explorer/cli/main.py @@ -39,7 +39,7 @@ def main( # Import and register subcommands -from . import config, daily, export, lists, notes, preferences, search # noqa: E402 +from . import config, daily, export, lists, notes, preferences, review, search # noqa: E402 app.add_typer(preferences.app, name="prefs", help="Preference management") app.add_typer(lists.app, name="list", help="Reading list management") @@ -55,6 +55,7 @@ def main( app.command(name="dislike")(daily.dislike) app.command(name="show")(daily.show) app.command(name="translate")(daily.translate) +app.command(name="review")(review.review) @app.command() diff --git a/src/arxiv_explorer/cli/review.py b/src/arxiv_explorer/cli/review.py new file mode 100644 index 0000000..2d4fa49 --- /dev/null +++ b/src/arxiv_explorer/cli/review.py @@ -0,0 +1,191 @@ +"""Review command -- generate comprehensive AI paper review.""" + +from pathlib import Path +from typing import Optional + +import typer +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TaskProgressColumn, + TextColumn, + TimeElapsedColumn, +) + +from ..core.models import Language, ReviewSectionType +from ..services.paper_service import PaperService +from ..services.review_service import PaperReviewService +from ..services.settings_service import SettingsService +from ..utils.display import console, print_error, print_info, print_success + +# Human-readable names for review sections +_SECTION_NAMES: dict[ReviewSectionType, str] = { + ReviewSectionType.EXECUTIVE_SUMMARY: "Executive Summary", + ReviewSectionType.KEY_CONTRIBUTIONS: "Key Contributions", + ReviewSectionType.SECTION_SUMMARIES: "Section Summaries", + ReviewSectionType.METHODOLOGY: "Methodology Analysis", + ReviewSectionType.MATH_FORMULATIONS: "Math Formulations", + ReviewSectionType.FIGURES: "Figure Descriptions", + 
ReviewSectionType.TABLES: "Table Descriptions", + ReviewSectionType.EXPERIMENTAL_RESULTS: "Experimental Results", + ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses", + ReviewSectionType.RELATED_WORK: "Related Work", + ReviewSectionType.GLOSSARY: "Glossary", + ReviewSectionType.QUESTIONS: "Questions", +} + + +def review( + arxiv_id: str = typer.Argument(..., help="arXiv ID (e.g., 2401.00001)"), + output: Optional[Path] = typer.Option( + None, "--output", "-o", help="Save review to file (default: print to console)" + ), + force: bool = typer.Option( + False, "--force", "-f", help="Regenerate all sections (ignore cache)" + ), + translate: bool = typer.Option( + False, "--translate", "-t", help="Translate review to configured language" + ), + language: Optional[str] = typer.Option( + None, "--language", "-L", help="Target language code (e.g., 'ko')" + ), + no_full_text: bool = typer.Option( + False, "--no-full-text", help="Skip full text extraction, use abstract only" + ), + status: bool = typer.Option( + False, "--status", "-s", help="Show cached review status without generating" + ), + delete: bool = typer.Option(False, "--delete", help="Delete cached review for this paper"), +): + """Generate a comprehensive AI review of an arXiv paper. + + Fetches the full paper text when possible (via arxiv-doc-builder), + then analyzes each section with AI to produce a detailed Markdown review. + Reviews are cached section-by-section -- interrupted reviews resume + automatically. 
+ + Examples: + axp review 2401.00001 + axp review 2401.00001 -o review.md + axp review 2401.00001 --force --translate + axp review 2401.00001 --status + """ + review_service = PaperReviewService() + + # Handle --delete + if delete: + if review_service.delete_review(arxiv_id): + print_success(f"Deleted cached review for {arxiv_id}") + else: + print_info(f"No cached review found for {arxiv_id}") + return + + # Handle --status + if status: + cached = review_service.get_cached_review(arxiv_id) + if cached is None: + print_info(f"No cached review for {arxiv_id}") + else: + total = len(ReviewSectionType) + done = len(cached.sections) + console.print(f"[bold]Review status for {arxiv_id}[/bold]") + console.print(f"Sections: {done}/{total}") + for st in ReviewSectionType: + if st in cached.sections: + icon = "[green]\u2714[/green]" + else: + icon = "[dim]\u2022[/dim]" + console.print(f" {icon} {_SECTION_NAMES.get(st, st.value)}") + return + + # Fetch paper metadata + paper_service = PaperService() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console, + ) as progress: + progress.add_task("Fetching paper metadata...", total=None) + paper = paper_service.get_paper(arxiv_id) + + if not paper: + print_error(f"Paper not found: {arxiv_id}") + raise typer.Exit(1) + + console.print(f"\n[bold]{paper.title}[/bold]") + console.print(f"[dim]{', '.join(paper.authors[:5])}[/dim]\n") + + # If --no-full-text, skip extraction + if no_full_text: + review_service._extract_full_text = lambda _: None # type: ignore[assignment] + + # Generate review with progress bar + succeeded = 0 + failed = 0 + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TaskProgressColumn(), + TimeElapsedColumn(), + console=console, + ) as progress: + task = progress.add_task("Generating review...", total=len(ReviewSectionType)) + + def on_start(section_type: ReviewSectionType, idx: int, total: int) 
-> None: + name = _SECTION_NAMES.get(section_type, section_type.value) + progress.update(task, description=f"[cyan]{name}[/cyan]...") + + def on_complete(section_type: ReviewSectionType, success: bool) -> None: + nonlocal succeeded, failed + if success: + succeeded += 1 + else: + failed += 1 + progress.advance(task) + + paper_review = review_service.generate_review( + paper=paper, + force=force, + on_section_start=on_start, + on_section_complete=on_complete, + ) + + if not paper_review: + print_error("Review generation failed completely.") + raise typer.Exit(1) + + # Report results + print_info(f"Sections: {succeeded} succeeded, {failed} failed") + if paper_review.source_type == "abstract": + print_info("Note: Full text was not available. Review is based on abstract only.") + + # Resolve language + target_lang = Language.EN + if translate or language: + if language: + try: + target_lang = Language(language) + except ValueError: + supported = ", ".join(lang.value for lang in Language) + print_error(f"Unknown language: {language}. 
Supported: {supported}") + raise typer.Exit(1) from None + else: + target_lang = SettingsService().get_language() + + # Render markdown + markdown = review_service.render_markdown(paper_review, language=target_lang) + + # Output + if output: + output.parent.mkdir(parents=True, exist_ok=True) + output.write_text(markdown, encoding="utf-8") + print_success(f"Review saved: {output}") + else: + console.print() + from rich.markdown import Markdown + + console.print(Markdown(markdown)) diff --git a/src/arxiv_explorer/core/database.py b/src/arxiv_explorer/core/database.py index 9a4e2dd..523f65d 100644 --- a/src/arxiv_explorer/core/database.py +++ b/src/arxiv_explorer/core/database.py @@ -89,6 +89,17 @@ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); +-- Paper review sections (incremental cache) +CREATE TABLE IF NOT EXISTS paper_review_sections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + arxiv_id TEXT NOT NULL, + section_type TEXT NOT NULL, + content_json TEXT NOT NULL, + source_type TEXT NOT NULL DEFAULT 'abstract', + generated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(arxiv_id, section_type) +); + -- Paper cache CREATE TABLE IF NOT EXISTS papers ( arxiv_id TEXT PRIMARY KEY NOT NULL, @@ -109,6 +120,7 @@ CREATE INDEX IF NOT EXISTS idx_list_papers_list ON reading_list_papers(list_id); CREATE INDEX IF NOT EXISTS idx_translations_arxiv ON paper_translations(arxiv_id); CREATE INDEX IF NOT EXISTS idx_papers_cached_at ON papers(cached_at); +CREATE INDEX IF NOT EXISTS idx_review_sections_arxiv ON paper_review_sections(arxiv_id); """ diff --git a/src/arxiv_explorer/core/models.py b/src/arxiv_explorer/core/models.py index c70c0fb..184105e 100644 --- a/src/arxiv_explorer/core/models.py +++ b/src/arxiv_explorer/core/models.py @@ -38,6 +38,21 @@ class Language(str, Enum): KO = "ko" +class ReviewSectionType(str, Enum): + EXECUTIVE_SUMMARY = "executive_summary" + KEY_CONTRIBUTIONS = "key_contributions" + SECTION_SUMMARIES = "section_summaries" + METHODOLOGY = "methodology" + 
MATH_FORMULATIONS = "math_formulations" + FIGURES = "figures" + TABLES = "tables" + EXPERIMENTAL_RESULTS = "experimental_results" + STRENGTHS_WEAKNESSES = "strengths_weaknesses" + RELATED_WORK = "related_work" + GLOSSARY = "glossary" + QUESTIONS = "questions" + + @dataclass class Paper: """Paper data model.""" @@ -143,6 +158,41 @@ class KeywordInterest: source: str = "explicit" # 'explicit' or 'inferred' +@dataclass +class ReviewSection: + """One section of a paper review, cached individually.""" + + id: int + arxiv_id: str + section_type: ReviewSectionType + content_json: str + generated_at: datetime = field(default_factory=datetime.now) + + +@dataclass +class PaperReview: + """Assembled paper review.""" + + arxiv_id: str + title: str + authors: list[str] + categories: list[str] + published: datetime + abstract: str + sections: dict[ReviewSectionType, dict] = field(default_factory=dict) + pdf_url: Optional[str] = None + source_type: str = "abstract" + generated_at: datetime = field(default_factory=datetime.now) + + @property + def is_complete(self) -> bool: + return set(self.sections.keys()) == set(ReviewSectionType) + + @property + def missing_sections(self) -> list[ReviewSectionType]: + return [s for s in ReviewSectionType if s not in self.sections] + + @dataclass class RecommendedPaper: """Recommended paper with score.""" diff --git a/src/arxiv_explorer/services/review_service.py b/src/arxiv_explorer/services/review_service.py new file mode 100644 index 0000000..d5028fb --- /dev/null +++ b/src/arxiv_explorer/services/review_service.py @@ -0,0 +1,1034 @@ +"""Paper review service using map-reduce AI analysis.""" + +import json +import re +import subprocess +from datetime import datetime +from pathlib import Path +from typing import Callable, Optional + +from ..core.database import get_connection +from ..core.models import ( + Language, + Paper, + PaperReview, + ReviewSection, + ReviewSectionType, +) +from .providers import get_provider +from .settings_service 
import SettingsService + +# Language display names for translation prompts +_LANG_NAMES: dict[Language, str] = { + Language.KO: "Korean", +} + + +class PaperReviewService: + """Generate comprehensive AI paper reviews with incremental caching.""" + + # Ordered processing pipeline: (section_type, requires_full_text?) + SECTION_PIPELINE: list[tuple[ReviewSectionType, bool]] = [ + (ReviewSectionType.EXECUTIVE_SUMMARY, False), + (ReviewSectionType.KEY_CONTRIBUTIONS, False), + (ReviewSectionType.SECTION_SUMMARIES, True), + (ReviewSectionType.METHODOLOGY, True), + (ReviewSectionType.MATH_FORMULATIONS, True), + (ReviewSectionType.FIGURES, True), + (ReviewSectionType.TABLES, True), + (ReviewSectionType.EXPERIMENTAL_RESULTS, True), + (ReviewSectionType.STRENGTHS_WEAKNESSES, False), + (ReviewSectionType.RELATED_WORK, True), + (ReviewSectionType.GLOSSARY, False), + (ReviewSectionType.QUESTIONS, False), + ] + + def generate_review( + self, + paper: Paper, + force: bool = False, + on_section_start: Optional[Callable[[ReviewSectionType, int, int], None]] = None, + on_section_complete: Optional[Callable[[ReviewSectionType, bool], None]] = None, + ) -> PaperReview | None: + """Generate a full review. 
Resumes from cache if interrupted.""" + # Step 1: Attempt full text extraction + full_text_md = self._extract_full_text(paper.arxiv_id) + source_type = "full_text" if full_text_md else "abstract" + + # Step 2: Pre-parse full text if available + paper_sections = None + figure_captions = None + table_content = None + math_blocks = None + if full_text_md: + paper_sections = self._split_into_sections(full_text_md) + figure_captions = self._extract_figure_captions(full_text_md) + table_content = self._extract_table_content(full_text_md) + math_blocks = self._extract_math_blocks(full_text_md) + + # Step 3: Load existing cached sections + cached = self._get_all_cached_sections(paper.arxiv_id) + + # Step 4: Process each section + total = len(self.SECTION_PIPELINE) + sections_data: dict[ReviewSectionType, dict] = {} + + for idx, (section_type, needs_full_text) in enumerate(self.SECTION_PIPELINE): + if on_section_start: + on_section_start(section_type, idx, total) + + # Use cached if available and not forcing + if not force and section_type in cached: + sections_data[section_type] = json.loads(cached[section_type].content_json) + if on_section_complete: + on_section_complete(section_type, True) + continue + + # Skip data-dependent sections when no data exists + if needs_full_text and not full_text_md: + if section_type in ( + ReviewSectionType.FIGURES, + ReviewSectionType.TABLES, + ReviewSectionType.MATH_FORMULATIONS, + ): + empty = self._empty_section_data(section_type) + sections_data[section_type] = empty + self._save_section(paper.arxiv_id, section_type, empty, source_type) + if on_section_complete: + on_section_complete(section_type, True) + continue + + # Build prompt and invoke AI + prompt = self._build_prompt( + section_type=section_type, + paper=paper, + full_text_md=full_text_md, + paper_sections=paper_sections, + figure_captions=figure_captions, + table_content=table_content, + math_blocks=math_blocks, + ) + + data = self._invoke_ai(prompt) + if data: + 
sections_data[section_type] = data + self._save_section(paper.arxiv_id, section_type, data, source_type) + if on_section_complete: + on_section_complete(section_type, True) + else: + if on_section_complete: + on_section_complete(section_type, False) + + if not sections_data: + return None + + return PaperReview( + arxiv_id=paper.arxiv_id, + title=paper.title, + authors=paper.authors, + categories=paper.categories, + published=paper.published, + pdf_url=paper.pdf_url, + abstract=paper.abstract, + sections=sections_data, + source_type=source_type, + ) + + def get_cached_review(self, arxiv_id: str) -> PaperReview | None: + """Retrieve a partially or fully cached review.""" + cached = self._get_all_cached_sections(arxiv_id) + if not cached: + return None + + # We need paper metadata but don't have a Paper object here; + # return a minimal PaperReview with just sections populated. + first = next(iter(cached.values())) + return PaperReview( + arxiv_id=arxiv_id, + title="", + authors=[], + categories=[], + published=datetime.now(), + abstract="", + sections={st: json.loads(sec.content_json) for st, sec in cached.items()}, + source_type="cached", + generated_at=first.generated_at, + ) + + def delete_review(self, arxiv_id: str) -> bool: + """Delete all cached review sections for a paper.""" + with get_connection() as conn: + cursor = conn.execute( + "DELETE FROM paper_review_sections WHERE arxiv_id = ?", + (arxiv_id,), + ) + conn.commit() + return cursor.rowcount > 0 + + def render_markdown( + self, + review: PaperReview, + language: Language = Language.EN, + ) -> str: + """Render a PaperReview into final Markdown string.""" + parts: list[str] = [] + + # --- Header --- + parts.append(f"# {review.title}\n") + author_str = ", ".join(review.authors[:10]) + if len(review.authors) > 10: + author_str += f" (+{len(review.authors) - 10} more)" + parts.append(f"**Authors:** {author_str} ") + parts.append( + f"**arXiv ID:** [{review.arxiv_id}](https://arxiv.org/abs/{review.arxiv_id}) 
" + ) + parts.append(f"**Categories:** {', '.join(review.categories)} ") + parts.append(f"**Published:** {review.published.strftime('%Y-%m-%d')} ") + if review.pdf_url: + parts.append(f"**PDF:** [{review.pdf_url}]({review.pdf_url}) ") + source_label = ( + "Full text analysis" if review.source_type == "full_text" else "Abstract-only analysis" + ) + parts.append(f"**Source:** {source_label}") + parts.append("") + + # --- Executive Summary --- + es = review.sections.get(ReviewSectionType.EXECUTIVE_SUMMARY, {}) + if es: + parts.append("## Executive Summary\n") + if es.get("tldr"): + parts.append(f"**TL;DR:** {es['tldr']}\n") + if es.get("research_question"): + parts.append(f"**Research Question:** {es['research_question']}\n") + if es.get("approach_summary"): + parts.append(f"**Approach:** {es['approach_summary']}\n") + if es.get("main_result"): + parts.append(f"**Main Result:** {es['main_result']}\n") + + # --- Key Contributions --- + kc = review.sections.get(ReviewSectionType.KEY_CONTRIBUTIONS, {}) + if kc and kc.get("contributions"): + parts.append("## Key Contributions\n") + for c in kc["contributions"]: + ctype = c.get("type", "general") + parts.append(f"- **[{ctype}]** {c.get('contribution', '')}") + if c.get("significance"): + parts.append(f" - *Significance:* {c['significance']}") + parts.append("") + + # --- Section-by-Section Summary --- + ss = review.sections.get(ReviewSectionType.SECTION_SUMMARIES, {}) + if ss and ss.get("sections"): + parts.append("## Section-by-Section Summary\n") + for sec in ss["sections"]: + parts.append(f"### {sec.get('heading', 'Unknown Section')}\n") + parts.append(f"{sec.get('summary', '')}\n") + if sec.get("key_points"): + for kp in sec["key_points"]: + parts.append(f"- {kp}") + parts.append("") + + # --- Methodology Analysis --- + meth = review.sections.get(ReviewSectionType.METHODOLOGY, {}) + if meth: + parts.append("## Methodology Analysis\n") + if meth.get("overview"): + parts.append(f"{meth['overview']}\n") + if 
meth.get("steps"): + for step in meth["steps"]: + parts.append(f"### {step.get('step_name', 'Step')}\n") + parts.append(f"{step.get('description', '')}\n") + novelty = step.get("novelty", "") + if novelty and novelty != "standard": + parts.append(f"*Novelty:* {novelty}\n") + if meth.get("assumptions"): + parts.append("**Assumptions:**\n") + for a in meth["assumptions"]: + parts.append(f"- {a}") + parts.append("") + if meth.get("complexity_notes"): + parts.append(f"**Complexity:** {meth['complexity_notes']}\n") + + # --- Figure Descriptions --- + figs = review.sections.get(ReviewSectionType.FIGURES, {}) + if figs and figs.get("figures"): + parts.append("## Figure Descriptions\n") + for fig in figs["figures"]: + parts.append(f"### Figure {fig.get('figure_id', '?')}\n") + parts.append(f"{fig.get('description', '')}\n") + if fig.get("significance"): + parts.append(f"*Significance:* {fig['significance']}\n") + + # --- Table Descriptions --- + tbls = review.sections.get(ReviewSectionType.TABLES, {}) + if tbls and tbls.get("tables"): + parts.append("## Table Descriptions\n") + for tbl in tbls["tables"]: + cap = tbl.get("caption", "") + parts.append(f"### Table {tbl.get('table_id', '?')}: {cap}\n") + parts.append(f"{tbl.get('description', '')}\n") + if tbl.get("key_findings"): + parts.append(f"*Key findings:* {tbl['key_findings']}\n") + + # --- Mathematical Formulations --- + math = review.sections.get(ReviewSectionType.MATH_FORMULATIONS, {}) + if math and math.get("formulations"): + parts.append("## Mathematical Formulations\n") + for f in math["formulations"]: + parts.append(f"### {f.get('equation_label', 'Equation')}\n") + if f.get("latex"): + parts.append(f"$$\n{f['latex']}\n$$\n") + if f.get("plain_language"): + parts.append(f"**Plain language:** {f['plain_language']}\n") + if f.get("role"): + parts.append(f"*Role:* {f['role']}\n") + + # --- Experimental Results --- + exp = review.sections.get(ReviewSectionType.EXPERIMENTAL_RESULTS, {}) + if exp: + parts.append("## 
Experimental Results\n") + if exp.get("datasets"): + parts.append(f"**Datasets:** {', '.join(exp['datasets'])}\n") + if exp.get("baselines"): + parts.append(f"**Baselines:** {', '.join(exp['baselines'])}\n") + if exp.get("metrics"): + parts.append(f"**Metrics:** {', '.join(exp['metrics'])}\n") + if exp.get("main_results"): + parts.append(f"{exp['main_results']}\n") + if exp.get("ablation_studies"): + parts.append(f"**Ablation Studies:** {exp['ablation_studies']}\n") + if exp.get("notable_findings"): + parts.append("**Notable Findings:**\n") + for nf in exp["notable_findings"]: + parts.append(f"- {nf}") + parts.append("") + + # --- Strengths and Weaknesses --- + sw = review.sections.get(ReviewSectionType.STRENGTHS_WEAKNESSES, {}) + if sw: + parts.append("## Strengths and Weaknesses\n") + if sw.get("strengths"): + parts.append("### Strengths\n") + for s in sw["strengths"]: + parts.append(f"- **{s.get('point', '')}**") + if s.get("evidence"): + parts.append(f" - {s['evidence']}") + parts.append("") + if sw.get("weaknesses"): + parts.append("### Weaknesses\n") + for w in sw["weaknesses"]: + parts.append(f"- **{w.get('point', '')}**") + if w.get("evidence"): + parts.append(f" - {w['evidence']}") + parts.append("") + if sw.get("overall_assessment"): + parts.append(f"**Overall Assessment:** {sw['overall_assessment']}\n") + + # --- Related Work --- + rw = review.sections.get(ReviewSectionType.RELATED_WORK, {}) + if rw: + parts.append("## Related Work Context\n") + if rw.get("research_areas"): + parts.append(f"**Research Areas:** {', '.join(rw['research_areas'])}\n") + if rw.get("key_prior_works"): + for pw in rw["key_prior_works"]: + parts.append(f"- **{pw.get('work', '')}**") + if pw.get("relationship"): + parts.append(f" - {pw['relationship']}") + parts.append("") + if rw.get("positioning"): + parts.append(f"**Positioning:** {rw['positioning']}\n") + + # --- Glossary --- + gl = review.sections.get(ReviewSectionType.GLOSSARY, {}) + if gl and gl.get("terms"): + 
parts.append("## Glossary\n") + parts.append("| Term | Definition |") + parts.append("|------|-----------|") + for t in gl["terms"]: + term = t.get("term", "") + defn = t.get("definition", "").replace("|", "\\|") + parts.append(f"| **{term}** | {defn} |") + parts.append("") + + # --- Questions --- + qs = review.sections.get(ReviewSectionType.QUESTIONS, {}) + if qs and qs.get("questions"): + parts.append("## Questions for Further Investigation\n") + for q in qs["questions"]: + qtype = q.get("type", "general") + parts.append(f"- **[{qtype}]** {q.get('question', '')}") + if q.get("motivation"): + parts.append(f" - *Motivation:* {q['motivation']}") + parts.append("") + + # --- Footer --- + parts.append("---") + parts.append( + f"*Generated by arXiv Explorer | {review.generated_at.strftime('%Y-%m-%d %H:%M')}*" + ) + + markdown = "\n".join(parts) + + # --- Translation --- + if language != Language.EN: + translated = self._translate_markdown(markdown, language) + if translated: + return translated + + return markdown + + # ── Full Text Extraction ────────────────────────────────────────── + + def _extract_full_text(self, arxiv_id: str) -> str | None: + """Get full text: check existing file, then try conversion.""" + existing = self._find_existing_markdown(arxiv_id) + if existing: + return existing.read_text(encoding="utf-8") + + output_path = self._run_arxiv_doc_builder(arxiv_id) + if output_path and output_path.exists(): + return output_path.read_text(encoding="utf-8") + + return None + + def _find_existing_markdown(self, arxiv_id: str) -> Path | None: + """Check standard locations for existing conversion output.""" + normalized = arxiv_id.replace("/", "_") + candidates = [ + Path.cwd() / "papers" / normalized / f"{normalized}.md", + Path.cwd() / normalized / f"{normalized}.md", + ] + for p in candidates: + if p.exists(): + return p + return None + + def _run_arxiv_doc_builder(self, arxiv_id: str) -> Path | None: + """Run convert_paper.py, return output path on 
success.""" + script_path = ( + Path(__file__).parent.parent.parent.parent + / ".claude" + / "skills" + / "arxiv-doc-builder" + / "scripts" + / "convert_paper.py" + ) + if not script_path.exists(): + return None + + normalized = arxiv_id.replace("/", "_") + output_dir = Path.cwd() / "papers" + + try: + result = subprocess.run( + [ + "uv", + "run", + str(script_path), + arxiv_id, + "--output-dir", + str(output_dir), + ], + capture_output=True, + text=True, + timeout=300, + ) + if result.returncode == 0: + return output_dir / normalized / f"{normalized}.md" + except (subprocess.TimeoutExpired, Exception): + pass + + return None + + # ── Section Splitting ───────────────────────────────────────────── + + def _split_into_sections(self, full_text_md: str) -> dict[str, str]: + """Split markdown into named sections by ## headers.""" + sections: dict[str, str] = {} + current_heading = "_preamble" + current_lines: list[str] = [] + + for line in full_text_md.split("\n"): + match = re.match(r"^## (.+)$", line) + if match: + if current_lines: + sections[current_heading] = "\n".join(current_lines).strip() + current_heading = match.group(1).strip() + current_lines = [] + else: + current_lines.append(line) + + if current_lines: + sections[current_heading] = "\n".join(current_lines).strip() + + return sections + + def _extract_figure_captions(self, full_text_md: str) -> list[dict[str, str]]: + """Extract figure captions and surrounding context.""" + figures: list[dict[str, str]] = [] + + # Pattern 1: ![caption](path) followed by *Figure N: caption* + pattern1 = re.compile( + r"!\[([^\]]*)\]\([^)]+\)\s*\n\*Figure\s+(\d+):\s*([^*]+)\*", + re.MULTILINE, + ) + for m in pattern1.finditer(full_text_md): + figures.append( + { + "figure_id": m.group(2), + "caption": m.group(3).strip(), + "context": full_text_md[max(0, m.start() - 200) : m.end() + 200], + } + ) + + # Pattern 2: **Figure N:** or *Figure N:* without image + pattern2 = 
re.compile(r"\*\*?Figure\s+(\d+)[:.]\*?\*?\s*(.+?)(?:\n|$)", re.MULTILINE) + seen_ids = {f["figure_id"] for f in figures} + for m in pattern2.finditer(full_text_md): + fid = m.group(1) + if fid not in seen_ids: + figures.append( + { + "figure_id": fid, + "caption": m.group(2).strip(), + "context": full_text_md[max(0, m.start() - 200) : m.end() + 200], + } + ) + seen_ids.add(fid) + + return figures + + def _extract_table_content(self, full_text_md: str) -> list[dict[str, str]]: + """Extract markdown tables and their captions.""" + tables: list[dict[str, str]] = [] + + # Find markdown table blocks (consecutive lines starting with |) + table_pattern = re.compile( + r"((?:\|.+\|\n)+)(?:\s*\*?(?:\*?)Table\s+(\d+)[:.]\*?\*?\s*([^\n*]*))?", + re.MULTILINE, + ) + for i, m in enumerate(table_pattern.finditer(full_text_md), 1): + tables.append( + { + "table_id": m.group(2) or str(i), + "caption": (m.group(3) or "").strip(), + "content": m.group(1).strip(), + } + ) + return tables + + def _extract_math_blocks(self, full_text_md: str) -> list[str]: + """Extract display math blocks ($$...$$).""" + pattern = re.compile(r"\$\$\s*\n?(.*?)\n?\s*\$\$", re.DOTALL) + return [m.group(1).strip() for m in pattern.finditer(full_text_md)] + + # ── Prompt Builders ─────────────────────────────────────────────── + + def _build_prompt( + self, + section_type: ReviewSectionType, + paper: Paper, + full_text_md: str | None, + paper_sections: dict[str, str] | None, + figure_captions: list[dict] | None, + table_content: list[dict] | None, + math_blocks: list[str] | None, + ) -> str: + """Build the AI prompt for a given section type.""" + header = ( + f"Paper: {paper.title}\n" + f"Authors: {', '.join(paper.authors[:10])}\n" + f"arXiv ID: {paper.arxiv_id}\n" + f"Categories: {', '.join(paper.categories)}\n\n" + f"Abstract: {paper.abstract}" + ) + + builders = { + ReviewSectionType.EXECUTIVE_SUMMARY: self._prompt_executive_summary, + ReviewSectionType.KEY_CONTRIBUTIONS: self._prompt_contributions, + 
ReviewSectionType.SECTION_SUMMARIES: self._prompt_section_summaries, + ReviewSectionType.METHODOLOGY: self._prompt_methodology, + ReviewSectionType.MATH_FORMULATIONS: self._prompt_math, + ReviewSectionType.FIGURES: self._prompt_figures, + ReviewSectionType.TABLES: self._prompt_tables, + ReviewSectionType.EXPERIMENTAL_RESULTS: self._prompt_experiments, + ReviewSectionType.STRENGTHS_WEAKNESSES: self._prompt_strengths_weaknesses, + ReviewSectionType.RELATED_WORK: self._prompt_related_work, + ReviewSectionType.GLOSSARY: self._prompt_glossary, + ReviewSectionType.QUESTIONS: self._prompt_questions, + } + + return builders[section_type]( + header=header, + full_text_md=full_text_md, + paper_sections=paper_sections, + figure_captions=figure_captions, + table_content=table_content, + math_blocks=math_blocks, + ) + + def _prompt_executive_summary(self, header, full_text_md, **_) -> str: + context = full_text_md[:4000] if full_text_md else "" + context_block = f"Full text excerpt:\n{context}" if context else "" + return f"""{header} + +{context_block} + +Analyze this paper and provide an executive summary. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "tldr": "3-5 sentence TL;DR capturing the core contribution and result", + "research_question": "The main research question addressed", + "approach_summary": "1-2 sentence summary of the approach", + "main_result": "The most important quantitative or qualitative result" +}}""" + + def _prompt_contributions(self, header, full_text_md, **_) -> str: + context = full_text_md[:3000] if full_text_md else "" + context_block = f"Full text excerpt:\n{context}" if context else "" + return f"""{header} + +{context_block} + +List the key contributions of this paper. +IMPORTANT: Respond ONLY with valid JSON, no other text. 
+{{ + "contributions": [ + {{ + "contribution": "Description of the contribution", + "type": "theoretical|methodological|empirical|system|dataset", + "significance": "Why this matters" + }} + ] +}}""" + + def _prompt_section_summaries(self, header, paper_sections, **_) -> str: + sections_text = "" + if paper_sections: + for heading, content in paper_sections.items(): + if heading == "_preamble": + continue + truncated = content[:1500] + sections_text += f"\n### {heading}\n{truncated}\n" + + return f"""{header} + +Paper sections: +{sections_text if sections_text else "(Full text not available -- analyze based on abstract)"} + +For each major section of the paper, provide a summary paragraph. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "sections": [ + {{ + "heading": "Section heading as it appears", + "summary": "2-4 sentence summary of this section", + "key_points": ["point 1", "point 2"] + }} + ] +}}""" + + def _prompt_methodology(self, header, paper_sections, full_text_md, **_) -> str: + method_text = "" + if paper_sections: + method_keywords = [ + "method", + "approach", + "model", + "framework", + "algorithm", + "architecture", + ] + for heading, content in paper_sections.items(): + if any(kw in heading.lower() for kw in method_keywords): + method_text += f"\n### {heading}\n{content[:2000]}\n" + if not method_text and full_text_md: + method_text = full_text_md[:5000] + + return f"""{header} + +Relevant sections: +{method_text if method_text else "(Analyze methodology from abstract)"} + +Provide a detailed methodology analysis. +IMPORTANT: Respond ONLY with valid JSON, no other text. 
+{{ + "overview": "High-level description of the methodology", + "steps": [ + {{ + "step_name": "Name of this step/component", + "description": "Detailed explanation", + "novelty": "What is novel about this step (or 'standard' if not novel)" + }} + ], + "assumptions": ["Key assumption 1", "Key assumption 2"], + "complexity_notes": "Computational complexity or scalability notes if mentioned" +}}""" + + def _prompt_math(self, header, math_blocks, **_) -> str: + math_text = "" + if math_blocks: + for i, block in enumerate(math_blocks[:15], 1): + math_text += f"\nEquation {i}: {block}\n" + + return f"""{header} + +Key equations found: +{math_text if math_text else "(No display equations detected)"} + +Explain the key mathematical formulations in plain language. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "formulations": [ + {{ + "equation_label": "Equation number or name", + "latex": "Original LaTeX", + "plain_language": "What this equation means in plain English", + "role": "How it fits into the overall methodology" + }} + ] +}}""" + + def _prompt_figures(self, header, figure_captions, **_) -> str: + figs_text = "" + if figure_captions: + for fig in figure_captions: + figs_text += f"\nFigure {fig['figure_id']}: {fig['caption']}\n" + figs_text += f"Context: {fig['context'][:300]}\n" + + return f"""{header} + +Figure captions and context: +{figs_text if figs_text else "(No figures detected)"} + +Describe each figure based on its caption and surrounding context. +IMPORTANT: Respond ONLY with valid JSON, no other text. 
+{{ + "figures": [ + {{ + "figure_id": "1", + "description": "What this figure likely shows based on caption and context", + "significance": "Why this figure is important for understanding the paper" + }} + ] +}}""" + + def _prompt_tables(self, header, table_content, **_) -> str: + tables_text = "" + if table_content: + for tbl in table_content: + tables_text += f"\nTable {tbl['table_id']}: {tbl['caption']}\n" + tables_text += f"{tbl['content'][:500]}\n" + + return f"""{header} + +Tables found in paper: +{tables_text if tables_text else "(No tables detected)"} + +Analyze each table and describe its contents and significance. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "tables": [ + {{ + "table_id": "1", + "caption": "Original caption", + "description": "What this table shows", + "key_findings": "Notable results or patterns in the data" + }} + ] +}}""" + + def _prompt_experiments(self, header, paper_sections, table_content, **_) -> str: + exp_text = "" + if paper_sections: + exp_keywords = [ + "experiment", + "result", + "evaluation", + "ablation", + "benchmark", + "performance", + ] + for heading, content in paper_sections.items(): + if any(kw in heading.lower() for kw in exp_keywords): + exp_text += f"\n### {heading}\n{content[:2000]}\n" + + tables_summary = "" + if table_content: + for tbl in table_content[:5]: + tables_summary += f"\nTable {tbl['table_id']}: {tbl['content'][:300]}\n" + + return f"""{header} + +Experimental sections: +{exp_text if exp_text else "(Analyze from abstract)"} + +Result tables: +{tables_summary} + +Analyze the experimental setup and results. +IMPORTANT: Respond ONLY with valid JSON, no other text. 
+{{ + "datasets": ["Dataset names used"], + "baselines": ["Baseline methods compared against"], + "metrics": ["Evaluation metrics used"], + "main_results": "Summary of main quantitative results", + "ablation_studies": "Summary of ablation studies if present", + "notable_findings": ["Finding 1", "Finding 2"] +}}""" + + def _prompt_strengths_weaknesses(self, header, full_text_md, **_) -> str: + context = "" + if full_text_md: + context = full_text_md[:3000] + "\n...\n" + full_text_md[-2000:] + context_block = f"Paper content:\n{context}" if context else "" + + return f"""{header} + +{context_block} + +Provide a critical analysis of the paper's strengths and weaknesses. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "strengths": [ + {{ + "point": "Strength description", + "evidence": "Supporting evidence or reasoning" + }} + ], + "weaknesses": [ + {{ + "point": "Weakness description", + "evidence": "Supporting evidence or reasoning" + }} + ], + "overall_assessment": "1-2 sentence overall assessment" +}}""" + + def _prompt_related_work(self, header, paper_sections, **_) -> str: + rw_text = "" + if paper_sections: + rw_keywords = [ + "related", + "background", + "prior", + "previous", + "literature", + ] + for heading, content in paper_sections.items(): + if any(kw in heading.lower() for kw in rw_keywords): + rw_text += f"\n### {heading}\n{content[:2500]}\n" + + return f"""{header} + +Related work sections: +{rw_text if rw_text else "(Analyze related work context from abstract)"} + +Summarize the related work landscape and how this paper positions itself. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "research_areas": ["Area 1", "Area 2"], + "key_prior_works": [ + {{ + "work": "Author et al. 
(Year) - brief description", + "relationship": "How this paper relates to or differs from it" + }} + ], + "positioning": "How the paper positions itself within the field" +}}""" + + def _prompt_glossary(self, header, full_text_md, **_) -> str: + context = full_text_md[:5000] if full_text_md else "" + context_block = f"Paper content:\n{context}" if context else "" + + return f"""{header} + +{context_block} + +Extract key technical terms and provide definitions. +IMPORTANT: Respond ONLY with valid JSON, no other text. +{{ + "terms": [ + {{ + "term": "Technical term", + "definition": "Clear definition as used in this paper", + "first_occurrence": "Section where it first appears (if known)" + }} + ] +}}""" + + def _prompt_questions(self, header, full_text_md, **_) -> str: + context = "" + if full_text_md: + context = full_text_md[:2000] + "\n...\n" + full_text_md[-2000:] + context_block = f"Paper content:\n{context}" if context else "" + + return f"""{header} + +{context_block} + +Suggest questions for further investigation based on this paper. +IMPORTANT: Respond ONLY with valid JSON, no other text. 
+{{ + "questions": [ + {{ + "question": "The question", + "motivation": "Why this question is interesting or important", + "type": "clarification|extension|limitation|application" + }} + ] +}}""" + + # ── AI Invocation ───────────────────────────────────────────────── + + def _invoke_ai(self, prompt: str) -> dict | None: + """Invoke AI provider, extract JSON, parse.""" + settings = SettingsService() + provider = get_provider(settings.get_provider()) + if not provider.is_available(): + return None + + output = provider.invoke( + prompt, + model=settings.get_model(), + timeout=settings.get_timeout(), + ) + if output is None: + return None + + # Extract JSON block + if "```json" in output: + output = output.split("```json")[1].split("```")[0] + elif "```" in output: + output = output.split("```")[1].split("```")[0] + + output = output.strip() + + try: + return json.loads(output) + except json.JSONDecodeError: + return None + + # ── Cache Operations ────────────────────────────────────────────── + + def _get_cached_section( + self, arxiv_id: str, section_type: ReviewSectionType + ) -> ReviewSection | None: + with get_connection() as conn: + row = conn.execute( + "SELECT * FROM paper_review_sections WHERE arxiv_id = ? 
AND section_type = ?", + (arxiv_id, section_type.value), + ).fetchone() + if row: + return ReviewSection( + id=row["id"], + arxiv_id=row["arxiv_id"], + section_type=ReviewSectionType(row["section_type"]), + content_json=row["content_json"], + generated_at=datetime.fromisoformat(row["generated_at"]), + ) + return None + + def _get_all_cached_sections(self, arxiv_id: str) -> dict[ReviewSectionType, ReviewSection]: + with get_connection() as conn: + rows = conn.execute( + "SELECT * FROM paper_review_sections WHERE arxiv_id = ?", + (arxiv_id,), + ).fetchall() + return { + ReviewSectionType(row["section_type"]): ReviewSection( + id=row["id"], + arxiv_id=row["arxiv_id"], + section_type=ReviewSectionType(row["section_type"]), + content_json=row["content_json"], + generated_at=datetime.fromisoformat(row["generated_at"]), + ) + for row in rows + } + + def _save_section( + self, + arxiv_id: str, + section_type: ReviewSectionType, + data: dict, + source_type: str, + ) -> None: + with get_connection() as conn: + conn.execute( + """INSERT OR REPLACE INTO paper_review_sections + (arxiv_id, section_type, content_json, source_type) + VALUES (?, ?, ?, ?)""", + ( + arxiv_id, + section_type.value, + json.dumps(data, ensure_ascii=False), + source_type, + ), + ) + conn.commit() + + @staticmethod + def _empty_section_data(section_type: ReviewSectionType) -> dict: + """Return empty but valid JSON for sections with no data.""" + empty_maps: dict[ReviewSectionType, dict] = { + ReviewSectionType.FIGURES: {"figures": []}, + ReviewSectionType.TABLES: {"tables": []}, + ReviewSectionType.MATH_FORMULATIONS: {"formulations": []}, + } + return empty_maps.get(section_type, {}) + + # ── Translation ─────────────────────────────────────────────────── + + def _translate_markdown(self, markdown: str, target_language: Language) -> str | None: + """Translate final markdown, chunking by ## headers if needed.""" + lang_name = _LANG_NAMES.get(target_language, target_language.value) + max_chunk = 6000 + + if 
len(markdown) <= max_chunk: + return self._translate_chunk(markdown, lang_name) + + # Split by ## headers to maintain structure + chunks = re.split(r"(^## .+$)", markdown, flags=re.MULTILINE) + translated_parts: list[str] = [] + current_chunk = "" + + for chunk in chunks: + if len(current_chunk) + len(chunk) > max_chunk and current_chunk: + result = self._translate_chunk(current_chunk, lang_name) + translated_parts.append(result or current_chunk) + current_chunk = chunk + else: + current_chunk += chunk + + if current_chunk: + result = self._translate_chunk(current_chunk, lang_name) + translated_parts.append(result or current_chunk) + + return "".join(translated_parts) + + def _translate_chunk(self, text: str, lang_name: str) -> str | None: + """Translate a single chunk of markdown.""" + prompt = f"""Translate the following Markdown document into {lang_name}. + +IMPORTANT RULES: +- Preserve ALL Markdown formatting (headers, bold, italic, tables, links, code blocks) +- Keep ALL technical terms, model names, dataset names, proper nouns, and acronyms in English +- Keep mathematical notation ($...$, $$...$$) as-is +- Keep URLs and arXiv IDs as-is +- The translation should read naturally in {lang_name} + +Text to translate: +{text} + +Respond with ONLY the translated markdown, no other text.""" + + settings = SettingsService() + provider = get_provider(settings.get_provider()) + if not provider.is_available(): + return None + return provider.invoke( + prompt, + model=settings.get_model(), + timeout=settings.get_timeout(), + ) diff --git a/src/arxiv_explorer/tui/screens/daily.py b/src/arxiv_explorer/tui/screens/daily.py index 9723309..d0a28a2 100644 --- a/src/arxiv_explorer/tui/screens/daily.py +++ b/src/arxiv_explorer/tui/screens/daily.py @@ -68,6 +68,7 @@ class DailyPane(Vertical): ("d", "dislike", "Dislike"), ("s", "summarize", "Summarize"), ("t", "translate", "Translate"), + ("w", "review", "Review"), ] def compose(self) -> ComposeResult: @@ -94,6 +95,7 @@ def 
compose(self) -> ComposeResult: yield Button("Dislike [d]", id="btn-dislike", variant="error") yield Button("Summarize [s]", id="btn-summarize", variant="warning") yield Button("Translate [t]", id="btn-translate") + yield Button("Review [w]", id="btn-review", variant="primary") yield Static("", id="daily-status") yield PaperPanel(id="daily-panel") @@ -123,6 +125,10 @@ def _on_summarize_clicked(self) -> None: def _on_translate_clicked(self) -> None: self.action_translate() + @on(Button.Pressed, "#btn-review") + def _on_review_clicked(self) -> None: + self.action_review() + @on(PaperTable.PaperHighlighted) def _on_paper_highlighted(self, event: PaperTable.PaperHighlighted) -> None: panel = self.query_one("#daily-panel", PaperPanel) @@ -272,3 +278,11 @@ def _show_translation(self, rec, translation) -> None: panel = self.query_one("#daily-panel", PaperPanel) panel.show_translation(translation) self._set_status(f"Translation done: {rec.paper.arxiv_id}") + + def action_review(self) -> None: + rec = self._get_current() + if not rec: + return + from .review_screen import ReviewScreen + + self.app.push_screen(ReviewScreen(rec)) diff --git a/src/arxiv_explorer/tui/screens/paper_detail.py b/src/arxiv_explorer/tui/screens/paper_detail.py index 0ac60f1..4b2f7dc 100644 --- a/src/arxiv_explorer/tui/screens/paper_detail.py +++ b/src/arxiv_explorer/tui/screens/paper_detail.py @@ -23,6 +23,7 @@ class PaperDetailScreen(ModalScreen): ("d", "dislike", "Dislike"), ("s", "summarize", "Summarize"), ("t", "translate", "Translate"), + ("w", "review", "Review"), ("n", "add_note", "Note"), ("a", "add_to_list", "Add to List"), ] @@ -60,6 +61,7 @@ def compose(self) -> ComposeResult: yield Button("Dislike [d]", id="md-dislike", variant="error") yield Button("Summarize [s]", id="md-summarize", variant="warning") yield Button("Translate [t]", id="md-translate") + yield Button("Review [w]", id="md-review", variant="primary") yield Button("Note [n]", id="md-note") yield Button("List [a]", 
id="md-list") yield Button("Close", id="md-close") @@ -111,6 +113,10 @@ def _on_summarize(self) -> None: def _on_translate(self) -> None: self.action_translate() + @on(Button.Pressed, "#md-review") + def _on_review(self) -> None: + self.action_review() + @on(Button.Pressed, "#md-note") def _on_note(self) -> None: self.action_add_note() @@ -185,6 +191,11 @@ def _render_translation(self, translation: PaperTranslation) -> None: ] self.query_one("#detail-translation", Static).update("\n".join(lines)) + def action_review(self) -> None: + from .review_screen import ReviewScreen + + self.app.push_screen(ReviewScreen(self.rec)) + def action_add_to_list(self) -> None: from .list_picker import ListPickerScreen diff --git a/src/arxiv_explorer/tui/screens/review_screen.py b/src/arxiv_explorer/tui/screens/review_screen.py new file mode 100644 index 0000000..75d4085 --- /dev/null +++ b/src/arxiv_explorer/tui/screens/review_screen.py @@ -0,0 +1,259 @@ +"""Paper review modal screen.""" + +from __future__ import annotations + +from pathlib import Path + +from textual import on, work +from textual.app import ComposeResult +from textual.containers import Horizontal, Vertical +from textual.screen import ModalScreen +from textual.widgets import Button, Markdown, Static + +from ...core.models import Language, RecommendedPaper, ReviewSectionType + +# Human-readable section names for progress display +_SECTION_NAMES: dict[ReviewSectionType, str] = { + ReviewSectionType.EXECUTIVE_SUMMARY: "Executive Summary", + ReviewSectionType.KEY_CONTRIBUTIONS: "Key Contributions", + ReviewSectionType.SECTION_SUMMARIES: "Section Summaries", + ReviewSectionType.METHODOLOGY: "Methodology", + ReviewSectionType.MATH_FORMULATIONS: "Math Formulations", + ReviewSectionType.FIGURES: "Figures", + ReviewSectionType.TABLES: "Tables", + ReviewSectionType.EXPERIMENTAL_RESULTS: "Experiments", + ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses", + ReviewSectionType.RELATED_WORK: "Related Work", + 
    def _set_status(self, text: str) -> None:
        """Update the one-line status bar at the top of the review modal."""
        try:
            self.query_one("#review-status", Static).update(text)
        except Exception:
            # Best-effort: the status widget may not be queryable (e.g. the
            # screen is mounting or closing) when a background worker posts a
            # late update — dropping the message beats crashing the worker.
            pass
group="review-generate") + def _load_or_generate(self) -> None: + """Load cached review instantly, then generate missing sections.""" + self._generating = True + bridge = self.app.bridge + paper = self.rec.paper + + # Check for cached review (even partial) + cached = bridge.review.get_cached_review(paper.arxiv_id) + if cached and cached.sections: + # Fill in paper metadata (cached review has empty metadata) + cached.title = paper.title + cached.authors = paper.authors + cached.categories = paper.categories + cached.published = paper.published + cached.pdf_url = paper.pdf_url + cached.abstract = paper.abstract + self._review = cached + md = bridge.review.render_markdown(cached) + self.app.call_from_thread(self._show_review, md) + + if cached.is_complete: + done = len(cached.sections) + self.app.call_from_thread(self._set_status, f"Loaded from cache — {done} sections") + self._generating = False + return + + # Partial cache: show what we have, generate the rest + missing = cached.missing_sections + self.app.call_from_thread( + self._set_status, + f"Loaded {len(cached.sections)} sections — generating {len(missing)} remaining...", + ) + + # Generate (all or remaining) with progress + def on_start(section_type: ReviewSectionType, idx: int, total: int) -> None: + name = _SECTION_NAMES.get(section_type, section_type.value) + self.app.call_from_thread( + self._set_status, + f"Generating [{idx + 1}/{total}]: {name}...", + ) + + succeeded = [0] + failed = [0] + + def on_complete(section_type: ReviewSectionType, success: bool) -> None: + if success: + succeeded[0] += 1 + else: + failed[0] += 1 + + review = bridge.review.generate_review( + paper=paper, + on_section_start=on_start, + on_section_complete=on_complete, + ) + + self._generating = False + + if review: + self._review = review + md = bridge.review.render_markdown(review) + self.app.call_from_thread(self._show_review, md) + self.app.call_from_thread( + self._set_status, + f"Done — {succeeded[0]} sections" + (f" 
({failed[0]} failed)" if failed[0] else ""), + ) + else: + self.app.call_from_thread(self._set_status, "Review generation failed") + self.app.call_from_thread( + self.app.notify, + "Could not generate review", + severity="warning", + ) + + def _show_review(self, markdown_text: str) -> None: + self.query_one("#review-content", Markdown).update(markdown_text) + + # ── Button Events ───────────────────────────────────────────────── + + @on(Button.Pressed, "#review-close") + def _on_close(self) -> None: + self.dismiss() + + @on(Button.Pressed, "#review-save") + def _on_save(self) -> None: + self._save_review() + + @on(Button.Pressed, "#review-translate") + def _on_translate(self) -> None: + self._translate_review() + + def key_s(self) -> None: + self._save_review() + + def key_t(self) -> None: + self._translate_review() + + # ── Save ────────────────────────────────────────────────────────── + + def _save_review(self) -> None: + if not self._review: + self.app.notify("No review to save", severity="warning") + return + self._do_save() + + @work(thread=True, group="review-save") + def _do_save(self) -> None: + review = self._review + md = self.app.bridge.review.render_markdown(review) + normalized = review.arxiv_id.replace("/", "_") + out_dir = Path.cwd() / "reviews" + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / f"{normalized}_review.md" + out_path.write_text(md, encoding="utf-8") + saved_files = [str(out_path)] + + # Also save translated version if available + if self._translated_md and self._translated_lang: + lang_code = self._translated_lang.value + trans_path = out_dir / f"{normalized}_review_{lang_code}.md" + trans_path.write_text(self._translated_md, encoding="utf-8") + saved_files.append(str(trans_path)) + + self.app.call_from_thread(self.app.notify, f"Saved: {', '.join(saved_files)}") + + # ── Translate ───────────────────────────────────────────────────── + + def _translate_review(self) -> None: + if not self._review: + self.app.notify("No 
review to translate", severity="warning") + return + lang = self.app.bridge.settings.get_language() + if lang == Language.EN: + self.app.notify( + "Language is set to English. Change in Prefs tab.", + severity="warning", + ) + return + self.app.notify("Translating review...") + self._do_translate(lang) + + @work(thread=True, exclusive=True, group="review-translate") + def _do_translate(self, lang: Language) -> None: + review = self._review + md = self.app.bridge.review.render_markdown(review, language=lang) + self._translated_md = md + self._translated_lang = lang + self.app.call_from_thread(self._show_review, md) + self.app.call_from_thread(self._set_status, f"Translated to {lang.value}") + self.app.call_from_thread(self.app.notify, "Translation complete") diff --git a/src/arxiv_explorer/tui/screens/search.py b/src/arxiv_explorer/tui/screens/search.py index 0e60118..e65b92b 100644 --- a/src/arxiv_explorer/tui/screens/search.py +++ b/src/arxiv_explorer/tui/screens/search.py @@ -69,6 +69,7 @@ class SearchPane(Vertical): ("d", "dislike", "Dislike"), ("s", "summarize", "Summarize"), ("t", "translate", "Translate"), + ("w", "review", "Review"), ] def compose(self) -> ComposeResult: @@ -87,6 +88,7 @@ def compose(self) -> ComposeResult: yield Button("Dislike [d]", id="btn-s-dislike", variant="error") yield Button("Summarize [s]", id="btn-s-summarize", variant="warning") yield Button("Translate [t]", id="btn-s-translate") + yield Button("Review [w]", id="btn-s-review", variant="primary") yield Static("Enter a search query", id="search-status") yield PaperPanel(id="search-panel") @@ -121,6 +123,10 @@ def _on_summarize_clicked(self) -> None: def _on_translate_clicked(self) -> None: self.action_translate() + @on(Button.Pressed, "#btn-s-review") + def _on_review_clicked(self) -> None: + self.action_review() + @on(PaperTable.PaperHighlighted) def _on_paper_highlighted(self, event: PaperTable.PaperHighlighted) -> None: panel = self.query_one("#search-panel", PaperPanel) @@ 
-248,3 +254,11 @@ def _show_translation(self, rec, translation) -> None: panel = self.query_one("#search-panel", PaperPanel) panel.show_translation(translation) self._set_status(f"Translation done: {rec.paper.arxiv_id}") + + def action_review(self) -> None: + rec = self._get_current() + if not rec: + return + from .review_screen import ReviewScreen + + self.app.push_screen(ReviewScreen(rec)) diff --git a/src/arxiv_explorer/tui/workers.py b/src/arxiv_explorer/tui/workers.py index ef6f4ed..59bf409 100644 --- a/src/arxiv_explorer/tui/workers.py +++ b/src/arxiv_explorer/tui/workers.py @@ -4,6 +4,7 @@ from ..services.paper_service import PaperService from ..services.preference_service import PreferenceService from ..services.reading_list_service import ReadingListService +from ..services.review_service import PaperReviewService from ..services.settings_service import SettingsService from ..services.summarization import SummarizationService from ..services.translation import TranslationService @@ -24,3 +25,4 @@ def __init__(self) -> None: self.summarization = SummarizationService() self.translation = TranslationService() self.settings = SettingsService() + self.review = PaperReviewService() diff --git a/tests/test_database.py b/tests/test_database.py index 3fa1e8a..c46133e 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -18,6 +18,7 @@ "paper_translations", "app_settings", "papers", + "paper_review_sections", } @@ -60,6 +61,7 @@ def test_creates_indexes(self, tmp_config: Config): "idx_list_papers_list", "idx_translations_arxiv", "idx_papers_cached_at", + "idx_review_sections_arxiv", } assert indexes == expected_indexes diff --git a/tests/test_review_service.py b/tests/test_review_service.py new file mode 100644 index 0000000..4d5a17d --- /dev/null +++ b/tests/test_review_service.py @@ -0,0 +1,668 @@ +"""Tests for the paper review service.""" + +import json +from unittest.mock import MagicMock + +import pytest + +from arxiv_explorer.core.config import 
Config +from arxiv_explorer.core.models import ( + PaperReview, + ReviewSectionType, +) +from arxiv_explorer.services.review_service import PaperReviewService + +# ── Fixtures ────────────────────────────────────────────────────────── + + +@pytest.fixture() +def review_service() -> PaperReviewService: + return PaperReviewService() + + +@pytest.fixture() +def sample_full_text() -> str: + """Minimal arxiv-doc-builder output for testing section splitting.""" + return """# Test Paper Title + +**Authors:** Alice, Bob +**arXiv ID:** 2401.00001 + +## Abstract + +This is the abstract text. + +## 1. Introduction + +Introduction content here with some detail about the problem. + +## 2. Methodology + +We propose a novel method for solving the problem. + +$$ +E = mc^2 +$$ + +$$ +F = ma +$$ + +## 3. Experiments + +We test on CIFAR-10 dataset. + +| Method | Accuracy | +|--------|----------| +| Ours | 95.3 | +| Other | 90.1 | + +*Table 1: Results comparison* + +![Architecture](figures/fig1.png) +*Figure 1: System architecture overview* + +**Figure 2:** Training loss curve over 100 epochs. + +## 4. Related Work + +Previous work by Smith et al. addressed similar problems. + +## 5. Conclusion + +We conclude the work with promising results. + +## References + +[1] Smith et al. Some paper. 2023. +""" + + +# ── Section Splitting Tests ─────────────────────────────────────────── + + +class TestSectionSplitting: + """Test the _split_into_sections method.""" + + def test_splits_by_h2_headers(self, review_service, sample_full_text): + sections = review_service._split_into_sections(sample_full_text) + assert "1. Introduction" in sections + assert "2. Methodology" in sections + assert "3. Experiments" in sections + assert "4. Related Work" in sections + assert "5. 
class TestFigureCaptionExtraction:
    """Tests for figure-caption extraction from markdown full text."""

    def test_extracts_image_figures(self, review_service, sample_full_text):
        # Figure 1 comes from a markdown image followed by an italic caption.
        figures = review_service._extract_figure_captions(sample_full_text)
        by_id = {fig["figure_id"]: fig for fig in figures}
        assert "1" in by_id
        assert "architecture" in by_id["1"]["caption"].lower()

    def test_extracts_text_only_figures(self, review_service, sample_full_text):
        # Figure 2 has only a bold "**Figure 2:**" caption, no image tag.
        figures = review_service._extract_figure_captions(sample_full_text)
        assert any(fig["figure_id"] == "2" for fig in figures)

    def test_no_figures(self, review_service):
        assert review_service._extract_figure_captions("No figures here.") == []

    def test_no_duplicates(self, review_service, sample_full_text):
        figures = review_service._extract_figure_captions(sample_full_text)
        ids = [fig["figure_id"] for fig in figures]
        assert sorted(ids) == sorted(set(ids))
class TestMathExtraction:
    """Tests for display-math ($$...$$) block extraction."""

    def test_extracts_display_math(self, review_service, sample_full_text):
        # The fixture contains exactly two $$ blocks: E = mc^2 and F = ma.
        blocks = review_service._extract_math_blocks(sample_full_text)
        assert len(blocks) >= 2
        joined = "\n".join(blocks)
        assert "E = mc^2" in joined
        assert "F = ma" in joined

    def test_no_math(self, review_service):
        assert review_service._extract_math_blocks("No math here.") == []
"abstract", + ) + all_cached = review_service._get_all_cached_sections("2401.00001") + assert len(all_cached) == 2 + assert ReviewSectionType.EXECUTIVE_SUMMARY in all_cached + assert ReviewSectionType.GLOSSARY in all_cached + + def test_cache_replaces_on_update(self, tmp_config: Config, review_service): + review_service._save_section( + "2401.00001", + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "old"}, + "abstract", + ) + review_service._save_section( + "2401.00001", + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "new"}, + "full_text", + ) + cached = review_service._get_cached_section( + "2401.00001", ReviewSectionType.EXECUTIVE_SUMMARY + ) + assert json.loads(cached.content_json)["tldr"] == "new" + + def test_delete_review(self, tmp_config: Config, review_service): + review_service._save_section( + "2401.00001", + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "test"}, + "abstract", + ) + assert review_service.delete_review("2401.00001") is True + assert ( + review_service._get_cached_section("2401.00001", ReviewSectionType.EXECUTIVE_SUMMARY) + is None + ) + + def test_delete_nonexistent(self, tmp_config: Config, review_service): + assert review_service.delete_review("9999.99999") is False + + def test_get_cached_review(self, tmp_config: Config, review_service): + review_service._save_section( + "2401.00001", + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "cached review"}, + "abstract", + ) + cached_review = review_service.get_cached_review("2401.00001") + assert cached_review is not None + assert ReviewSectionType.EXECUTIVE_SUMMARY in cached_review.sections + assert ( + cached_review.sections[ReviewSectionType.EXECUTIVE_SUMMARY]["tldr"] == "cached review" + ) + + def test_get_cached_review_none(self, tmp_config: Config, review_service): + assert review_service.get_cached_review("9999.99999") is None + + +# ── Rendering Tests ─────────────────────────────────────────────────── + + +class TestMarkdownRendering: + """Test render_markdown output.""" + + 
def _make_review(self, sample_paper, sections=None): + return PaperReview( + arxiv_id=sample_paper.arxiv_id, + title=sample_paper.title, + authors=sample_paper.authors, + categories=sample_paper.categories, + published=sample_paper.published, + abstract=sample_paper.abstract, + sections=sections or {}, + ) + + def test_renders_header(self, review_service, sample_paper): + review = self._make_review(sample_paper) + md = review_service.render_markdown(review) + assert sample_paper.title in md + assert sample_paper.arxiv_id in md + assert "Authors:" in md + + def test_renders_executive_summary(self, review_service, sample_paper): + review = self._make_review( + sample_paper, + { + ReviewSectionType.EXECUTIVE_SUMMARY: { + "tldr": "This paper does X.", + "research_question": "Can we do X?", + "approach_summary": "We use Y.", + "main_result": "95% accuracy", + } + }, + ) + md = review_service.render_markdown(review) + assert "Executive Summary" in md + assert "This paper does X." in md + assert "95% accuracy" in md + + def test_renders_key_contributions(self, review_service, sample_paper): + review = self._make_review( + sample_paper, + { + ReviewSectionType.KEY_CONTRIBUTIONS: { + "contributions": [ + { + "contribution": "Novel architecture", + "type": "methodological", + "significance": "First of its kind", + } + ] + } + }, + ) + md = review_service.render_markdown(review) + assert "Key Contributions" in md + assert "Novel architecture" in md + + def test_renders_glossary_as_table(self, review_service, sample_paper): + review = self._make_review( + sample_paper, + { + ReviewSectionType.GLOSSARY: { + "terms": [{"term": "CNN", "definition": "Convolutional Neural Network"}] + } + }, + ) + md = review_service.render_markdown(review) + assert "| **CNN** |" in md + assert "Convolutional Neural Network" in md + + def test_renders_math_formulations(self, review_service, sample_paper): + review = self._make_review( + sample_paper, + { + ReviewSectionType.MATH_FORMULATIONS: { + 
"formulations": [ + { + "equation_label": "Loss function", + "latex": "L = -\\sum y \\log p", + "plain_language": "Cross-entropy loss", + "role": "Training objective", + } + ] + } + }, + ) + md = review_service.render_markdown(review) + assert "Mathematical Formulations" in md + assert "$$" in md + assert "Cross-entropy loss" in md + + def test_renders_footer(self, review_service, sample_paper): + review = self._make_review(sample_paper) + md = review_service.render_markdown(review) + assert "Generated by arXiv Explorer" in md + + def test_renders_source_type(self, review_service, sample_paper): + review = self._make_review(sample_paper) + review.source_type = "full_text" + md = review_service.render_markdown(review) + assert "Full text analysis" in md + + review.source_type = "abstract" + md = review_service.render_markdown(review) + assert "Abstract-only analysis" in md + + +# ── Model Tests ─────────────────────────────────────────────────────── + + +class TestPaperReviewModel: + """Test PaperReview dataclass.""" + + def test_is_complete_false(self, sample_paper): + review = PaperReview( + arxiv_id=sample_paper.arxiv_id, + title=sample_paper.title, + authors=sample_paper.authors, + categories=sample_paper.categories, + published=sample_paper.published, + abstract=sample_paper.abstract, + sections={ReviewSectionType.EXECUTIVE_SUMMARY: {}}, + ) + assert review.is_complete is False + assert len(review.missing_sections) == len(ReviewSectionType) - 1 + + def test_is_complete_true(self, sample_paper): + all_sections = {st: {} for st in ReviewSectionType} + review = PaperReview( + arxiv_id=sample_paper.arxiv_id, + title=sample_paper.title, + authors=sample_paper.authors, + categories=sample_paper.categories, + published=sample_paper.published, + abstract=sample_paper.abstract, + sections=all_sections, + ) + assert review.is_complete is True + assert review.missing_sections == [] + + def test_missing_sections_returns_correct_types(self, sample_paper): + review = 
PaperReview( + arxiv_id=sample_paper.arxiv_id, + title=sample_paper.title, + authors=sample_paper.authors, + categories=sample_paper.categories, + published=sample_paper.published, + abstract=sample_paper.abstract, + sections={ + ReviewSectionType.EXECUTIVE_SUMMARY: {}, + ReviewSectionType.GLOSSARY: {}, + }, + ) + missing = review.missing_sections + assert ReviewSectionType.EXECUTIVE_SUMMARY not in missing + assert ReviewSectionType.GLOSSARY not in missing + assert ReviewSectionType.METHODOLOGY in missing + + +# ── Integration Tests (mocked AI) ──────────────────────────────────── + + +class TestGenerateReviewMocked: + """Test generate_review with mocked AI provider.""" + + def _mock_responses(self): + return { + ReviewSectionType.EXECUTIVE_SUMMARY: { + "tldr": "Test", + "research_question": "Q", + "approach_summary": "A", + "main_result": "R", + }, + ReviewSectionType.KEY_CONTRIBUTIONS: { + "contributions": [ + {"contribution": "C", "type": "methodological", "significance": "S"} + ] + }, + ReviewSectionType.SECTION_SUMMARIES: {"sections": []}, + ReviewSectionType.METHODOLOGY: { + "overview": "O", + "steps": [], + "assumptions": [], + "complexity_notes": "", + }, + ReviewSectionType.MATH_FORMULATIONS: {"formulations": []}, + ReviewSectionType.FIGURES: {"figures": []}, + ReviewSectionType.TABLES: {"tables": []}, + ReviewSectionType.EXPERIMENTAL_RESULTS: { + "datasets": [], + "baselines": [], + "metrics": [], + "main_results": "", + "ablation_studies": "", + "notable_findings": [], + }, + ReviewSectionType.STRENGTHS_WEAKNESSES: { + "strengths": [], + "weaknesses": [], + "overall_assessment": "", + }, + ReviewSectionType.RELATED_WORK: { + "research_areas": [], + "key_prior_works": [], + "positioning": "", + }, + ReviewSectionType.GLOSSARY: {"terms": []}, + ReviewSectionType.QUESTIONS: {"questions": []}, + } + + def test_generates_with_abstract_only(self, tmp_config: Config, sample_paper): + service = PaperReviewService() + service._extract_full_text = 
MagicMock(return_value=None) + + responses = self._mock_responses() + section_order = [st for st, _ in service.SECTION_PIPELINE] + call_count = [0] + + def mock_invoke(prompt): + idx = call_count[0] + call_count[0] += 1 + return responses.get(section_order[idx], {}) + + service._invoke_ai = mock_invoke + + review = service.generate_review(sample_paper) + assert review is not None + assert review.source_type == "abstract" + assert len(review.sections) > 0 + + def test_resumes_from_cache(self, tmp_config: Config, sample_paper): + service = PaperReviewService() + service._extract_full_text = MagicMock(return_value=None) + + # Pre-populate cache for executive_summary + service._save_section( + sample_paper.arxiv_id, + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "Cached", "research_question": "Q"}, + "abstract", + ) + + invoke_calls: list[str] = [] + + def tracking_invoke(prompt): + invoke_calls.append(prompt) + return {"dummy": "data"} + + service._invoke_ai = tracking_invoke + + review = service.generate_review(sample_paper) + # Should have used cache for executive_summary + assert review.sections[ReviewSectionType.EXECUTIVE_SUMMARY]["tldr"] == "Cached" + # AI should have been called for remaining sections, minus: + # - 1 cached (executive_summary) + # - 3 empty sections in abstract-only mode (figures, tables, math) + assert len(invoke_calls) == len(ReviewSectionType) - 1 - 3 + + def test_force_regenerates_cached(self, tmp_config: Config, sample_paper): + service = PaperReviewService() + service._extract_full_text = MagicMock(return_value=None) + + # Pre-populate cache + service._save_section( + sample_paper.arxiv_id, + ReviewSectionType.EXECUTIVE_SUMMARY, + {"tldr": "Old cached"}, + "abstract", + ) + + responses = self._mock_responses() + section_order = [st for st, _ in service.SECTION_PIPELINE] + call_count = [0] + + def mock_invoke(prompt): + idx = call_count[0] + call_count[0] += 1 + return responses.get(section_order[idx], {}) + + service._invoke_ai = 
mock_invoke + + review = service.generate_review(sample_paper, force=True) + # With force=True, AI called for all sections except 3 empty + # (figures, tables, math) which get empty data in abstract-only mode + assert call_count[0] == len(ReviewSectionType) - 3 + assert review.sections[ReviewSectionType.EXECUTIVE_SUMMARY]["tldr"] == "Test" + + def test_callbacks_invoked(self, tmp_config: Config, sample_paper): + service = PaperReviewService() + service._extract_full_text = MagicMock(return_value=None) + + responses = self._mock_responses() + section_order = [st for st, _ in service.SECTION_PIPELINE] + call_count = [0] + + def mock_invoke(prompt): + idx = call_count[0] + call_count[0] += 1 + return responses.get(section_order[idx], {}) + + service._invoke_ai = mock_invoke + + start_calls: list[tuple] = [] + complete_calls: list[tuple] = [] + + def on_start(st, idx, total): + start_calls.append((st, idx, total)) + + def on_complete(st, success): + complete_calls.append((st, success)) + + service.generate_review( + sample_paper, + on_section_start=on_start, + on_section_complete=on_complete, + ) + assert len(start_calls) == len(ReviewSectionType) + assert len(complete_calls) == len(ReviewSectionType) + assert all(s for _, s in complete_calls) + + def test_returns_none_on_total_failure(self, tmp_config: Config, sample_paper): + service = PaperReviewService() + service._extract_full_text = MagicMock(return_value=None) + service._invoke_ai = MagicMock(return_value=None) + + review = service.generate_review(sample_paper) + # Figures/Tables/Math get empty data even on AI failure, so review is not None + # but all AI-dependent sections should fail + assert review is not None + # The 3 empty sections should still be present + assert ReviewSectionType.FIGURES in review.sections + assert ReviewSectionType.TABLES in review.sections + assert ReviewSectionType.MATH_FORMULATIONS in review.sections + + +# ── Prompt Builder Tests ────────────────────────────────────────────── + + 
+class TestPromptBuilders: + """Test that prompt builders produce valid prompts.""" + + def test_all_section_types_have_prompt_builder(self, review_service, sample_paper): + """Every section type in the pipeline must have a prompt builder.""" + for section_type, _ in review_service.SECTION_PIPELINE: + prompt = review_service._build_prompt( + section_type=section_type, + paper=sample_paper, + full_text_md=None, + paper_sections=None, + figure_captions=None, + table_content=None, + math_blocks=None, + ) + assert isinstance(prompt, str) + assert len(prompt) > 0 + assert sample_paper.title in prompt + + def test_methodology_uses_relevant_sections(self, review_service, sample_paper): + paper_sections = { + "_preamble": "title", + "1. Introduction": "intro text", + "2. Methodology": "our novel approach uses attention", + "3. Experiments": "we test on cifar", + } + prompt = review_service._build_prompt( + section_type=ReviewSectionType.METHODOLOGY, + paper=sample_paper, + full_text_md=None, + paper_sections=paper_sections, + figure_captions=None, + table_content=None, + math_blocks=None, + ) + assert "attention" in prompt + # Should not include experiment sections + assert "cifar" not in prompt + + +# ── Empty Section Data Tests ────────────────────────────────────────── + + +class TestEmptySectionData: + """Test _empty_section_data returns correct structures.""" + + def test_figures_empty(self): + data = PaperReviewService._empty_section_data(ReviewSectionType.FIGURES) + assert data == {"figures": []} + + def test_tables_empty(self): + data = PaperReviewService._empty_section_data(ReviewSectionType.TABLES) + assert data == {"tables": []} + + def test_math_empty(self): + data = PaperReviewService._empty_section_data(ReviewSectionType.MATH_FORMULATIONS) + assert data == {"formulations": []} + + def test_other_empty(self): + data = PaperReviewService._empty_section_data(ReviewSectionType.EXECUTIVE_SUMMARY) + assert data == {}