Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/arxiv_explorer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def main(


# Import and register subcommands
from . import config, daily, export, lists, notes, preferences, search # noqa: E402
from . import config, daily, export, lists, notes, preferences, review, search # noqa: E402

# Mount each subcommand module's Typer app on the main app as a named
# command group; `help` is the group's description text.
app.add_typer(preferences.app, name="prefs", help="Preference management")
app.add_typer(lists.app, name="list", help="Reading list management")
Expand All @@ -55,6 +55,7 @@ def main(
# Register individual functions from the subcommand modules as commands
# directly on `app`, each under the explicit name given here.
app.command(name="dislike")(daily.dislike)
app.command(name="show")(daily.show)
app.command(name="translate")(daily.translate)
app.command(name="review")(review.review)


@app.command()
Expand Down
191 changes: 191 additions & 0 deletions src/arxiv_explorer/cli/review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Review command -- generate comprehensive AI paper review."""

from pathlib import Path
from typing import Optional

import typer
from rich.progress import (
BarColumn,
Progress,
SpinnerColumn,
TaskProgressColumn,
TextColumn,
TimeElapsedColumn,
)

from ..core.models import Language, ReviewSectionType
from ..services.paper_service import PaperService
from ..services.review_service import PaperReviewService
from ..services.settings_service import SettingsService
from ..utils.display import console, print_error, print_info, print_success

# Human-readable names for review sections.
# Maps each ReviewSectionType member to the label shown to the user in the
# status listing and the progress description. Lookups in this module use
# `.get(member, member.value)`, so a member without an entry here is
# displayed by its raw enum value.
_SECTION_NAMES: dict[ReviewSectionType, str] = {
    ReviewSectionType.EXECUTIVE_SUMMARY: "Executive Summary",
    ReviewSectionType.KEY_CONTRIBUTIONS: "Key Contributions",
    ReviewSectionType.SECTION_SUMMARIES: "Section Summaries",
    ReviewSectionType.METHODOLOGY: "Methodology Analysis",
    ReviewSectionType.MATH_FORMULATIONS: "Math Formulations",
    ReviewSectionType.FIGURES: "Figure Descriptions",
    ReviewSectionType.TABLES: "Table Descriptions",
    ReviewSectionType.EXPERIMENTAL_RESULTS: "Experimental Results",
    ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses",
    ReviewSectionType.RELATED_WORK: "Related Work",
    ReviewSectionType.GLOSSARY: "Glossary",
    ReviewSectionType.QUESTIONS: "Questions",
}


def _print_review_status(review_service: PaperReviewService, arxiv_id: str) -> None:
    """Print which review sections are already cached for ``arxiv_id``."""
    cached = review_service.get_cached_review(arxiv_id)
    if cached is None:
        print_info(f"No cached review for {arxiv_id}")
        return

    total = len(ReviewSectionType)
    done = len(cached.sections)
    console.print(f"[bold]Review status for {arxiv_id}[/bold]")
    console.print(f"Sections: {done}/{total}")
    # Walk the enum (not the cache) so missing sections are listed too.
    for st in ReviewSectionType:
        if st in cached.sections:
            icon = "[green]\u2714[/green]"
        else:
            icon = "[dim]\u2022[/dim]"
        console.print(f" {icon} {_SECTION_NAMES.get(st, st.value)}")


def _resolve_target_language(translate: bool, language: Optional[str]) -> Language:
    """Pick the output language from the ``--language`` / ``--translate`` options.

    An explicit language code takes precedence; ``--translate`` alone uses the
    language from settings; with neither option the review stays in English.

    Raises:
        typer.Exit: with code 1 when ``language`` is not a valid ``Language`` value.
    """
    if language:
        try:
            return Language(language)
        except ValueError:
            supported = ", ".join(lang.value for lang in Language)
            print_error(f"Unknown language: {language}. Supported: {supported}")
            raise typer.Exit(1) from None
    if translate:
        return SettingsService().get_language()
    return Language.EN


# NOTE: the docstring of `review` doubles as the command's --help text, so it
# is kept user-oriented. Developer notes live in comments instead.
#
# Flow: --delete and --status are handled first and return immediately.
# Otherwise the target language is validated, the paper metadata is fetched,
# the review is generated section by section, and the rendered Markdown is
# written to --output or printed to the console.
# Exits with code 1 (typer.Exit) on an unknown language, an unknown paper, or
# when generation returns nothing.
def review(
    arxiv_id: str = typer.Argument(..., help="arXiv ID (e.g., 2401.00001)"),
    output: Optional[Path] = typer.Option(
        None, "--output", "-o", help="Save review to file (default: print to console)"
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Regenerate all sections (ignore cache)"
    ),
    translate: bool = typer.Option(
        False, "--translate", "-t", help="Translate review to configured language"
    ),
    language: Optional[str] = typer.Option(
        None, "--language", "-L", help="Target language code (e.g., 'ko')"
    ),
    no_full_text: bool = typer.Option(
        False, "--no-full-text", help="Skip full text extraction, use abstract only"
    ),
    status: bool = typer.Option(
        False, "--status", "-s", help="Show cached review status without generating"
    ),
    delete: bool = typer.Option(False, "--delete", help="Delete cached review for this paper"),
):
    """Generate a comprehensive AI review of an arXiv paper.

    Fetches the full paper text when possible (via arxiv-doc-builder),
    then analyzes each section with AI to produce a detailed Markdown review.
    Reviews are cached section-by-section -- interrupted reviews resume
    automatically.

    Examples:
        axp review 2401.00001
        axp review 2401.00001 -o review.md
        axp review 2401.00001 --force --translate
        axp review 2401.00001 --status
    """
    review_service = PaperReviewService()

    # Handle --delete
    if delete:
        if review_service.delete_review(arxiv_id):
            print_success(f"Deleted cached review for {arxiv_id}")
        else:
            print_info(f"No cached review found for {arxiv_id}")
        return

    # Handle --status
    if status:
        _print_review_status(review_service, arxiv_id)
        return

    # Resolve the language up front: a mistyped --language must fail before
    # the paper is fetched and the whole review generated, not afterwards.
    target_lang = _resolve_target_language(translate, language)

    # Fetch paper metadata
    paper_service = PaperService()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Fetching paper metadata...", total=None)
        paper = paper_service.get_paper(arxiv_id)

    if not paper:
        print_error(f"Paper not found: {arxiv_id}")
        raise typer.Exit(1)

    console.print(f"\n[bold]{paper.title}[/bold]")
    console.print(f"[dim]{', '.join(paper.authors[:5])}[/dim]\n")

    # If --no-full-text, skip extraction.
    # NOTE(review): this overrides a private method on the service instance;
    # presumably generate_review() then falls back to the abstract -- confirm
    # against PaperReviewService.
    if no_full_text:
        review_service._extract_full_text = lambda _: None  # type: ignore[assignment]

    # Generate review with progress bar
    succeeded = 0
    failed = 0

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeElapsedColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("Generating review...", total=len(ReviewSectionType))

        def on_start(section_type: ReviewSectionType, idx: int, total: int) -> None:
            # idx/total are part of the callback signature but not needed here.
            name = _SECTION_NAMES.get(section_type, section_type.value)
            progress.update(task, description=f"[cyan]{name}[/cyan]...")

        def on_complete(section_type: ReviewSectionType, success: bool) -> None:
            nonlocal succeeded, failed
            if success:
                succeeded += 1
            else:
                failed += 1
            progress.advance(task)

        paper_review = review_service.generate_review(
            paper=paper,
            force=force,
            on_section_start=on_start,
            on_section_complete=on_complete,
        )

    if not paper_review:
        print_error("Review generation failed completely.")
        raise typer.Exit(1)

    # Report results
    print_info(f"Sections: {succeeded} succeeded, {failed} failed")
    if paper_review.source_type == "abstract":
        print_info("Note: Full text was not available. Review is based on abstract only.")

    # Render markdown
    markdown = review_service.render_markdown(paper_review, language=target_lang)

    # Output
    if output:
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(markdown, encoding="utf-8")
        print_success(f"Review saved: {output}")
    else:
        console.print()
        from rich.markdown import Markdown

        console.print(Markdown(markdown))
12 changes: 12 additions & 0 deletions src/arxiv_explorer/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Paper review sections (incremental cache)
CREATE TABLE IF NOT EXISTS paper_review_sections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
arxiv_id TEXT NOT NULL,
section_type TEXT NOT NULL,
content_json TEXT NOT NULL,
source_type TEXT NOT NULL DEFAULT 'abstract',
generated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(arxiv_id, section_type)
);

-- Paper cache
CREATE TABLE IF NOT EXISTS papers (
arxiv_id TEXT PRIMARY KEY NOT NULL,
Expand All @@ -109,6 +120,7 @@
CREATE INDEX IF NOT EXISTS idx_list_papers_list ON reading_list_papers(list_id);
CREATE INDEX IF NOT EXISTS idx_translations_arxiv ON paper_translations(arxiv_id);
CREATE INDEX IF NOT EXISTS idx_papers_cached_at ON papers(cached_at);
CREATE INDEX IF NOT EXISTS idx_review_sections_arxiv ON paper_review_sections(arxiv_id);
"""


Expand Down
50 changes: 50 additions & 0 deletions src/arxiv_explorer/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ class Language(str, Enum):
KO = "ko"


class ReviewSectionType(str, Enum):
    """Identifiers for the individual sections of a paper review.

    Because ``str`` is mixed in, every member is also a string equal to its
    value. Iterating the enum yields members in the order declared below.
    """

    # NOTE(review): presumably these values are what is stored in the
    # paper_review_sections.section_type column -- confirm before renaming.
    EXECUTIVE_SUMMARY = "executive_summary"
    KEY_CONTRIBUTIONS = "key_contributions"
    SECTION_SUMMARIES = "section_summaries"
    METHODOLOGY = "methodology"
    MATH_FORMULATIONS = "math_formulations"
    FIGURES = "figures"
    TABLES = "tables"
    EXPERIMENTAL_RESULTS = "experimental_results"
    STRENGTHS_WEAKNESSES = "strengths_weaknesses"
    RELATED_WORK = "related_work"
    GLOSSARY = "glossary"
    QUESTIONS = "questions"


@dataclass
class Paper:
"""Paper data model."""
Expand Down Expand Up @@ -143,6 +158,41 @@ class KeywordInterest:
source: str = "explicit" # 'explicit' or 'inferred'


@dataclass
class ReviewSection:
    """One section of a paper review, cached individually."""

    # NOTE(review): presumably the row id in paper_review_sections -- confirm.
    id: int
    # arXiv identifier of the paper this section belongs to.
    arxiv_id: str
    # Which part of the review this record holds.
    section_type: ReviewSectionType
    # Section content as a JSON string; the schema is not defined here.
    content_json: str
    # Defaults to datetime.now() at construction, i.e. a naive local timestamp.
    generated_at: datetime = field(default_factory=datetime.now)


@dataclass
class PaperReview:
    """Paper metadata bundled with the review sections available for it."""

    arxiv_id: str
    title: str
    authors: list[str]
    categories: list[str]
    published: datetime
    abstract: str
    # Section content keyed by section type; each instance gets its own dict.
    sections: dict[ReviewSectionType, dict] = field(default_factory=dict)
    pdf_url: Optional[str] = None
    source_type: str = "abstract"
    # Naive local timestamp taken when the instance is created.
    generated_at: datetime = field(default_factory=datetime.now)

    @property
    def is_complete(self) -> bool:
        """True when the section keys are exactly the full set of section types."""
        return set(self.sections) == set(ReviewSectionType)

    @property
    def missing_sections(self) -> list[ReviewSectionType]:
        """Section types with no entry in ``sections``, in enum declaration order."""
        present = self.sections
        return [kind for kind in ReviewSectionType if kind not in present]


@dataclass
class RecommendedPaper:
"""Recommended paper with score."""
Expand Down
Loading
Loading