Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions app/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
from anthropic import AsyncAnthropic

from app.cache import CacheProvider, cache_response, get_cached_response
from app.explain_api import CostBreakdown, ExplainRequest, ExplainResponse, TokenUsage
from app.explain_api import (
CostBreakdown,
ExplainRequest,
ExplainResponse,
ExplanationFormat,
StructuredExplanation,
TokenUsage,
)
from app.metrics import MetricsProvider
from app.model_costs import get_model_cost
from app.prompt import Prompt
Expand Down Expand Up @@ -92,16 +99,42 @@ async def _call_anthropic_api(
# Call Claude API
LOGGER.info("Using Anthropic client with model: %s", {prompt_data["model"]})

message = await client.messages.create(
model=prompt_data["model"],
max_tokens=prompt_data["max_tokens"],
temperature=prompt_data["temperature"],
system=prompt_data["system"],
messages=prompt_data["messages"],
)

# Get explanation and strip leading/trailing whitespace
explanation = message.content[0].text.strip()
use_structured = body.format == ExplanationFormat.STRUCTURED

api_kwargs: dict = {
"model": prompt_data["model"],
"max_tokens": prompt_data["max_tokens"],
"temperature": prompt_data["temperature"],
"system": prompt_data["system"],
"messages": prompt_data["messages"],
}

if use_structured:
# For structured output: skip assistant prefill, add line indexing
# hint, and use output_config with JSON schema
api_kwargs["messages"] = [prompt_data["messages"][0]] # user only
api_kwargs["system"] += (
"\n\nThe assembly listing is 0-indexed. Reference specific line ranges in your response."
)
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),
Comment on lines +119 to +123
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The max_tokens is increased to a minimum of 2048 for structured output to account for JSON overhead. While this is reasonable, it means structured format requests may consume more tokens (and cost more) than markdown requests with the same prompt configuration. Consider documenting this behavior in the API documentation or PR description so users are aware of the potential cost difference.

Suggested change
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),
# Ensure enough tokens for JSON overhead in structured output. This may increase
# token usage and cost compared to markdown responses with the same prompt.
if prompt_data["max_tokens"] < 2048:
LOGGER.info(
"Structured output: increasing max_tokens from %s to 2048 to account for JSON "
"overhead. This may increase token usage and cost versus markdown output.",
prompt_data["max_tokens"],
)
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),

Copilot uses AI. Check for mistakes.
}
}

message = await client.messages.create(**api_kwargs)

# Parse response based on format
raw_text = message.content[0].text.strip()
explanation_text: str | None = None
structured: StructuredExplanation | None = None

if use_structured:
structured = StructuredExplanation.model_validate_json(raw_text)
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JSON validation here can raise a ValidationError if the API returns JSON that doesn't match the StructuredExplanation schema. While Anthropic's structured output with output_config should guarantee schema-compliant JSON, it would be more robust to handle potential validation errors explicitly and return a meaningful error response rather than letting the exception propagate to FastAPI's default error handler.

Consider wrapping this in a try-except block to catch pydantic.ValidationError and return an appropriate error response with status='error'.

Copilot uses AI. Check for mistakes.
else:
explanation_text = raw_text

# Extract usage information
input_tokens = message.usage.input_tokens
Expand Down Expand Up @@ -130,7 +163,8 @@ async def _call_anthropic_api(
# Create and return ExplainResponse object
return ExplainResponse(
status="success",
explanation=explanation,
explanation=explanation_text,
structuredExplanation=structured,
model=prompt_data["model"],
usage=TokenUsage(
inputTokens=input_tokens,
Expand Down
39 changes: 38 additions & 1 deletion app/explain_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
in claude_explain.md.
"""

from enum import Enum

from pydantic import BaseModel, Field

from app.explanation_types import AudienceLevel, ExplanationType
Expand Down Expand Up @@ -40,6 +42,13 @@ class AssemblyItem(BaseModel):
isOmissionMarker: bool | None = None # Added for truncated assembly


class ExplanationFormat(str, Enum):
    """Supported output formats for a generated explanation.

    Inherits from ``str`` so the enum value can be compared against and
    serialized as a plain string in request/response models.
    """

    # Free-form markdown prose (the historical default).
    MARKDOWN = "markdown"
    # JSON payload with per-section assembly line mappings.
    STRUCTURED = "structured"


class ExplainRequest(BaseModel):
"""Request body for the Claude Explain API."""

Expand All @@ -56,6 +65,10 @@ class ExplainRequest(BaseModel):
explanation: ExplanationType = Field(
default=ExplanationType.ASSEMBLY, description="Type of explanation to generate"
)
format: ExplanationFormat = Field(
default=ExplanationFormat.MARKDOWN,
description="Output format: 'markdown' (default) or 'structured' (JSON with assembly line mappings)",
)
bypassCache: bool = Field(default=False, description="If true, skip reading from cache but still write to cache")

@property
Expand All @@ -64,6 +77,27 @@ def instruction_set_with_default(self) -> str:
return self.instructionSet or "unknown"


class ExplanationSection(BaseModel):
    """A section of a structured explanation, mapped to assembly lines.

    ``asmStartLine``/``asmEndLine`` are indices into the ``asm`` array of the
    originating request (0-indexed, end inclusive). The index *values* are
    produced by the LLM, so consumers should bounds-check them against their
    own assembly listing before using them for highlighting.
    """

    # extra="forbid" both rejects unknown keys at validation time and emits
    # "additionalProperties": false in model_json_schema(), which the JSON
    # schema sent to Anthropic's structured output requires. The previous
    # json_schema_extra-only config decorated the schema but did not enforce
    # anything at runtime.
    model_config = {"extra": "forbid"}

    title: str = Field(..., description="Section heading")
    asmStartLine: int = Field(..., description="0-indexed start line in the assembly listing")
    asmEndLine: int = Field(..., description="0-indexed end line (inclusive) in the assembly listing")
    content: str = Field(..., description="Explanation of this group of instructions (markdown)")


class StructuredExplanation(BaseModel):
    """Structured explanation with assembly line mappings.

    This model doubles as the JSON schema handed to Anthropic's structured
    output (via ``model_json_schema()``) and as the validator for the JSON the
    API returns.
    """

    # extra="forbid" both rejects unknown keys at validation time and emits
    # "additionalProperties": false in model_json_schema(), which the JSON
    # schema sent to Anthropic's structured output requires. The previous
    # json_schema_extra-only config decorated the schema but did not enforce
    # anything at runtime.
    model_config = {"extra": "forbid"}

    summary: str = Field(..., description="One-sentence overview of what the compiler did")
    # NOTE(review): an empty sections list is currently accepted; consider
    # min_length=1 once it is confirmed Anthropic's supported JSON-schema
    # subset honors minItems.
    sections: list[ExplanationSection] = Field(..., description="Explanation sections mapped to assembly lines")
    keyInsight: str = Field(..., description="The single most important takeaway")


class TokenUsage(BaseModel):
"""Token usage information."""

Expand All @@ -83,7 +117,10 @@ class CostBreakdown(BaseModel):
class ExplainResponse(BaseModel):
"""Response from the Claude Explain API."""

explanation: str | None = Field(None, description="The generated explanation")
explanation: str | None = Field(None, description="The generated explanation (markdown format)")
structuredExplanation: StructuredExplanation | None = Field(
None, description="Structured explanation with assembly line mappings (structured format)"
)
status: str = Field(..., description="'success' or 'error'")
message: str | None = Field(None, description="Error message (only present on error)")
model: str | None = Field(None, description="The Claude model used")
Expand Down
100 changes: 100 additions & 0 deletions app/test_explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,103 @@ def test_legitimate_document_validation(self):
assert request.asm[2].source.file is None
assert request.asm[2].source.line == 0
assert request.asm[2].source.column is None # This was the missing field


class TestStructuredOutput:
    """Tests for the structured explanation output format."""

    @staticmethod
    def _mock_client(response_text, input_tokens=100, output_tokens=50):
        """Build a mock Anthropic client whose messages.create yields *response_text*."""
        content = MagicMock()
        content.text = response_text
        message = MagicMock()
        message.content = [content]
        message.usage = MagicMock()
        message.usage.input_tokens = input_tokens
        message.usage.output_tokens = output_tokens
        client = MagicMock()
        client.messages.create = AsyncMock(return_value=message)
        return client

    @pytest.fixture
    def structured_mock_client(self):
        """Create a mock that returns structured JSON."""
        payload = {
            "summary": "A simple square function",
            "sections": [
                {
                    "title": "Multiply",
                    "asmStartLine": 1,
                    "asmEndLine": 1,
                    "content": "Multiplies edi by itself",
                },
                {
                    "title": "Return",
                    "asmStartLine": 2,
                    "asmEndLine": 3,
                    "content": "Moves result to eax and returns",
                },
            ],
            "keyInsight": "The function is just three instructions",
        }
        return self._mock_client(json.dumps(payload), input_tokens=100, output_tokens=80)

    @pytest.mark.asyncio
    async def test_structured_output_returns_structured_explanation(
        self, sample_request, structured_mock_client, noop_metrics
    ):
        """Structured format returns structuredExplanation, not explanation."""
        sample_request.format = "structured"
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        response = await process_request(sample_request, structured_mock_client, prompt, noop_metrics)

        assert response.status == "success"
        assert response.explanation is None
        structured = response.structuredExplanation
        assert structured is not None
        assert structured.summary == "A simple square function"
        assert len(structured.sections) == 2
        assert structured.sections[0].asmStartLine == 1
        assert structured.keyInsight == "The function is just three instructions"

    @pytest.mark.asyncio
    async def test_structured_output_uses_output_config(self, sample_request, structured_mock_client, noop_metrics):
        """Structured format passes output_config to the API."""
        sample_request.format = "structured"
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        await process_request(sample_request, structured_mock_client, prompt, noop_metrics)

        _, kwargs = structured_mock_client.messages.create.call_args
        assert "output_config" in kwargs
        assert kwargs["output_config"]["format"]["type"] == "json_schema"

    @pytest.mark.asyncio
    async def test_markdown_format_unchanged(self, sample_request, noop_metrics):
        """Default markdown format still works as before."""
        client = self._mock_client("This is a markdown explanation")
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        response = await process_request(sample_request, client, prompt, noop_metrics)

        assert response.status == "success"
        assert response.explanation == "This is a markdown explanation"
        assert response.structuredExplanation is None

    @pytest.mark.asyncio
    async def test_markdown_format_no_output_config(self, sample_request, noop_metrics):
        """Default markdown format does not send output_config."""
        client = self._mock_client("Explanation")
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        await process_request(sample_request, client, prompt, noop_metrics)

        _, kwargs = client.messages.create.call_args
        assert "output_config" not in kwargs