Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions app/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
from anthropic import AsyncAnthropic

from app.cache import CacheProvider, cache_response, get_cached_response
from app.explain_api import CostBreakdown, ExplainRequest, ExplainResponse, TokenUsage
from app.explain_api import (
CostBreakdown,
ExplainRequest,
ExplainResponse,
ExplanationFormat,
StructuredExplanation,
TokenUsage,
)
from app.metrics import MetricsProvider
from app.model_costs import get_model_cost
from app.prompt import Prompt
Expand Down Expand Up @@ -92,16 +99,42 @@ async def _call_anthropic_api(
# Call Claude API
LOGGER.info("Using Anthropic client with model: %s", {prompt_data["model"]})

message = await client.messages.create(
model=prompt_data["model"],
max_tokens=prompt_data["max_tokens"],
temperature=prompt_data["temperature"],
system=prompt_data["system"],
messages=prompt_data["messages"],
)

# Get explanation and strip leading/trailing whitespace
explanation = message.content[0].text.strip()
use_structured = body.format == ExplanationFormat.STRUCTURED

api_kwargs: dict = {
"model": prompt_data["model"],
"max_tokens": prompt_data["max_tokens"],
"temperature": prompt_data["temperature"],
"system": prompt_data["system"],
"messages": prompt_data["messages"],
}

if use_structured:
# For structured output: skip assistant prefill, add line indexing
# hint, and use output_config with JSON schema
api_kwargs["messages"] = [prompt_data["messages"][0]] # user only
api_kwargs["system"] += (
"\n\nThe assembly listing is 0-indexed. Reference specific line ranges in your response."
)
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),
Comment on lines +119 to +123
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The max_tokens is increased to a minimum of 2048 for structured output to account for JSON overhead. While this is reasonable, it means structured format requests may consume more tokens (and cost more) than markdown requests with the same prompt configuration. Consider documenting this behavior in the API documentation or PR description so users are aware of the potential cost difference.

Suggested change
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),
# Ensure enough tokens for JSON overhead in structured output. This may increase
# token usage and cost compared to markdown responses with the same prompt.
if prompt_data["max_tokens"] < 2048:
LOGGER.info(
"Structured output: increasing max_tokens from %s to 2048 to account for JSON "
"overhead. This may increase token usage and cost versus markdown output.",
prompt_data["max_tokens"],
)
api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048)
api_kwargs["output_config"] = {
"format": {
"type": "json_schema",
"schema": StructuredExplanation.model_json_schema(),

Copilot uses AI. Check for mistakes.
}
}

message = await client.messages.create(**api_kwargs)

# Parse response based on format
raw_text = message.content[0].text.strip()
explanation_text: str | None = None
structured: StructuredExplanation | None = None

if use_structured:
structured = StructuredExplanation.model_validate_json(raw_text)
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JSON validation here can raise a ValidationError if the API returns JSON that doesn't match the StructuredExplanation schema. While Anthropic's structured output with output_config should guarantee schema-compliant JSON, it would be more robust to handle potential validation errors explicitly and return a meaningful error response rather than letting the exception propagate to FastAPI's default error handler.

Consider wrapping this in a try-except block to catch pydantic.ValidationError and return an appropriate error response with status='error'.

Copilot uses AI. Check for mistakes.
else:
explanation_text = raw_text

# Extract usage information
input_tokens = message.usage.input_tokens
Expand Down Expand Up @@ -130,7 +163,8 @@ async def _call_anthropic_api(
# Create and return ExplainResponse object
return ExplainResponse(
status="success",
explanation=explanation,
explanation=explanation_text,
structuredExplanation=structured,
model=prompt_data["model"],
usage=TokenUsage(
inputTokens=input_tokens,
Expand Down
39 changes: 38 additions & 1 deletion app/explain_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
in claude_explain.md.
"""

from enum import Enum

from pydantic import BaseModel, Field

from app.explanation_types import AudienceLevel, ExplanationType
Expand Down Expand Up @@ -40,6 +42,13 @@ class AssemblyItem(BaseModel):
isOmissionMarker: bool | None = None # Added for truncated assembly


class ExplanationFormat(str, Enum):
    """Supported output formats for a generated explanation.

    Inherits from ``str`` so the enum value can be compared against and
    serialized as a plain string in request/response models.
    """

    # Free-form markdown prose (the historical default).
    MARKDOWN = "markdown"
    # JSON payload with per-section assembly line mappings.
    STRUCTURED = "structured"


class ExplainRequest(BaseModel):
"""Request body for the Claude Explain API."""

Expand All @@ -56,6 +65,10 @@ class ExplainRequest(BaseModel):
explanation: ExplanationType = Field(
default=ExplanationType.ASSEMBLY, description="Type of explanation to generate"
)
format: ExplanationFormat = Field(
default=ExplanationFormat.MARKDOWN,
description="Output format: 'markdown' (default) or 'structured' (JSON with assembly line mappings)",
)
bypassCache: bool = Field(default=False, description="If true, skip reading from cache but still write to cache")

@property
Expand All @@ -64,6 +77,27 @@ def instruction_set_with_default(self) -> str:
return self.instructionSet or "unknown"


class ExplanationSection(BaseModel):
    """A section of a structured explanation, mapped to assembly lines.

    ``asmStartLine``/``asmEndLine`` are indices into the ``asm`` array of the
    originating request (0-indexed, end inclusive). The index *values* are
    produced by the LLM, so consumers should bounds-check them against their
    own assembly listing before using them for highlighting.
    """

    # extra="forbid" both rejects unknown keys at validation time and emits
    # "additionalProperties": false in model_json_schema(), which the JSON
    # schema sent to Anthropic's structured output requires. The previous
    # json_schema_extra-only config decorated the schema but did not enforce
    # anything at runtime.
    model_config = {"extra": "forbid"}

    title: str = Field(..., description="Section heading")
    asmStartLine: int = Field(..., description="0-indexed start line in the assembly listing")
    asmEndLine: int = Field(..., description="0-indexed end line (inclusive) in the assembly listing")
    content: str = Field(..., description="Explanation of this group of instructions (markdown)")


class StructuredExplanation(BaseModel):
    """Structured explanation with assembly line mappings.

    This model doubles as the JSON schema handed to Anthropic's structured
    output (via ``model_json_schema()``) and as the validator for the JSON the
    API returns.
    """

    # extra="forbid" both rejects unknown keys at validation time and emits
    # "additionalProperties": false in model_json_schema(), which the JSON
    # schema sent to Anthropic's structured output requires. The previous
    # json_schema_extra-only config decorated the schema but did not enforce
    # anything at runtime.
    model_config = {"extra": "forbid"}

    summary: str = Field(..., description="One-sentence overview of what the compiler did")
    # NOTE(review): an empty sections list is currently accepted; consider
    # min_length=1 once it is confirmed Anthropic's supported JSON-schema
    # subset honors minItems.
    sections: list[ExplanationSection] = Field(..., description="Explanation sections mapped to assembly lines")
    keyInsight: str = Field(..., description="The single most important takeaway")


class TokenUsage(BaseModel):
"""Token usage information."""

Expand All @@ -83,7 +117,10 @@ class CostBreakdown(BaseModel):
class ExplainResponse(BaseModel):
"""Response from the Claude Explain API."""

explanation: str | None = Field(None, description="The generated explanation")
explanation: str | None = Field(None, description="The generated explanation (markdown format)")
structuredExplanation: StructuredExplanation | None = Field(
None, description="Structured explanation with assembly line mappings (structured format)"
)
status: str = Field(..., description="'success' or 'error'")
message: str | None = Field(None, description="Error message (only present on error)")
model: str | None = Field(None, description="The Claude model used")
Expand Down
100 changes: 100 additions & 0 deletions app/test_explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,103 @@ def test_legitimate_document_validation(self):
assert request.asm[2].source.file is None
assert request.asm[2].source.line == 0
assert request.asm[2].source.column is None # This was the missing field


class TestStructuredOutput:
    """Tests for the structured explanation output format."""

    @staticmethod
    def _mock_client(response_text, input_tokens=100, output_tokens=50):
        """Build a mock Anthropic client whose messages.create yields *response_text*."""
        content = MagicMock()
        content.text = response_text
        message = MagicMock()
        message.content = [content]
        message.usage = MagicMock()
        message.usage.input_tokens = input_tokens
        message.usage.output_tokens = output_tokens
        client = MagicMock()
        client.messages.create = AsyncMock(return_value=message)
        return client

    @pytest.fixture
    def structured_mock_client(self):
        """Create a mock that returns structured JSON."""
        payload = {
            "summary": "A simple square function",
            "sections": [
                {
                    "title": "Multiply",
                    "asmStartLine": 1,
                    "asmEndLine": 1,
                    "content": "Multiplies edi by itself",
                },
                {
                    "title": "Return",
                    "asmStartLine": 2,
                    "asmEndLine": 3,
                    "content": "Moves result to eax and returns",
                },
            ],
            "keyInsight": "The function is just three instructions",
        }
        return self._mock_client(json.dumps(payload), input_tokens=100, output_tokens=80)

    @pytest.mark.asyncio
    async def test_structured_output_returns_structured_explanation(
        self, sample_request, structured_mock_client, noop_metrics
    ):
        """Structured format returns structuredExplanation, not explanation."""
        sample_request.format = "structured"
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        response = await process_request(sample_request, structured_mock_client, prompt, noop_metrics)

        assert response.status == "success"
        assert response.explanation is None
        structured = response.structuredExplanation
        assert structured is not None
        assert structured.summary == "A simple square function"
        assert len(structured.sections) == 2
        assert structured.sections[0].asmStartLine == 1
        assert structured.keyInsight == "The function is just three instructions"

    @pytest.mark.asyncio
    async def test_structured_output_uses_output_config(self, sample_request, structured_mock_client, noop_metrics):
        """Structured format passes output_config to the API."""
        sample_request.format = "structured"
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        await process_request(sample_request, structured_mock_client, prompt, noop_metrics)

        _, kwargs = structured_mock_client.messages.create.call_args
        assert "output_config" in kwargs
        assert kwargs["output_config"]["format"]["type"] == "json_schema"

    @pytest.mark.asyncio
    async def test_markdown_format_unchanged(self, sample_request, noop_metrics):
        """Default markdown format still works as before."""
        client = self._mock_client("This is a markdown explanation")
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        response = await process_request(sample_request, client, prompt, noop_metrics)

        assert response.status == "success"
        assert response.explanation == "This is a markdown explanation"
        assert response.structuredExplanation is None

    @pytest.mark.asyncio
    async def test_markdown_format_no_output_config(self, sample_request, noop_metrics):
        """Default markdown format does not send output_config."""
        client = self._mock_client("Explanation")
        prompt = Prompt(Path(__file__).parent / "prompt.yaml")
        await process_request(sample_request, client, prompt, noop_metrics)

        _, kwargs = client.messages.create.call_args
        assert "output_config" not in kwargs