-
Notifications
You must be signed in to change notification settings - Fork 3
Add structured output format with assembly line mappings #14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,14 @@ | |
| from anthropic import AsyncAnthropic | ||
|
|
||
| from app.cache import CacheProvider, cache_response, get_cached_response | ||
| from app.explain_api import CostBreakdown, ExplainRequest, ExplainResponse, TokenUsage | ||
| from app.explain_api import ( | ||
| CostBreakdown, | ||
| ExplainRequest, | ||
| ExplainResponse, | ||
| ExplanationFormat, | ||
| StructuredExplanation, | ||
| TokenUsage, | ||
| ) | ||
| from app.metrics import MetricsProvider | ||
| from app.model_costs import get_model_cost | ||
| from app.prompt import Prompt | ||
|
|
@@ -92,16 +99,42 @@ async def _call_anthropic_api( | |
| # Call Claude API | ||
| LOGGER.info("Using Anthropic client with model: %s", {prompt_data["model"]}) | ||
|
|
||
| message = await client.messages.create( | ||
| model=prompt_data["model"], | ||
| max_tokens=prompt_data["max_tokens"], | ||
| temperature=prompt_data["temperature"], | ||
| system=prompt_data["system"], | ||
| messages=prompt_data["messages"], | ||
| ) | ||
|
|
||
| # Get explanation and strip leading/trailing whitespace | ||
| explanation = message.content[0].text.strip() | ||
| use_structured = body.format == ExplanationFormat.STRUCTURED | ||
|
|
||
| api_kwargs: dict = { | ||
| "model": prompt_data["model"], | ||
| "max_tokens": prompt_data["max_tokens"], | ||
| "temperature": prompt_data["temperature"], | ||
| "system": prompt_data["system"], | ||
| "messages": prompt_data["messages"], | ||
| } | ||
|
|
||
| if use_structured: | ||
| # For structured output: skip assistant prefill, add line indexing | ||
| # hint, and use output_config with JSON schema | ||
| api_kwargs["messages"] = [prompt_data["messages"][0]] # user only | ||
| api_kwargs["system"] += ( | ||
| "\n\nThe assembly listing is 0-indexed. Reference specific line ranges in your response." | ||
| ) | ||
| api_kwargs["max_tokens"] = max(prompt_data["max_tokens"], 2048) | ||
| api_kwargs["output_config"] = { | ||
| "format": { | ||
| "type": "json_schema", | ||
| "schema": StructuredExplanation.model_json_schema(), | ||
| } | ||
| } | ||
|
|
||
| message = await client.messages.create(**api_kwargs) | ||
|
|
||
| # Parse response based on format | ||
| raw_text = message.content[0].text.strip() | ||
| explanation_text: str | None = None | ||
| structured: StructuredExplanation | None = None | ||
|
|
||
| if use_structured: | ||
| structured = StructuredExplanation.model_validate_json(raw_text) | ||
|
||
| else: | ||
| explanation_text = raw_text | ||
|
|
||
| # Extract usage information | ||
| input_tokens = message.usage.input_tokens | ||
|
|
@@ -130,7 +163,8 @@ async def _call_anthropic_api( | |
| # Create and return ExplainResponse object | ||
| return ExplainResponse( | ||
| status="success", | ||
| explanation=explanation, | ||
| explanation=explanation_text, | ||
| structuredExplanation=structured, | ||
| model=prompt_data["model"], | ||
| usage=TokenUsage( | ||
| inputTokens=input_tokens, | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -4,6 +4,8 @@ | |||||||||
| in claude_explain.md. | ||||||||||
| """ | ||||||||||
|
|
||||||||||
| from enum import Enum | ||||||||||
|
|
||||||||||
| from pydantic import BaseModel, Field | ||||||||||
|
|
||||||||||
| from app.explanation_types import AudienceLevel, ExplanationType | ||||||||||
|
|
@@ -40,6 +42,13 @@ class AssemblyItem(BaseModel): | |||||||||
| isOmissionMarker: bool | None = None # Added for truncated assembly | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class ExplanationFormat(str, Enum): | ||||||||||
| """Output format for explanations.""" | ||||||||||
|
|
||||||||||
| MARKDOWN = "markdown" | ||||||||||
| STRUCTURED = "structured" | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class ExplainRequest(BaseModel): | ||||||||||
| """Request body for the Claude Explain API.""" | ||||||||||
|
|
||||||||||
|
|
@@ -56,6 +65,10 @@ class ExplainRequest(BaseModel): | |||||||||
| explanation: ExplanationType = Field( | ||||||||||
| default=ExplanationType.ASSEMBLY, description="Type of explanation to generate" | ||||||||||
| ) | ||||||||||
| format: ExplanationFormat = Field( | ||||||||||
| default=ExplanationFormat.MARKDOWN, | ||||||||||
| description="Output format: 'markdown' (default) or 'structured' (JSON with assembly line mappings)", | ||||||||||
| ) | ||||||||||
| bypassCache: bool = Field(default=False, description="If true, skip reading from cache but still write to cache") | ||||||||||
|
|
||||||||||
| @property | ||||||||||
|
|
@@ -64,6 +77,27 @@ def instruction_set_with_default(self) -> str: | |||||||||
| return self.instructionSet or "unknown" | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class ExplanationSection(BaseModel): | ||||||||||
| """A section of a structured explanation, mapped to assembly lines.""" | ||||||||||
|
|
||||||||||
| model_config = {"json_schema_extra": {"additionalProperties": False}} | ||||||||||
|
||||||||||
|
|
||||||||||
| title: str = Field(..., description="Section heading") | ||||||||||
| asmStartLine: int = Field(..., description="0-indexed start line in the assembly listing") | ||||||||||
| asmEndLine: int = Field(..., description="0-indexed end line (inclusive) in the assembly listing") | ||||||||||
|
Comment on lines
+86
to
+87
|
||||||||||
| content: str = Field(..., description="Explanation of this group of instructions (markdown)") | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class StructuredExplanation(BaseModel): | ||||||||||
| """Structured explanation with assembly line mappings.""" | ||||||||||
|
|
||||||||||
| model_config = {"json_schema_extra": {"additionalProperties": False}} | ||||||||||
|
||||||||||
|
|
||||||||||
| summary: str = Field(..., description="One-sentence overview of what the compiler did") | ||||||||||
| sections: list[ExplanationSection] = Field(..., description="Explanation sections mapped to assembly lines") | ||||||||||
|
||||||||||
| sections: list[ExplanationSection] = Field(..., description="Explanation sections mapped to assembly lines") | |
| sections: list[ExplanationSection] = Field( | |
| ..., min_length=1, description="Explanation sections mapped to assembly lines" | |
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`max_tokens` is raised to a minimum of 2048 for structured output to account for JSON overhead. While this is reasonable, it means structured-format requests may consume more output tokens (and therefore cost more) than markdown requests with the same prompt configuration. Consider documenting this behavior in the API documentation or the PR description so users are aware of the potential cost difference.