Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion verifiers/envs/experimental/rlm_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,15 +664,43 @@ async def _call_sub_tool(
"tool_call_id": tool_call_id,
}

def _normalize_message_content(self, messages: list[dict]) -> list[dict]:
"""Normalize message content fields to formats the API accepts.

The API expects content to be: string, array of objects, or None.
Handles several malformed cases:
1. Content is a nested message dict (has 'role' and 'content' keys) - extract inner content
2. Content is a content part object (has 'type' key) - wrap in array
"""
normalized = []
for msg in messages:
msg_copy = dict(msg)
content = msg_copy.get("content")

if content is not None and isinstance(content, dict):
# Check if content is a nested message dict (has 'role' and 'content' keys)
# This happens when model passes message dicts to llm_batch instead of strings
if "role" in content and "content" in content:
msg_copy["content"] = content["content"]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nested content extraction skips further normalization checks

When extracting inner content from a nested message dict (one with both role and content keys), the extracted content["content"] is assigned directly without further normalization. If the inner content is itself a malformed dict (e.g., a content part object with type key, or another nested message), it won't be wrapped in an array or recursively normalized. This means the final content could still be an invalid bare dict, violating the stated invariant that the API expects content to be a string, array of objects, or None.

Fix in Cursor Fix in Web

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems overly defensive; I've never seen that happen. Also, maybe the models just shouldn't nest too deeply, and failing on this edge case is fine.

elif "type" in content:
# Content part object (e.g. {"type": "text", "text": "..."}) - wrap in array
msg_copy["content"] = [content]
else:
# Unknown dict structure - try wrapping in array as fallback
msg_copy["content"] = [content]
normalized.append(msg_copy)
return normalized

async def _call_sub_llm_api(
self, client: Any, model: str, messages: list[dict], tools: list | None = None
) -> Any | None:
"""Make a single sub-LLM API call with timeout. Returns None on timeout."""
normalized_messages = self._normalize_message_content(messages)
try:
return await asyncio.wait_for(
client.chat.completions.create(
model=model,
messages=messages,
messages=normalized_messages,
tools=tools,
logprobs=self._sub_llm_supports_logprobs or None,
),
Expand Down
Loading