Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 66 additions & 48 deletions src/eve_mcp/server/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

logger = logging.getLogger(__name__)

JSON_FENCE_MARKER = "```json"


class NoTagFoundError(Exception):
"""Exception to raise when no tag is found."""
Expand All @@ -33,12 +35,14 @@ def extract_xml_tag(text, tag):


def extract_tag(text, tag):
"""Extract tag from text."""
pattern = rf"```{tag}(.*)```"
"""Extract the first ```{tag} ... ``` markdown fence from text.

Uses a non-greedy match so that subsequent fences in the same text
are not consumed.
"""
pattern = rf"```{tag}(.*?)```"
if match := re.search(pattern, text, flags=re.DOTALL):
# extract the graph definition
content = match.group(1)
return content
return match.group(1)

raise NoTagFoundError(f"no {tag} found in genai response")

Expand Down Expand Up @@ -206,34 +210,31 @@ async def _extract_factuality_issues(question: str, python_code: str) -> str:
{question}
</QUESTION>

Your task is to analyze the Google Earth Engine Python code below and extract what aspects
or issues are making scientific or data assumptions either explicitly or implicitly and might require
factual verification.
Your task is to analyze the Google Earth Engine Python code below and extract aspects
or issues that are making scientific or data assumptions, either explicitly or implicitly,
and that might require factual verification.

<PYTHON_CODE>
{python_code}
</PYTHON_CODE>

Your response must be a list of json structures, each one describing a specific aspect you identify
and containing the following fields:
[
{{
"title": "A short title describing the aspect or assumption",
"description": "A detailed description of the aspect or assumption, "
"why it might require factual verification",
"facts": "Data, information, constants or facts to be verified",
"question_for_expert": "The question that should be posed to an expert "
"to verify the aspect or assumption"
}}
{{ ... more issues ...}}
]
Wrap your response in a ```json fenced code block containing a JSON array. Each
array element describes one aspect, with these fields:

```json
[
{{
"title": "A short title describing the aspect or assumption",
"description": "Why this aspect might require factual verification",
"facts": "Data, information, constants or facts to be verified",
"question_for_expert": "The question to pose to an expert"
}}
]
```
"""
r = await _query_eve(prompt)
r = json.loads(r)
issues = extract_tag(r["answer"], "json")
r.update({"issues": issues})
return json.dumps(r)
payload = json.loads(await _query_eve(prompt))
payload["issues"] = extract_tag(payload["answer"], "json")
return json.dumps(payload)


async def _assess_factuality_issue( # pylint: disable=too-many-positional-arguments
Expand All @@ -245,7 +246,7 @@ async def _assess_factuality_issue( # pylint: disable=too-many-positional-argum
issue_question_for_expert: str,
) -> str:
prompt = f"""
I am trying solve the following Earth Observation question
I am trying to solve the following Earth Observation question

<EO_QUESTION>
{question}
Expand All @@ -271,37 +272,54 @@ async def _assess_factuality_issue( # pylint: disable=too-many-positional-argum
{issue_facts}
</ISSUE_FACTS>

You task, as an Earth Observation expert, is to answer the following question
Your task, as an Earth Observation expert, is to answer the following question

{issue_question_for_expert}

Express your asessment as free Markdown text.
Express your assessment as free Markdown text.

If you have recommendations to fix or update the code, add a json string within
an xml tag <CODE_RECOMMENDATIONS> with the following structure
If you have recommendations to fix or update the code, add a JSON object
inside an xml tag <CODE_RECOMMENDATIONS> with the following structure
(use double-quoted JSON, not single quotes):

<CODE_RECOMMENDATIONS>
{{
'recommendation_1_title': {{
'explanation': an explanation of the recommendation,
'code_snippet': a string with python code with the suggested code udpate
"recommendation_1_title": {{
"explanation": "an explanation of the recommendation",
"code_snippet": "a string with python code with the suggested update"
}},
'recommendation_2_title': {{
'explanation': an explanation of the recommendation,
'code_snippet': a string with python code with the suggested code udpate
"recommendation_2_title": {{
"explanation": "an explanation of the recommendation",
"code_snippet": "a string with python code with the suggested update"
}}
}}
</CODE_RECOMMENDATIONS>

If you have no recommendations, omit the <CODE_RECOMMENDATIONS> tag entirely.
"""

r = await _query_eve(prompt)
r = json.loads(r)
code_recommendations = extract_xml_tag(r["answer"], "CODE_RECOMMENDATIONS")
if (
"```json" # pylint: disable=magic-value-comparison
in code_recommendations
):
code_recommendations = extract_tag(code_recommendations, "json")
r["answer"] = r["answer"].replace(code_recommendations, "")
r.update({"code_recommendations": code_recommendations})
return json.dumps(r)
payload = json.loads(await _query_eve(prompt))
answer = payload["answer"]

code_recommendations = ""
try:
inner = extract_xml_tag(answer, "CODE_RECOMMENDATIONS")
except NoTagFoundError:
# Recommendations are optional per the prompt.
pass
else:
block = f"<CODE_RECOMMENDATIONS>{inner}</CODE_RECOMMENDATIONS>"
answer = answer.replace(block, "")
code_recommendations = inner
if JSON_FENCE_MARKER in code_recommendations:
try:
code_recommendations = extract_tag(
code_recommendations, "json"
)
except NoTagFoundError:
# Leave as-is if the inner fence is malformed.
pass

payload["answer"] = answer
payload["code_recommendations"] = code_recommendations
return json.dumps(payload)
104 changes: 77 additions & 27 deletions tests/test_server/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@ def test_raises_when_no_fence(helpers_mod):
with pytest.raises(helpers_mod.NoTagFoundError):
helpers_mod.extract_tag("plain text", "json")

@staticmethod
def test_does_not_span_multiple_fences(helpers_mod):
"""Non-greedy match stops at the first closing fence."""
text = (
'first ```json {"x": 1} ``` middle prose '
'```json {"y": 2} ``` last'
)
captured = helpers_mod.extract_tag(text, "json")
assert "first" not in captured
assert '"x": 1' in captured
assert "middle prose" not in captured
assert '"y": 2' not in captured
Comment thread
will-fawcett-trillium marked this conversation as resolved.


class TestGetEveClient:
"""Test the get_eve_client lazy singleton."""
Expand Down Expand Up @@ -260,59 +273,96 @@ async def test_extracts_json_block_from_answer(helpers_mod):
class TestAssessFactualityIssue:
"""Test the _assess_factuality_issue helper."""

@staticmethod
async def _run_assess(helpers_mod, answer):
"""Drive _assess_factuality_issue with a stubbed _query_eve."""
query_payload = json.dumps(
{"answer": answer, "sources": [], "conversation_id": "c"}
)
with patch(
f"{PKG}.helpers._query_eve",
new=AsyncMock(return_value=query_payload),
):
raw = await helpers_mod._assess_factuality_issue(
"Q?", "code", "t", "d", "f", "eq"
)
return json.loads(raw)

@pytest.mark.asyncio
@staticmethod
async def test_strips_recommendations_block_from_answer(
async def test_strips_full_recommendations_block_from_answer(
helpers_mod,
):
"""Removes <CODE_RECOMMENDATIONS> from answer; surfaces it
separately."""
"""Removes the entire <CODE_RECOMMENDATIONS>...</...> block,
tags included, from the answer body."""
recs = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
answer = (
"Markdown body. "
f"<CODE_RECOMMENDATIONS>{recs}</CODE_RECOMMENDATIONS>"
" trailer"
)
query_payload = json.dumps(
{"answer": answer, "sources": [], "conversation_id": "c"}
result = await TestAssessFactualityIssue._run_assess(
helpers_mod, answer
)
with patch(
f"{PKG}.helpers._query_eve",
new=AsyncMock(return_value=query_payload),
):
raw = await helpers_mod._assess_factuality_issue( # pylint: disable=protected-access
"Q?", "code", "t", "d", "f", "eq"
)
result = json.loads(raw)
assert "<CODE_RECOMMENDATIONS>" in result["answer"]
# The recs payload itself is removed from the answer body.
assert "<CODE_RECOMMENDATIONS>" not in result["answer"]
assert "</CODE_RECOMMENDATIONS>" not in result["answer"]
assert recs not in result["answer"]
assert "Markdown body." in result["answer"]
assert "trailer" in result["answer"]
assert result["code_recommendations"] == recs

@pytest.mark.asyncio
@staticmethod
async def test_unwraps_inner_json_fence_in_recommendations(
async def test_unwraps_inner_json_fence_and_cleans_answer(
helpers_mod,
):
"""If <CODE_RECOMMENDATIONS> wraps a ```json fence, it is unwrapped."""
"""If <CODE_RECOMMENDATIONS> wraps a ```json fence, the inner
JSON is unwrapped AND the wrapping tags + fence are removed
from the answer."""
inner = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
recs_with_fence = f"```json {inner} ```"
answer = (
"Body. "
f"<CODE_RECOMMENDATIONS>{recs_with_fence}</CODE_RECOMMENDATIONS>"
)
query_payload = json.dumps(
{"answer": answer, "sources": [], "conversation_id": "c"}
result = await TestAssessFactualityIssue._run_assess(
helpers_mod, answer
)
with patch(
f"{PKG}.helpers._query_eve",
new=AsyncMock(return_value=query_payload),
):
raw = await helpers_mod._assess_factuality_issue( # pylint: disable=protected-access
"Q?", "code", "t", "d", "f", "eq"
)
result = json.loads(raw)
assert inner in result["code_recommendations"]
assert "<CODE_RECOMMENDATIONS>" not in result["answer"]
assert "```json" not in result["answer"]
assert "Body." in result["answer"]

@pytest.mark.asyncio
@staticmethod
async def test_returns_empty_recommendations_when_tag_absent(
helpers_mod,
):
"""If the LLM omits <CODE_RECOMMENDATIONS>, the tool returns
an empty recommendations field rather than raising."""
answer = "Body of the assessment with no recommendations section."
result = await TestAssessFactualityIssue._run_assess(
helpers_mod, answer
)
assert result["code_recommendations"] == ""
assert result["answer"] == answer

@pytest.mark.asyncio
@staticmethod
async def test_keeps_malformed_inner_fence_as_is(
helpers_mod,
):
"""If the inner fence claims to be json but doesn't close,
leave the recommendations as the raw block content."""
broken = '```json {"r1": 1}' # missing closing fence
answer = (
"Body. " f"<CODE_RECOMMENDATIONS>{broken}</CODE_RECOMMENDATIONS>"
)
result = await TestAssessFactualityIssue._run_assess(
helpers_mod, answer
)
assert "```json" in result["code_recommendations"]
assert "<CODE_RECOMMENDATIONS>" not in result["answer"]


def test_helpers_module_importable():
Expand Down
Loading