diff --git a/lib/eve-api b/lib/eve-api
index 1f8de11..57265a8 160000
--- a/lib/eve-api
+++ b/lib/eve-api
@@ -1 +1 @@
-Subproject commit 1f8de11795beff29597416a1ce2dde2a7ed66fa5
+Subproject commit 57265a8ebbe6cb4c1f826a33f84ad5487bbc427c
diff --git a/src/eve_mcp/server/helpers.py b/src/eve_mcp/server/helpers.py
index 018bc95..252ca7e 100644
--- a/src/eve_mcp/server/helpers.py
+++ b/src/eve_mcp/server/helpers.py
@@ -17,6 +17,8 @@
logger = logging.getLogger(__name__)
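+# Module-level constant for the JSON fence marker; the previous inline literal
+# needed a pylint magic-value-comparison disable where it was compared.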
+JSON_FENCE_MARKER = "```json"
+
class NoTagFoundError(Exception):
"""Exception to raise when no tag is found."""
@@ -33,12 +35,14 @@ def extract_xml_tag(text, tag):
def extract_tag(text, tag):
- """Extract tag from text."""
- pattern = rf"```{tag}(.*)```"
+ """Extract the first ```{tag} ... ``` markdown fence from text.
+
+ Uses a non-greedy match so that subsequent fences in the same text
+ are not consumed.
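+
+ For example, given 'a ```json {"x": 1} ``` b ```json {"y": 2} ``` c',
+ extract_tag(text, "json") returns ' {"x": 1} ' and never touches the second fence.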
+ """
+ pattern = rf"```{tag}(.*?)```"
if match := re.search(pattern, text, flags=re.DOTALL):
- # extract the graph definition
- content = match.group(1)
- return content
+ return match.group(1)
raise NoTagFoundError(f"no {tag} found in genai response")
@@ -206,34 +210,31 @@ async def _extract_factuality_issues(question: str, python_code: str) -> str:
{question}
- Your task is to analyze the Google Earth Engine Python code below and extract what aspects
- or issues are making scientific or data assumptions either explicitly or implicitly and might require
- factual verification.
+ Your task is to analyze the Google Earth Engine Python code below and extract aspects
+ or issues that make scientific or data assumptions, either explicitly or implicitly,
+ and that might require factual verification.
{python_code}
- Your response must be a list of json structures, each one describing a specific aspect you identify
- and containing the following fields:
- [
- {{
- "title": "A short title describing the aspect or assumption",
- "description": "A detailed description of the aspect or assumption, "
- "why it might require factual verification",
- "facts": "Data, information, constants or facts to be verified",
- "question_for_expert": "The question that should be posed to an expert "
- "to verify the aspect or assumption"
- }}
- {{ ... more issues ...}}
- ]
+ Wrap your response in a ```json fenced code block containing a JSON array. Each
+ array element describes one aspect, with these fields:
+ ```json
+ [
+ {{
+ "title": "A short title describing the aspect or assumption",
+ "description": "Why this aspect might require factual verification",
+ "facts": "Data, information, constants or facts to be verified",
+ "question_for_expert": "The question to pose to an expert"
+ }}
+ ]
+ ```
"""
- r = await _query_eve(prompt)
- r = json.loads(r)
- issues = extract_tag(r["answer"], "json")
- r.update({"issues": issues})
- return json.dumps(r)
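+ # _query_eve returns a JSON string; decode it once, attach the extracted
+ # issues, and re-serialize the enriched payload.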
+ payload = json.loads(await _query_eve(prompt))
+ payload["issues"] = extract_tag(payload["answer"], "json")
+ return json.dumps(payload)
async def _assess_factuality_issue( # pylint: disable=too-many-positional-arguments
@@ -245,7 +246,7 @@ async def _assess_factuality_issue( # pylint: disable=too-many-positional-argum
issue_question_for_expert: str,
) -> str:
prompt = f"""
- I am trying solve the following Earth Observation question
+ I am trying to solve the following Earth Observation question
{question}
@@ -271,37 +272,54 @@ async def _assess_factuality_issue( # pylint: disable=too-many-positional-argum
{issue_facts}
- You task, as an Earth Observation expert, is to answer the following question
+ Your task, as an Earth Observation expert, is to answer the following question
{issue_question_for_expert}
- Express your asessment as free Markdown text.
+ Express your assessment as free Markdown text.
- If you have recommendations to fix or update the code, add a json string within
- a <CODE_RECOMMENDATIONS> xml tag with the following structure
+ If you have recommendations to fix or update the code, add a JSON object
+ inside a <CODE_RECOMMENDATIONS> xml tag with the following structure
+ (use double-quoted JSON, not single quotes):
{{
- 'recommendation_1_title': {{
- 'explanation': an explanation of the recommendation,
- 'code_snippet': a string with python code with the suggested code udpate
+ "recommendation_1_title": {{
+ "explanation": "an explanation of the recommendation",
+ "code_snippet": "a string with python code with the suggested update"
}},
- 'recommendation_2_title': {{
- 'explanation': an explanation of the recommendation,
- 'code_snippet': a string with python code with the suggested code udpate
+ "recommendation_2_title": {{
+ "explanation": "an explanation of the recommendation",
+ "code_snippet": "a string with python code with the suggested update"
}}
}}
+
+ If you have no recommendations, omit the tag entirely.
"""
- r = await _query_eve(prompt)
- r = json.loads(r)
- code_recommendations = extract_xml_tag(r["answer"], "CODE_RECOMMENDATIONS")
- if (
- "```json" # pylint: disable=magic-value-comparison
- in code_recommendations
- ):
- code_recommendations = extract_tag(code_recommendations, "json")
- r["answer"] = r["answer"].replace(code_recommendations, "")
- r.update({"code_recommendations": code_recommendations})
- return json.dumps(r)
+ payload = json.loads(await _query_eve(prompt))
+ answer = payload["answer"]
+
+ code_recommendations = ""
+ try:
+ inner = extract_xml_tag(answer, "CODE_RECOMMENDATIONS")
+ except NoTagFoundError:
+ # Recommendations are optional per the prompt.
+ pass
+ else:
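+ # Drop the tagged block wholesale, tags included, so no stray markup
+ # survives in the answer body.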
+ block = f"{inner}"
+ answer = answer.replace(block, "")
+ code_recommendations = inner
+ if JSON_FENCE_MARKER in code_recommendations:
+ try:
+ code_recommendations = extract_tag(
+ code_recommendations, "json"
+ )
+ except NoTagFoundError:
+ # Leave as-is if the inner fence is malformed.
+ pass
+
+ payload["answer"] = answer
+ payload["code_recommendations"] = code_recommendations
+ return json.dumps(payload)
diff --git a/tests/test_server/test_helpers.py b/tests/test_server/test_helpers.py
index 47cb671..0d3d788 100644
--- a/tests/test_server/test_helpers.py
+++ b/tests/test_server/test_helpers.py
@@ -54,6 +54,19 @@ def test_raises_when_no_fence(helpers_mod):
with pytest.raises(helpers_mod.NoTagFoundError):
helpers_mod.extract_tag("plain text", "json")
+ @staticmethod
+ def test_does_not_span_multiple_fences(helpers_mod):
+ """Non-greedy match stops at the first closing fence."""
+ text = (
+ 'first ```json {"x": 1} ``` middle prose '
+ '```json {"y": 2} ``` last'
+ )
+ captured = helpers_mod.extract_tag(text, "json")
+ assert "first" not in captured
+ assert '"x": 1' in captured
+ assert "middle prose" not in captured
+ assert '"y": 2' not in captured
+
class TestGetEveClient:
"""Test the get_eve_client lazy singleton."""
@@ -260,59 +273,96 @@ async def test_extracts_json_block_from_answer(helpers_mod):
class TestAssessFactualityIssue:
"""Test the _assess_factuality_issue helper."""
+ @staticmethod
+ async def _run_assess(helpers_mod, answer):
+ """Drive _assess_factuality_issue with a stubbed _query_eve."""
+ query_payload = json.dumps(
+ {"answer": answer, "sources": [], "conversation_id": "c"}
+ )
+ with patch(
+ f"{PKG}.helpers._query_eve",
+ new=AsyncMock(return_value=query_payload),
+ ):
+ raw = await helpers_mod._assess_factuality_issue(  # pylint: disable=protected-access
+ "Q?", "code", "t", "d", "f", "eq"
+ )
+ return json.loads(raw)
+
@pytest.mark.asyncio
@staticmethod
- async def test_strips_recommendations_block_from_answer(
+ async def test_strips_full_recommendations_block_from_answer(
helpers_mod,
):
- """Removes from answer; surfaces it
- separately."""
+ """Removes the entire ... block,
+ tags included, from the answer body."""
recs = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
answer = (
"Markdown body. "
f"{recs}"
" trailer"
)
- query_payload = json.dumps(
- {"answer": answer, "sources": [], "conversation_id": "c"}
+ result = await TestAssessFactualityIssue._run_assess(
+ helpers_mod, answer
)
- with patch(
- f"{PKG}.helpers._query_eve",
- new=AsyncMock(return_value=query_payload),
- ):
- raw = await helpers_mod._assess_factuality_issue( # pylint: disable=protected-access
- "Q?", "code", "t", "d", "f", "eq"
- )
- result = json.loads(raw)
- assert "" in result["answer"]
- # The recs payload itself is removed from the answer body.
+ assert "" not in result["answer"]
+ assert "" not in result["answer"]
assert recs not in result["answer"]
+ assert "Markdown body." in result["answer"]
+ assert "trailer" in result["answer"]
assert result["code_recommendations"] == recs
@pytest.mark.asyncio
@staticmethod
- async def test_unwraps_inner_json_fence_in_recommendations(
+ async def test_unwraps_inner_json_fence_and_cleans_answer(
helpers_mod,
):
- """If wraps a ```json fence, it is unwrapped."""
+ """If wraps a ```json fence, the inner
+ JSON is unwrapped AND the wrapping tags + fence are removed
+ from the answer."""
inner = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
recs_with_fence = f"```json {inner} ```"
answer = (
"Body. "
f"{recs_with_fence}"
)
- query_payload = json.dumps(
- {"answer": answer, "sources": [], "conversation_id": "c"}
+ result = await TestAssessFactualityIssue._run_assess(
+ helpers_mod, answer
)
- with patch(
- f"{PKG}.helpers._query_eve",
- new=AsyncMock(return_value=query_payload),
- ):
- raw = await helpers_mod._assess_factuality_issue( # pylint: disable=protected-access
- "Q?", "code", "t", "d", "f", "eq"
- )
- result = json.loads(raw)
assert inner in result["code_recommendations"]
+ assert "" not in result["answer"]
+ assert "```json" not in result["answer"]
+ assert "Body." in result["answer"]
+
+ @pytest.mark.asyncio
+ @staticmethod
+ async def test_returns_empty_recommendations_when_tag_absent(
+ helpers_mod,
+ ):
+ """If the LLM omits , the tool returns
+ an empty recommendations field rather than raising."""
+ answer = "Body of the assessment with no recommendations section."
+ result = await TestAssessFactualityIssue._run_assess(
+ helpers_mod, answer
+ )
+ assert result["code_recommendations"] == ""
+ assert result["answer"] == answer
+
+ @pytest.mark.asyncio
+ @staticmethod
+ async def test_keeps_malformed_inner_fence_as_is(
+ helpers_mod,
+ ):
+ """If the inner fence claims to be json but doesn't close,
+ leave the recommendations as the raw block content."""
+ broken = '```json {"r1": 1}' # missing closing fence
+ answer = (
+ "Body. " f"{broken}"
+ )
+ result = await TestAssessFactualityIssue._run_assess(
+ helpers_mod, answer
+ )
+ assert "```json" in result["code_recommendations"]
+ assert "" not in result["answer"]
def test_helpers_module_importable():