FrontierDevelopmentLab · will-fawcett-trillium · May 1, 2026 · Apr 28, 2026 · May 1, 2026 · May 1, 2026
diff --git a/lib/eve-api b/lib/eve-api
diff --git a/src/eve_mcp/server/helpers.py b/src/eve_mcp/server/helpers.py
@@ -17,6 +17,8 @@
 
 logger = logging.getLogger(__name__)
 
+JSON_FENCE_MARKER = "```json"
+
 
 class NoTagFoundError(Exception):
     """Exception to raise when no tag is found."""
@@ -33,12 +35,14 @@ def extract_xml_tag(text, tag):
 
 
 def extract_tag(text, tag):
-    """Extract tag from text."""
-    pattern = rf"```{tag}(.*)```"
+    """Return the body of the first ```{tag} ... ``` markdown fence in text.
+
+    Uses a non-greedy match so that subsequent fences in the same text
+    are not consumed.
+    """
+    pattern = rf"```{tag}(.*?)```"
     if match := re.search(pattern, text, flags=re.DOTALL):
-        # extract the graph definition
-        content = match.group(1)
-        return content
+        return match.group(1)
 
     raise NoTagFoundError(f"no {tag} found in genai response")
 
@@ -206,34 +210,31 @@ async def _extract_factuality_issues(question: str, python_code: str) -> str:
     {question}
     </QUESTION>
 
-    Your task is to analyze the Google Earth Engine Python code below and extract what aspects 
-    or issues are making scientific or data assumptions either explicitly or implicitly and might require 
-    factual verification.
+    Your task is to analyze the Google Earth Engine Python code below and extract aspects
+    or issues that are making scientific or data assumptions, either explicitly or implicitly,
+    and that might require factual verification.
 
     <PYTHON_CODE>
     {python_code}
     </PYTHON_CODE>
 
-    Your response must be a list of json structures, each one describing a specific aspect you identify
-    and containing the following fields:
-    [ 
-    {{
-        "title": "A short title describing the aspect or assumption",
-        "description": "A detailed description of the aspect or assumption, "
-            "why it might require factual verification",
-        "facts": "Data, information, constants or facts to be verified",
-        "question_for_expert": "The question that should be posed to an expert "
-            "to verify the aspect or assumption"
-    }}
-    {{ ... more issues ...}}
-    ]
+    Wrap your response in a ```json fenced code block containing a JSON array. Each
+    array element describes one aspect, with these fields:
 
+    ```json
+    [
+        {{
+            "title": "A short title describing the aspect or assumption",
+            "description": "Why this aspect might require factual verification",
+            "facts": "Data, information, constants or facts to be verified",
+            "question_for_expert": "The question to pose to an expert"
+        }}
+    ]
+    ```
     """
-    r = await _query_eve(prompt)
-    r = json.loads(r)
-    issues = extract_tag(r["answer"], "json")
-    r.update({"issues": issues})
-    return json.dumps(r)
+    payload = json.loads(await _query_eve(prompt))
+    payload["issues"] = extract_tag(payload["answer"], "json")
+    return json.dumps(payload)
 
 
 async def _assess_factuality_issue(  # pylint: disable=too-many-positional-arguments
@@ -245,7 +246,7 @@ async def _assess_factuality_issue(  # pylint: disable=too-many-positional-argum
     issue_question_for_expert: str,
 ) -> str:
     prompt = f"""
-    I am trying solve the following Earth Observation question
+    I am trying to solve the following Earth Observation question
 
     <EO_QUESTION>
     {question}
@@ -271,37 +272,54 @@ async def _assess_factuality_issue(  # pylint: disable=too-many-positional-argum
     {issue_facts}
     </ISSUE_FACTS>
 
-    You task, as an Earth Observation expert, is to answer the following question
+    Your task, as an Earth Observation expert, is to answer the following question
 
     {issue_question_for_expert}
 
-    Express your asessment as free Markdown text. 
+    Express your assessment as free Markdown text.
 
-    If you have recommendations to fix or update the code, add a json string within
-    an xml tag <CODE_RECOMMENDATIONS> with the following structure
+    If you have recommendations to fix or update the code, add a JSON object
+    inside an xml tag <CODE_RECOMMENDATIONS> with the following structure
+    (use double-quoted JSON, not single quotes):
 
     <CODE_RECOMMENDATIONS>
     {{
-        'recommendation_1_title': {{
-            'explanation': an explanation of the recommendation,
-            'code_snippet': a string with python code with the suggested code udpate
+        "recommendation_1_title": {{
+            "explanation": "an explanation of the recommendation",
+            "code_snippet": "a string with python code with the suggested update"
         }},
-        'recommendation_2_title': {{
-            'explanation': an explanation of the recommendation,
-            'code_snippet': a string with python code with the suggested code udpate
+        "recommendation_2_title": {{
+            "explanation": "an explanation of the recommendation",
+            "code_snippet": "a string with python code with the suggested update"
         }}
     }}
     </CODE_RECOMMENDATIONS>
+
+    If you have no recommendations, omit the <CODE_RECOMMENDATIONS> tag entirely.
     """
 
-    r = await _query_eve(prompt)
-    r = json.loads(r)
-    code_recommendations = extract_xml_tag(r["answer"], "CODE_RECOMMENDATIONS")
-    if (
-        "```json"  # pylint: disable=magic-value-comparison
-        in code_recommendations
-    ):
-        code_recommendations = extract_tag(code_recommendations, "json")
-    r["answer"] = r["answer"].replace(code_recommendations, "")
-    r.update({"code_recommendations": code_recommendations})
-    return json.dumps(r)
+    payload = json.loads(await _query_eve(prompt))
+    answer = payload["answer"]
+
+    code_recommendations = ""
+    try:
+        inner = extract_xml_tag(answer, "CODE_RECOMMENDATIONS")
+    except NoTagFoundError:
+        # Recommendations are optional per the prompt.
+        pass
+    else:
+        block = f"<CODE_RECOMMENDATIONS>{inner}</CODE_RECOMMENDATIONS>"
+        answer = answer.replace(block, "")
+        code_recommendations = inner
+        if JSON_FENCE_MARKER in code_recommendations:
+            try:
+                code_recommendations = extract_tag(
+                    code_recommendations, "json"
+                )
+            except NoTagFoundError:
+                # Leave as-is if the inner fence is malformed.
+                pass
+
+    payload["answer"] = answer
+    payload["code_recommendations"] = code_recommendations
+    return json.dumps(payload)
diff --git a/tests/test_server/test_helpers.py b/tests/test_server/test_helpers.py
@@ -54,6 +54,19 @@ def test_raises_when_no_fence(helpers_mod):
         with pytest.raises(helpers_mod.NoTagFoundError):
             helpers_mod.extract_tag("plain text", "json")
 
+    @staticmethod
+    def test_does_not_span_multiple_fences(helpers_mod):
+        """Non-greedy match stops at the first closing fence."""
+        text = (
+            'first ```json {"x": 1} ``` middle prose '
+            '```json {"y": 2} ``` last'
+        )
+        captured = helpers_mod.extract_tag(text, "json")
+        assert "first" not in captured
+        assert '"x": 1' in captured
+        assert "middle prose" not in captured
+        assert '"y": 2' not in captured
+
 
 class TestGetEveClient:
     """Test the get_eve_client lazy singleton."""
@@ -260,59 +273,96 @@ async def test_extracts_json_block_from_answer(helpers_mod):
 class TestAssessFactualityIssue:
     """Test the _assess_factuality_issue helper."""
 
+    @staticmethod
+    async def _run_assess(helpers_mod, answer):
+        """Drive _assess_factuality_issue with a stubbed _query_eve."""
+        query_payload = json.dumps(
+            {"answer": answer, "sources": [], "conversation_id": "c"}
+        )
+        with patch(
+            f"{PKG}.helpers._query_eve",
+            new=AsyncMock(return_value=query_payload),
+        ):
+            raw = await helpers_mod._assess_factuality_issue(
+                "Q?", "code", "t", "d", "f", "eq"
+            )
+        return json.loads(raw)
+
     @pytest.mark.asyncio
     @staticmethod
-    async def test_strips_recommendations_block_from_answer(
+    async def test_strips_full_recommendations_block_from_answer(
         helpers_mod,
     ):
-        """Removes <CODE_RECOMMENDATIONS> from answer; surfaces it
-        separately."""
+        """Removes the entire <CODE_RECOMMENDATIONS>...</...> block,
+        tags included, from the answer body."""
         recs = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
         answer = (
             "Markdown body. "
             f"<CODE_RECOMMENDATIONS>{recs}</CODE_RECOMMENDATIONS>"
             " trailer"
         )
-        query_payload = json.dumps(
-            {"answer": answer, "sources": [], "conversation_id": "c"}
+        result = await TestAssessFactualityIssue._run_assess(
+            helpers_mod, answer
         )
-        with patch(
-            f"{PKG}.helpers._query_eve",
-            new=AsyncMock(return_value=query_payload),
-        ):
-            raw = await helpers_mod._assess_factuality_issue(  # pylint: disable=protected-access
-                "Q?", "code", "t", "d", "f", "eq"
-            )
-        result = json.loads(raw)
-        assert "<CODE_RECOMMENDATIONS>" in result["answer"]
-        # The recs payload itself is removed from the answer body.
+        assert "<CODE_RECOMMENDATIONS>" not in result["answer"]
+        assert "</CODE_RECOMMENDATIONS>" not in result["answer"]
         assert recs not in result["answer"]
+        assert "Markdown body." in result["answer"]
+        assert "trailer" in result["answer"]
         assert result["code_recommendations"] == recs
 
     @pytest.mark.asyncio
     @staticmethod
-    async def test_unwraps_inner_json_fence_in_recommendations(
+    async def test_unwraps_inner_json_fence_and_cleans_answer(
         helpers_mod,
     ):
-        """If <CODE_RECOMMENDATIONS> wraps a ```json fence, it is unwrapped."""
+        """If <CODE_RECOMMENDATIONS> wraps a ```json fence, the inner
+        JSON is unwrapped AND the wrapping tags + fence are removed
+        from the answer."""
         inner = '{"r1": {"explanation": "e", "code_snippet": "x = 1"}}'
         recs_with_fence = f"```json {inner} ```"
         answer = (
             "Body. "
             f"<CODE_RECOMMENDATIONS>{recs_with_fence}</CODE_RECOMMENDATIONS>"
         )
-        query_payload = json.dumps(
-            {"answer": answer, "sources": [], "conversation_id": "c"}
+        result = await TestAssessFactualityIssue._run_assess(
+            helpers_mod, answer
         )
-        with patch(
-            f"{PKG}.helpers._query_eve",
-            new=AsyncMock(return_value=query_payload),
-        ):
-            raw = await helpers_mod._assess_factuality_issue(  # pylint: disable=protected-access
-                "Q?", "code", "t", "d", "f", "eq"
-            )
-        result = json.loads(raw)
         assert inner in result["code_recommendations"]
+        assert "<CODE_RECOMMENDATIONS>" not in result["answer"]
+        assert "```json" not in result["answer"]
+        assert "Body." in result["answer"]
+
+    @pytest.mark.asyncio
+    @staticmethod
+    async def test_returns_empty_recommendations_when_tag_absent(
+        helpers_mod,
+    ):
+        """If the LLM omits <CODE_RECOMMENDATIONS>, the tool returns
+        an empty recommendations field rather than raising."""
+        answer = "Body of the assessment with no recommendations section."
+        result = await TestAssessFactualityIssue._run_assess(
+            helpers_mod, answer
+        )
+        assert result["code_recommendations"] == ""
+        assert result["answer"] == answer
+
+    @pytest.mark.asyncio
+    @staticmethod
+    async def test_keeps_malformed_inner_fence_as_is(
+        helpers_mod,
+    ):
+        """If the inner fence claims to be json but doesn't close,
+        leave the recommendations as the raw block content."""
+        broken = '```json {"r1": 1}'  # missing closing fence
+        answer = (
+            "Body. " f"<CODE_RECOMMENDATIONS>{broken}</CODE_RECOMMENDATIONS>"
+        )
+        result = await TestAssessFactualityIssue._run_assess(
+            helpers_mod, answer
+        )
+        assert "```json" in result["code_recommendations"]
+        assert "<CODE_RECOMMENDATIONS>" not in result["answer"]
 
 
 def test_helpers_module_importable():
+11 −4		.github/workflows/main.yml
+1 −1		.pre-commit-config.yaml
+0 −2		.pylintrc
+64 −0		CONTRIBUTING.md
+21 −0		LICENSE
+75 −10		README.md
+21 −217		examples/tutorial.ipynb
+4 −139		poetry.lock
+9 −21		pyproject.toml
+0 −2		src/eve_api/__init__.py
+6 −6		src/eve_api/auth.py
+11 −12		src/eve_api/client.py
+15 −11		src/eve_api/exceptions.py
+0 −18		src/eve_api/response.py
+4 −2		tests/conftest.py
+17 −20		tests/test_auth.py
+79 −52		tests/test_client.py