diff --git a/lib/eve-api b/lib/eve-api
index 1f8de11..57265a8 160000
--- a/lib/eve-api
+++ b/lib/eve-api
@@ -1 +1 @@
-Subproject commit 1f8de11795beff29597416a1ce2dde2a7ed66fa5
+Subproject commit 57265a8ebbe6cb4c1f826a33f84ad5487bbc427c
diff --git a/src/eve_mcp/server/helpers.py b/src/eve_mcp/server/helpers.py
index 018bc95..252ca7e 100644
--- a/src/eve_mcp/server/helpers.py
+++ b/src/eve_mcp/server/helpers.py
@@ -17,6 +17,8 @@
 
 logger = logging.getLogger(__name__)
 
+JSON_FENCE_MARKER = "```json"
+
 
 class NoTagFoundError(Exception):
     """Exception to raise when no tag is found."""
@@ -33,12 +35,14 @@ def extract_xml_tag(text, tag):
 
 
 def extract_tag(text, tag):
-    """Extract tag from text."""
-    pattern = rf"```{tag}(.*)```"
+    """Extract the first ```{tag} ... ``` markdown fence from text.
+
+    Uses a non-greedy match so that subsequent fences in the same text
+    are not consumed.
+    """
+    pattern = rf"```{tag}(.*?)```"
     if match := re.search(pattern, text, flags=re.DOTALL):
-        # extract the graph definition
-        content = match.group(1)
-        return content
+        return match.group(1)
     raise NoTagFoundError(f"no {tag} found in genai response")
 
 
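Reviewer note (illustrative, not part of the diff): the switch from `(.*)` to `(.*?)` is the heart of the `extract_tag` fix. A greedy match runs to the *last* closing fence in the text, so a second fence leaks into the capture; the non-greedy form stops at the first. A minimal stdlib-only sketch:

```python
import re

text = 'a ```json {"x": 1} ``` b ```json {"y": 2} ``` c'

greedy = re.search(r"```json(.*)```", text, flags=re.DOTALL)
lazy = re.search(r"```json(.*?)```", text, flags=re.DOTALL)

# Greedy capture spans both fences and the prose between them.
assert '"y": 2' in greedy.group(1)
# Non-greedy capture stops at the first closing fence.
assert lazy.group(1) == ' {"x": 1} '
```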
@@ -206,34 +210,31 @@ async def _extract_factuality_issues(question: str, python_code: str) -> str:
 
     {question}
 
-    Your task is to analyze the Google Earth Engine Python code below and extract what aspects
-    or issues are making scientific or data assumptions either explicitly or implicitly and might require
-    factual verification.
+    Your task is to analyze the Google Earth Engine Python code below and extract aspects
+    or issues that are making scientific or data assumptions, either explicitly or implicitly,
+    and that might require factual verification.
 
     {python_code}
 
-    Your response must be a list of json structures, each one describing a specific aspect you identify
-    and containing the following fields:
-    [
-        {{
-            "title": "A short title describing the aspect or assumption",
-            "description": "A detailed description of the aspect or assumption, "
-            "why it might require factual verification",
-            "facts": "Data, information, constants or facts to be verified",
-            "question_for_expert": "The question that should be posed to an expert "
-            "to verify the aspect or assumption"
-        }}
-        {{ ... more issues ...}}
-    ]
+    Wrap your response in a ```json fenced code block containing a JSON array. Each
+    array element describes one aspect, with these fields:
+    ```json
+    [
+        {{
+            "title": "A short title describing the aspect or assumption",
+            "description": "Why this aspect might require factual verification",
+            "facts": "Data, information, constants or facts to be verified",
+            "question_for_expert": "The question to pose to an expert"
+        }}
+    ]
+    ```
     """
-    r = await _query_eve(prompt)
-    r = json.loads(r)
-    issues = extract_tag(r["answer"], "json")
-    r.update({"issues": issues})
-    return json.dumps(r)
+    payload = json.loads(await _query_eve(prompt))
+    payload["issues"] = extract_tag(payload["answer"], "json")
+    return json.dumps(payload)
 
 
 async def _assess_factuality_issue(  # pylint: disable=too-many-positional-arguments
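Reviewer note (illustrative, not part of the diff): worth flagging for callers of `_extract_factuality_issues` that `extract_tag` returns the raw text between the fences, so the `issues` field in the serialized result is still a JSON-encoded string, not a parsed list. A sketch of the required double parse, with abridged values:

```python
import json

# Shape of what _extract_factuality_issues returns (values abridged).
raw = json.dumps({
    "answer": '```json\n[{"title": "t"}]\n```',
    "issues": '\n[{"title": "t"}]\n',  # raw fence text, still a string
})

payload = json.loads(raw)
issues = json.loads(payload["issues"])  # callers must parse a second time
assert issues[0]["title"] == "t"
```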
@@ -245,7 +246,7 @@ async def _assess_factuality_issue(  # pylint: disable=too-many-positional-argum
     issue_question_for_expert: str,
 ) -> str:
     prompt = f"""
-    I am trying solve the following Earth Observation question
+    I am trying to solve the following Earth Observation question
 
     {question}
 
@@ -271,37 +272,54 @@ async def _assess_factuality_issue(  # pylint: disable=too-many-positional-argum
 
     {issue_facts}
 
-    You task, as an Earth Observation expert, is to answer the following question
+    Your task, as an Earth Observation expert, is to answer the following question
 
     {issue_question_for_expert}
 
-    Express your asessment as free Markdown text.
+    Express your assessment as free Markdown text.
 
-    If you have recommendations to fix or update the code, add a json string within
-    an xml tag <CODE_RECOMMENDATIONS>...</CODE_RECOMMENDATIONS> with the following structure
+    If you have recommendations to fix or update the code, add a JSON object
+    inside an XML tag <CODE_RECOMMENDATIONS>...</CODE_RECOMMENDATIONS> with the following structure
+    (use double-quoted JSON, not single quotes):
 
     {{
-        'recommendation_1_title': {{
-            'explanation': an explanation of the recommendation,
-            'code_snippet': a string with python code with the suggested code udpate
+        "recommendation_1_title": {{
+            "explanation": "an explanation of the recommendation",
+            "code_snippet": "a string with python code with the suggested update"
         }},
-        'recommendation_2_title': {{
-            'explanation': an explanation of the recommendation,
-            'code_snippet': a string with python code with the suggested code udpate
+        "recommendation_2_title": {{
+            "explanation": "an explanation of the recommendation",
+            "code_snippet": "a string with python code with the suggested update"
         }}
     }}
+
+    If you have no recommendations, omit the <CODE_RECOMMENDATIONS> tag entirely.
     """
-    r = await _query_eve(prompt)
-    r = json.loads(r)
-    code_recommendations = extract_xml_tag(r["answer"], "CODE_RECOMMENDATIONS")
-    if (
-        "```json"  # pylint: disable=magic-value-comparison
-        in code_recommendations
-    ):
-        code_recommendations = extract_tag(code_recommendations, "json")
-    r["answer"] = r["answer"].replace(code_recommendations, "")
-    r.update({"code_recommendations": code_recommendations})
-    return json.dumps(r)
+    payload = json.loads(await _query_eve(prompt))
+    answer = payload["answer"]
+
+    code_recommendations = ""
+    try:
+        inner = extract_xml_tag(answer, "CODE_RECOMMENDATIONS")
+    except NoTagFoundError:
+        # Recommendations are optional per the prompt.
+        pass
+    else:
+        block = f"<CODE_RECOMMENDATIONS>{inner}</CODE_RECOMMENDATIONS>"
+        answer = answer.replace(block, "")
+        code_recommendations = inner
+        if JSON_FENCE_MARKER in code_recommendations:
+            try:
+                code_recommendations = extract_tag(
+                    code_recommendations, "json"
+                )
+            except NoTagFoundError:
+                # Leave as-is if the inner fence is malformed.
+                pass
+
+    payload["answer"] = answer
+    payload["code_recommendations"] = code_recommendations
+    return json.dumps(payload)
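Reviewer note (illustrative, not part of the diff): the rewritten `_assess_factuality_issue` now distinguishes three answer shapes: tag present with a clean inner fence, tag absent, and tag present with a fence that never closes. A standalone mirror of just that branching, stripped of the `_query_eve` plumbing (`sketch_split` is a hypothetical name, not part of the diff):

```python
import re

def sketch_split(answer: str) -> tuple[str, str]:
    """Mirror of the new tag handling; returns (answer, recommendations)."""
    block = re.search(
        r"<CODE_RECOMMENDATIONS>(.*?)</CODE_RECOMMENDATIONS>", answer, re.DOTALL
    )
    if block is None:
        return answer, ""  # tag absent: empty recommendations, no exception
    inner = block.group(1)
    answer = answer.replace(block.group(0), "")  # strip tags and content
    fence = re.search(r"```json(.*?)```", inner, re.DOTALL)
    if fence:
        inner = fence.group(1)  # well-formed inner fence: unwrap it
    return answer, inner  # malformed fence: raw block content kept verbatim

assert sketch_split("Body only.") == ("Body only.", "")
assert sketch_split('A <CODE_RECOMMENDATIONS>{"r": 1}</CODE_RECOMMENDATIONS>') == (
    "A ", '{"r": 1}'
)
```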
" f"{recs_with_fence}" ) - query_payload = json.dumps( - {"answer": answer, "sources": [], "conversation_id": "c"} + result = await TestAssessFactualityIssue._run_assess( + helpers_mod, answer ) - with patch( - f"{PKG}.helpers._query_eve", - new=AsyncMock(return_value=query_payload), - ): - raw = await helpers_mod._assess_factuality_issue( # pylint: disable=protected-access - "Q?", "code", "t", "d", "f", "eq" - ) - result = json.loads(raw) assert inner in result["code_recommendations"] + assert "" not in result["answer"] + assert "```json" not in result["answer"] + assert "Body." in result["answer"] + + @pytest.mark.asyncio + @staticmethod + async def test_returns_empty_recommendations_when_tag_absent( + helpers_mod, + ): + """If the LLM omits , the tool returns + an empty recommendations field rather than raising.""" + answer = "Body of the assessment with no recommendations section." + result = await TestAssessFactualityIssue._run_assess( + helpers_mod, answer + ) + assert result["code_recommendations"] == "" + assert result["answer"] == answer + + @pytest.mark.asyncio + @staticmethod + async def test_keeps_malformed_inner_fence_as_is( + helpers_mod, + ): + """If the inner fence claims to be json but doesn't close, + leave the recommendations as the raw block content.""" + broken = '```json {"r1": 1}' # missing closing fence + answer = ( + "Body. " f"{broken}" + ) + result = await TestAssessFactualityIssue._run_assess( + helpers_mod, answer + ) + assert "```json" in result["code_recommendations"] + assert "" not in result["answer"] def test_helpers_module_importable():