diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/uv.lock b/examples/evaluation_and_profiling/email_phishing_analyzer/uv.lock
index 6c3ad2a99e..bf83a89b5f 100644
--- a/examples/evaluation_and_profiling/email_phishing_analyzer/uv.lock
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/uv.lock
@@ -196,6 +196,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" },
+]
+
 [[package]]
 name = "arize-phoenix-otel"
 version = "0.14.0"
@@ -689,6 +698,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
 ]
 
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" },
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -1954,7 +1972,7 @@ source = { editable = "." }
 dependencies = [
     { name = "beautifulsoup4" },
     { name = "networkx" },
-    { name = "nvidia-nat", extra = ["eval", "langchain", "phoenix", "profiler", "test"] },
+    { name = "nvidia-nat", extra = ["eval", "langchain", "phoenix", "profiler", "ragas", "test"] },
     { name = "openinference-instrumentation-langchain" },
 ]
 
@@ -1966,6 +1984,15 @@ requires-dist = [
     { name = "openinference-instrumentation-langchain", specifier = "==0.1.29" },
 ]
 
+[[package]]
+name = "nest-asyncio"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" },
+]
+
 [[package]]
 name = "nest-asyncio2"
 version = "1.7.2"
@@ -2062,6 +2089,9 @@ phoenix = [
 profiler = [
     { name = "nvidia-nat-profiler" },
 ]
+ragas = [
+    { name = "nvidia-nat-ragas" },
+]
 test = [
     { name = "nvidia-nat-test" },
 ]
@@ -2086,6 +2116,7 @@ requires-dist = [
     { name = "nat-math-assistant-a2a-protected", marker = "extra == 'examples'", editable = "../../A2A/math_assistant_a2a_protected" },
     { name = "nat-multi-frameworks", marker = "extra == 'examples'", editable = "../../frameworks/multi_frameworks" },
     { name = "nat-notebooks", marker = "extra == 'examples'", editable = "../../notebooks" },
+    { name = "nat-parallel-executor", marker = "extra == 'examples'", editable = "../../control_flow/parallel_executor" },
     { name = "nat-per-user-workflow", marker = "extra == 'examples'", editable = "../../front_ends/per_user_workflow" },
     { name = "nat-plot-charts", marker = "extra == 'examples'", editable = "../../custom_functions/plot_charts" },
     { name = "nat-por-to-jiratickets", marker = "extra == 'examples'", editable = "../../HITL/por_to_jiratickets" },
@@ -2160,6 +2191,7 @@ requires-dist = [
     { name = "nvidia-nat-rag", marker = "extra == 'rag'", editable = "../../../packages/nvidia_nat_rag" },
     { name = "nvidia-nat-ragaai", marker = "extra == 'ragaai'", editable = "../../../packages/nvidia_nat_ragaai" },
     { name = "nvidia-nat-ragas", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_ragas" },
+    { name = "nvidia-nat-ragas", marker = "extra == 'ragas'", editable = "../../../packages/nvidia_nat_ragas" },
     { name = "nvidia-nat-redis", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-redis", marker = "extra == 'redis'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-s3", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_s3" },
@@ -2180,7 +2212,7 @@ requires-dist = [
     { name = "nvidia-nat-zep-cloud", marker = "extra == 'zep-cloud'", editable = "../../../packages/nvidia_nat_zep_cloud" },
     { name = "text-file-ingest", marker = "extra == 'examples'", editable = "../../documentation_guides/workflows/text_file_ingest" },
 ]
-provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
+provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragas", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -2426,6 +2458,24 @@ requires-dist = [
 ]
 provides-extras = ["test"]
 
+[[package]]
+name = "nvidia-nat-ragas"
+source = { editable = "../../../packages/nvidia_nat_ragas" }
+dependencies = [
+    { name = "nvidia-nat-core" },
+    { name = "nvidia-nat-eval" },
+    { name = "ragas" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-nat-core", editable = "../../../packages/nvidia_nat_core" },
+    { name = "nvidia-nat-eval", editable = "../../../packages/nvidia_nat_eval" },
+    { name = "nvidia-nat-test", marker = "extra == 'test'", editable = "../../../packages/nvidia_nat_test" },
+    { name = "ragas", specifier = "~=0.2.14" },
+]
+provides-extras = ["test"]
+
 [[package]]
 name = "nvidia-nat-test"
 source = { editable = "../../../packages/nvidia_nat_test" }
@@ -3376,6 +3426,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
 ]
 
+[[package]]
+name = "ragas"
+version = "0.2.15"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "appdirs" },
+    { name = "datasets" },
+    { name = "diskcache" },
+    { name = "langchain" },
+    { name = "langchain-community" },
+    { name = "langchain-core" },
+    { name = "langchain-openai" },
+    { name = "nest-asyncio" },
+    { name = "numpy" },
+    { name = "openai" },
+    { name = "pydantic" },
+    { name = "tiktoken" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6c/0f/04fddfa94744b1c3d8901aed8832a6b4193cc8e4886881f1bb88ff055350/ragas-0.2.15.tar.gz", hash = "sha256:2d0cd77b315a9c9c02ceb0a19ca8a48e82e1d02416587a2944ea51e6e327cd7b", size = 40867766, upload-time = "2025-04-24T16:39:28.734Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/9b/a5641da8aab06e069885a9ffa1b4897878f14c5b9807a9e3c5f1f532a6a9/ragas-0.2.15-py3-none-any.whl", hash = "sha256:298cd3d1fe3bd21ca4d31023a55079740d7bdd27a8c915bb371cec3c50cde608", size = 190947, upload-time = "2025-04-24T16:39:25.841Z" },
+]
+
 [[package]]
 name = "referencing"
 version = "0.37.0"
diff --git a/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval-atif.yml b/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval-atif.yml
new file mode 100644
index 0000000000..8114786ea3
--- /dev/null
+++ b/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval-atif.yml
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+function_groups:
+  calculator:
+    _type: calculator
+
+functions:
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    temperature: 0.0
+    max_tokens: 1024
+  eval_llm:
+    _type: nim
+    model_name: mistralai/mixtral-8x22b-instruct-v0.1
+    temperature: 0.0
+    max_tokens: 1024
+  openai_llm:
+    _type: openai
+    model_name: gpt-3.5-turbo
+    max_tokens: 2000
+
+workflow:
+  _type: react_agent
+  tool_names: [calculator, current_datetime]
+  llm_name: nim_llm
+  verbose: true
+  parse_agent_response_max_retries: 3
+
+
+eval:
+  general:
+    max_concurrency: 1
+    output:
+      dir: .tmp/nat/examples/getting_started/simple_calculator/atif
+      write_atif_workflow_output: true
+    dataset:
+      _type: json
+      file_path: examples/getting_started/simple_calculator/data/simple_calculator.json
+  evaluators:
+    tuneable_eval:
+      _type: tunable_rag_evaluator
+      enable_atif_evaluator: true
+      llm_name: eval_llm
+      default_scoring: true
+      default_score_weights:
+        coverage: 0.5
+        correctness: 0.3
+        relevance: 0.2
+      judge_llm_prompt: >
+        You are an intelligent evaluator that scores the generated answer based on the description of the expected answer.
+        The score is a measure of how well the generated answer matches the description of the expected answer based on the question.
+        Take into account the question, the relevance of the answer to the question and the quality compared to the description of the expected answer.
+
+        Rules:
+        - The score must be a float of any value between 0.0 and 1.0 on a sliding scale.
+        - The reasoning string must be concise and to the point. It should be 1 sentence and 2 only if extra description is needed. It must explain why the score was given and what is different between the generated answer and the expected answer.
+        - The tags <image> and <chart> are real images and charts.
diff --git a/examples/evaluation_and_profiling/simple_calculator_eval/uv.lock b/examples/evaluation_and_profiling/simple_calculator_eval/uv.lock
index cade30036f..cb42530ebc 100644
--- a/examples/evaluation_and_profiling/simple_calculator_eval/uv.lock
+++ b/examples/evaluation_and_profiling/simple_calculator_eval/uv.lock
@@ -2070,6 +2070,7 @@ requires-dist = [
     { name = "nat-math-assistant-a2a-protected", marker = "extra == 'examples'", editable = "../../A2A/math_assistant_a2a_protected" },
     { name = "nat-multi-frameworks", marker = "extra == 'examples'", editable = "../../frameworks/multi_frameworks" },
     { name = "nat-notebooks", marker = "extra == 'examples'", editable = "../../notebooks" },
+    { name = "nat-parallel-executor", marker = "extra == 'examples'", editable = "../../control_flow/parallel_executor" },
     { name = "nat-per-user-workflow", marker = "extra == 'examples'", editable = "../../front_ends/per_user_workflow" },
     { name = "nat-plot-charts", marker = "extra == 'examples'", editable = "../../custom_functions/plot_charts" },
     { name = "nat-por-to-jiratickets", marker = "extra == 'examples'", editable = "../../HITL/por_to_jiratickets" },
@@ -2144,6 +2145,7 @@ requires-dist = [
     { name = "nvidia-nat-rag", marker = "extra == 'rag'", editable = "../../../packages/nvidia_nat_rag" },
     { name = "nvidia-nat-ragaai", marker = "extra == 'ragaai'", editable = "../../../packages/nvidia_nat_ragaai" },
     { name = "nvidia-nat-ragas", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_ragas" },
+    { name = "nvidia-nat-ragas", marker = "extra == 'ragas'", editable = "../../../packages/nvidia_nat_ragas" },
     { name = "nvidia-nat-redis", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-redis", marker = "extra == 'redis'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-s3", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_s3" },
@@ -2164,7 +2166,7 @@ requires-dist = [
     { name = "nvidia-nat-zep-cloud", marker = "extra == 'zep-cloud'", editable = "../../../packages/nvidia_nat_zep_cloud" },
     { name = "text-file-ingest", marker = "extra == 'examples'", editable = "../../documentation_guides/workflows/text_file_ingest" },
 ]
-provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
+provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragas", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
 
 [package.metadata.requires-dev]
 dev = [
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_atif.yml b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_atif.yml
new file mode 100644
index 0000000000..8a0aeff0d0
--- /dev/null
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_atif.yml
@@ -0,0 +1,82 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+functions:
+  webpage_query:
+    _type: webpage_query
+    webpage_url: https://docs.smith.langchain.com
+    description: "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!"
+    embedder_name: nv-embedqa-e5-v5
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    temperature: 0.0
+  nim_rag_eval_llm:
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    max_tokens: 8
+  nim_trajectory_eval_llm:
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    temperature: 0.0
+    max_tokens: 1024
+
+embedders:
+  nv-embedqa-e5-v5:
+    _type: nim
+    model_name: nvidia/nv-embedqa-e5-v5
+
+workflow:
+  _type: react_agent
+  tool_names: [webpage_query, current_datetime]
+  llm_name: nim_llm
+  verbose: true
+  parse_agent_response_max_retries: 3
+
+eval:
+  general:
+    max_concurrency: 1
+    output:
+      dir: ./.tmp/nat/examples/evaluation_and_profiling/simple_web_query_eval/atif/
+      cleanup: true
+      write_atif_workflow_output: true
+    dataset:
+      _type: json
+      file_path: examples/evaluation_and_profiling/simple_web_query_eval/data/langsmith.json
+  evaluators:
+    # RAGAS evaluators now run through the ATIF-native evaluator lane.
+    accuracy:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: AnswerAccuracy
+      llm_name: nim_rag_eval_llm
+    groundedness:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ResponseGroundedness
+      llm_name: nim_rag_eval_llm
+    relevance:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ContextRelevance
+      llm_name: nim_rag_eval_llm
+    trajectory_accuracy:
+      _type: trajectory
+      enable_atif_evaluator: true
+      llm_name: nim_trajectory_eval_llm
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama31_atif.yml b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama31_atif.yml
new file mode 100644
index 0000000000..d74abffef0
--- /dev/null
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama31_atif.yml
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+functions:
+  webpage_query:
+    _type: webpage_query
+    webpage_url: https://docs.smith.langchain.com
+    description: "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!"
+    embedder_name: nv-embedqa-e5-v5
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.1-8b-instruct
+    temperature: 0.0
+  nim_rag_eval_llm:
+    _type: nim
+    model_name: meta/llama-3.1-70b-instruct
+    max_tokens: 8
+  nim_trajectory_eval_llm:
+    _type: nim
+    model_name: meta/llama-3.1-70b-instruct
+    temperature: 0.0
+    max_tokens: 1024
+
+embedders:
+  nv-embedqa-e5-v5:
+    _type: nim
+    model_name: nvidia/nv-embedqa-e5-v5
+
+workflow:
+  _type: react_agent
+  tool_names: [webpage_query, current_datetime]
+  llm_name: nim_llm
+  verbose: true
+  parse_agent_response_max_retries: 3
+
+eval:
+  general:
+    max_concurrency: 1
+    workflow_alias: nat-simple-llama-31-8b
+    output:
+      dir: ./.tmp/nat/examples/evaluation_and_profiling/simple_web_query_eval/atif/llama-31-8b
+      cleanup: true
+      write_atif_workflow_output: true
+    dataset:
+      _type: json
+      file_path: examples/evaluation_and_profiling/simple_web_query_eval/data/langsmith.json
+    profiler:
+      base_metrics: true
+
+  evaluators:
+    accuracy:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: AnswerAccuracy
+      llm_name: nim_rag_eval_llm
+    groundedness:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ResponseGroundedness
+      llm_name: nim_rag_eval_llm
+    relevance:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ContextRelevance
+      llm_name: nim_rag_eval_llm
+    trajectory_accuracy:
+      _type: trajectory
+      enable_atif_evaluator: true
+      llm_name: nim_trajectory_eval_llm
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33.yml b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33.yml
index 531b954853..2a40694d75 100644
--- a/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33.yml
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33.yml
@@ -14,13 +14,6 @@
 # limitations under the License.
 
 
-general:
-  telemetry:
-    tracing:
-      weave:
-        _type: weave
-        project: "nat-simple"
-
 functions:
   webpage_query:
     _type: webpage_query
@@ -37,11 +30,11 @@ llms:
     temperature: 0.0
   nim_rag_eval_llm:
     _type: nim
-    model_name: meta/llama-3.1-70b-instruct
+    model_name: meta/llama-3.3-70b-instruct
     max_tokens: 8
   nim_trajectory_eval_llm:
     _type: nim
-    model_name: meta/llama-3.1-70b-instruct
+    model_name: meta/llama-3.3-70b-instruct
     temperature: 0.0
     max_tokens: 1024
 
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33_atif.yml b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33_atif.yml
new file mode 100644
index 0000000000..09ef63ccee
--- /dev/null
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/src/nat_simple_web_query_eval/configs/eval_config_llama33_atif.yml
@@ -0,0 +1,82 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+functions:
+  webpage_query:
+    _type: webpage_query
+    webpage_url: https://docs.smith.langchain.com
+    description: "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!"
+    embedder_name: nv-embedqa-e5-v5
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.3-70b-instruct
+    temperature: 0.0
+  nim_rag_eval_llm:
+    _type: nim
+    model_name: meta/llama-3.3-70b-instruct
+    max_tokens: 8
+  nim_trajectory_eval_llm:
+    _type: nim
+    model_name: meta/llama-3.3-70b-instruct
+    temperature: 0.0
+    max_tokens: 1024
+
+embedders:
+  nv-embedqa-e5-v5:
+    _type: nim
+    model_name: nvidia/nv-embedqa-e5-v5
+
+workflow:
+  _type: react_agent
+  tool_names: [webpage_query, current_datetime]
+  llm_name: nim_llm
+  verbose: true
+  parse_agent_response_max_retries: 3
+
+eval:
+  general:
+    max_concurrency: 1
+    output:
+      dir: ./.tmp/nat/examples/evaluation_and_profiling/simple_web_query_eval/atif/llama-33-70b
+      cleanup: true
+      write_atif_workflow_output: true
+    dataset:
+      _type: json
+      file_path: examples/evaluation_and_profiling/simple_web_query_eval/data/langsmith.json
+  evaluators:
+    # RAGAS evaluators now run through the ATIF-native evaluator lane.
+    accuracy:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: AnswerAccuracy
+      llm_name: nim_rag_eval_llm
+    groundedness:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ResponseGroundedness
+      llm_name: nim_rag_eval_llm
+    relevance:
+      _type: ragas
+      enable_atif_evaluator: true
+      metric: ContextRelevance
+      llm_name: nim_rag_eval_llm
+    trajectory_accuracy:
+      _type: trajectory
+      enable_atif_evaluator: true
+      llm_name: nim_trajectory_eval_llm
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/uv.lock b/examples/evaluation_and_profiling/simple_web_query_eval/uv.lock
index d80266bb21..e83c43c508 100644
--- a/examples/evaluation_and_profiling/simple_web_query_eval/uv.lock
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/uv.lock
@@ -220,6 +220,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" },
+]
+
 [[package]]
 name = "appnope"
 version = "0.1.4"
@@ -2830,7 +2839,7 @@ name = "nat-simple-web-query-eval"
 source = { editable = "." }
 dependencies = [
     { name = "nat-simple-web-query" },
-    { name = "nvidia-nat", extra = ["eval", "langchain", "profiler", "test"] },
+    { name = "nvidia-nat", extra = ["eval", "langchain", "profiler", "ragas", "test"] },
 ]
 
 [package.metadata]
@@ -2965,6 +2974,9 @@ profiler = [
 ragaai = [
     { name = "nvidia-nat-ragaai" },
 ]
+ragas = [
+    { name = "nvidia-nat-ragas" },
+]
 test = [
     { name = "nvidia-nat-test" },
 ]
@@ -2992,6 +3004,7 @@ requires-dist = [
     { name = "nat-math-assistant-a2a-protected", marker = "extra == 'examples'", editable = "../../A2A/math_assistant_a2a_protected" },
     { name = "nat-multi-frameworks", marker = "extra == 'examples'", editable = "../../frameworks/multi_frameworks" },
     { name = "nat-notebooks", marker = "extra == 'examples'", editable = "../../notebooks" },
+    { name = "nat-parallel-executor", marker = "extra == 'examples'", editable = "../../control_flow/parallel_executor" },
     { name = "nat-per-user-workflow", marker = "extra == 'examples'", editable = "../../front_ends/per_user_workflow" },
     { name = "nat-plot-charts", marker = "extra == 'examples'", editable = "../../custom_functions/plot_charts" },
     { name = "nat-por-to-jiratickets", marker = "extra == 'examples'", editable = "../../HITL/por_to_jiratickets" },
@@ -3066,6 +3079,7 @@ requires-dist = [
     { name = "nvidia-nat-rag", marker = "extra == 'rag'", editable = "../../../packages/nvidia_nat_rag" },
     { name = "nvidia-nat-ragaai", marker = "extra == 'ragaai'", editable = "../../../packages/nvidia_nat_ragaai" },
     { name = "nvidia-nat-ragas", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_ragas" },
+    { name = "nvidia-nat-ragas", marker = "extra == 'ragas'", editable = "../../../packages/nvidia_nat_ragas" },
     { name = "nvidia-nat-redis", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-redis", marker = "extra == 'redis'", editable = "../../../packages/nvidia_nat_redis" },
     { name = "nvidia-nat-s3", marker = "extra == 'most'", editable = "../../../packages/nvidia_nat_s3" },
@@ -3086,7 +3100,7 @@ requires-dist = [
     { name = "nvidia-nat-zep-cloud", marker = "extra == 'zep-cloud'", editable = "../../../packages/nvidia_nat_zep_cloud" },
     { name = "text-file-ingest", marker = "extra == 'examples'", editable = "../../documentation_guides/workflows/text_file_ingest" },
 ]
-provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
+provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragas", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -3352,6 +3366,24 @@ requires-dist = [
 ]
 provides-extras = ["test"]
 
+[[package]]
+name = "nvidia-nat-ragas"
+source = { editable = "../../../packages/nvidia_nat_ragas" }
+dependencies = [
+    { name = "nvidia-nat-core" },
+    { name = "nvidia-nat-eval" },
+    { name = "ragas" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-nat-core", editable = "../../../packages/nvidia_nat_core" },
+    { name = "nvidia-nat-eval", editable = "../../../packages/nvidia_nat_eval" },
+    { name = "nvidia-nat-test", marker = "extra == 'test'", editable = "../../../packages/nvidia_nat_test" },
+    { name = "ragas", specifier = "~=0.2.14" },
+]
+provides-extras = ["test"]
+
 [[package]]
 name = "nvidia-nat-test"
 source = { editable = "../../../packages/nvidia_nat_test" }
@@ -4896,6 +4928,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b6/2f/ecce53efba5696591b615476904ab0bd847d154a0a878c061eb8e461b0a9/ragaai_catalyst-2.2.7-py3-none-any.whl", hash = "sha256:bc9b42504ea2b9d88a48c07a0164fc367b52a6e6c828712137a6682013d12a74", size = 436462, upload-time = "2025-11-26T09:37:36.372Z" },
 ]
 
+[[package]]
+name = "ragas"
+version = "0.2.15"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "appdirs" },
+    { name = "datasets" },
+    { name = "diskcache" },
+    { name = "langchain" },
+    { name = "langchain-community" },
+    { name = "langchain-core" },
+    { name = "langchain-openai" },
+    { name = "nest-asyncio" },
+    { name = "numpy" },
+    { name = "openai" },
+    { name = "pydantic" },
+    { name = "tiktoken" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6c/0f/04fddfa94744b1c3d8901aed8832a6b4193cc8e4886881f1bb88ff055350/ragas-0.2.15.tar.gz", hash = "sha256:2d0cd77b315a9c9c02ceb0a19ca8a48e82e1d02416587a2944ea51e6e327cd7b", size = 40867766, upload-time = "2025-04-24T16:39:28.734Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/9b/a5641da8aab06e069885a9ffa1b4897878f14c5b9807a9e3c5f1f532a6a9/ragas-0.2.15-py3-none-any.whl", hash = "sha256:298cd3d1fe3bd21ca4d31023a55079740d7bdd27a8c915bb371cec3c50cde608", size = 190947, upload-time = "2025-04-24T16:39:25.841Z" },
+]
+
 [[package]]
 name = "referencing"
 version = "0.37.0"
diff --git a/packages/nvidia_nat_core/src/nat/builder/evaluator.py b/packages/nvidia_nat_core/src/nat/builder/evaluator.py
index 07c3ca4698..3019a0d31f 100644
--- a/packages/nvidia_nat_core/src/nat/builder/evaluator.py
+++ b/packages/nvidia_nat_core/src/nat/builder/evaluator.py
@@ -22,7 +22,10 @@
 
 class EvaluatorInfo:
 
-    def __init__(self, *, config: EvaluatorBaseConfig, evaluate_fn: Callable[[EvalInput], EvalOutput],
+    def __init__(self,
+                 *,
+                 config: EvaluatorBaseConfig,
+                 evaluate_fn: Callable[[EvalInput], EvalOutput] | None = None,
                  description: str):
         self.config = config
         self.evaluate_fn = evaluate_fn
diff --git a/packages/nvidia_nat_core/src/nat/data_models/evaluate_config.py b/packages/nvidia_nat_core/src/nat/data_models/evaluate_config.py
index d2785a7134..f7b6a62c8a 100644
--- a/packages/nvidia_nat_core/src/nat/data_models/evaluate_config.py
+++ b/packages/nvidia_nat_core/src/nat/data_models/evaluate_config.py
@@ -92,6 +92,11 @@ class EvalOutputConfig(BaseModel):
     workflow_output_step_filter: list[IntermediateStepType] | None = Field(
         default=None, description="Filter for the workflow output steps.")
 
+    write_atif_workflow_output: bool = Field(
+        default=False,
+        description="When enabled, also writes ATIF-converted workflow output to `workflow_output_atif.json` "
+        "for troubleshooting and debugging.")
+
 
 class EvalGeneralConfig(BaseModel):
     """
diff --git a/packages/nvidia_nat_core/src/nat/utils/atif_message_utils.py b/packages/nvidia_nat_core/src/nat/utils/atif_message_utils.py
new file mode 100644
index 0000000000..6033a5749c
--- /dev/null
+++ b/packages/nvidia_nat_core/src/nat/utils/atif_message_utils.py
@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Shared helpers for extracting text from ATIF messages and trajectories."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from nat.data_models.atif import ATIFContentPart
+from nat.data_models.atif import ATIFTrajectory
+
+
+def content_part_to_text(part: ATIFContentPart) -> str:
+    """Render one ATIF content part as text (image parts yield their source path)."""
+    kind = part.type
+    if kind == "text":
+        return part.text or ""
+    has_image_path = kind == "image" and part.source
+    return part.source.path if has_image_path else ""
+
+
+def message_to_text(message: str | Sequence[ATIFContentPart] | None) -> str:
+    """Convert ATIF message content to plain text, joining non-empty parts with newlines."""
+    if message is None:
+        return ""
+    if isinstance(message, str):
+        return message
+    return "\n".join(text for text in map(content_part_to_text, message) if text)
+
+
+def trajectory_to_user_input(trajectory: ATIFTrajectory) -> str:
+    """Find the earliest user-sourced step with non-empty text and return that text."""
+    user_messages = (step.message for step in trajectory.steps if step.source == "user")
+    for message in user_messages:
+        rendered = message_to_text(message)
+        if rendered:
+            return rendered
+    return ""
diff --git a/packages/nvidia_nat_core/tests/eval/test_evaluate_callbacks.py b/packages/nvidia_nat_core/tests/eval/test_evaluate_callbacks.py
index 772dd058c7..cd68d2a11a 100644
--- a/packages/nvidia_nat_core/tests/eval/test_evaluate_callbacks.py
+++ b/packages/nvidia_nat_core/tests/eval/test_evaluate_callbacks.py
@@ -31,7 +31,7 @@ def test_callback_manager_accepted_by_init(self):
 
     def test_callback_manager_defaults_to_empty(self):
         """EvaluationRun defaults callback_manager to an empty EvalCallbackManager."""
-        config = EvaluationRunConfig(config_file=Path("dummy.yml"))
+        config = EvaluationRunConfig(config_file=Path("dummy.yml"), write_output=False)
         runner = EvaluationRun(config=config)
         assert isinstance(runner.callback_manager, EvalCallbackManager)
         assert not runner.callback_manager.has_callbacks
diff --git a/packages/nvidia_nat_core/tests/nat/builder/test_evaluator.py b/packages/nvidia_nat_core/tests/nat/builder/test_evaluator.py
new file mode 100644
index 0000000000..a67ad9eede
--- /dev/null
+++ b/packages/nvidia_nat_core/tests/nat/builder/test_evaluator.py
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest.mock import MagicMock
+
+from nat.builder.evaluator import EvaluatorInfo
+
+
+def test_evaluator_info_allows_missing_evaluate_fn():
+    """`EvaluatorInfo` should support ATIF-only evaluators."""
+    info = EvaluatorInfo(config=MagicMock(), description="ATIF-only evaluator")
+    assert info.evaluate_fn is None
diff --git a/packages/nvidia_nat_eval/scripts/compare_eval_runs.py b/packages/nvidia_nat_eval/scripts/compare_eval_runs.py
new file mode 100644
index 0000000000..8decb8d267
--- /dev/null
+++ b/packages/nvidia_nat_eval/scripts/compare_eval_runs.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""Compare two eval run output directories.
+
+This script compares evaluator outputs from two run directories.
+By default it prioritizes common files (RAGAS, trajectory, and tunable RAG),
+and it also auto-discovers any additional ``*_output.json`` evaluator files.
+
+It prints:
+- average score delta per evaluator
+- per-item score change count
+- optional per-item score diffs (with --show-item-diffs)
+
+Example:
+    python3 packages/nvidia_nat_eval/scripts/compare_eval_runs.py \
+      .tmp/nat/examples/evaluation_and_profiling/simple_web_query_eval/atif/llama-33-70b \
+      .tmp/nat/examples/evaluation_and_profiling/simple_web_query_eval/llama-33-70b \
+      --show-item-diffs
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+EVALUATOR_FILES = (
+    "accuracy_output.json",
+    "groundedness_output.json",
+    "relevance_output.json",
+    "trajectory_accuracy_output.json",
+    "tuneable_eval_output.json",
+    "tunable_eval_output.json",
+)
+
+
+def _read_json(path: Path) -> dict:
+    # Decode the file as UTF-8 and parse it as JSON; errors are not handled here.
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _score_delta(a: object, b: object) -> float | None:
+    both_numeric = isinstance(a, (int, float)) and isinstance(b, (int, float))
+    # A delta is only defined when both scores are numbers.
+    return float(a) - float(b) if both_numeric else None
+
+
+def _fmt_score(v: object) -> str:
+    """Format floats with six decimals; fall back to ``str()`` for any other value."""
+    is_float = isinstance(v, float)
+    return f"{v:.6f}" if is_float else str(v)
+
+
+def _discover_evaluator_files(run_a: Path, run_b: Path) -> list[str]:
+    """Collect the evaluator output file names present in either run directory.
+
+    Every ``*_output.json`` file counts, apart from the workflow output files
+    (``workflow_output.json`` and ``workflow_output_atif.json``).
+
+    Returns:
+        File names with the ``EVALUATOR_FILES`` entries first (in their declared
+        order), followed by all remaining names in sorted order.
+    """
+    workflow_files = {"workflow_output.json", "workflow_output_atif.json"}
+    found: set[str] = set()
+    for run_dir in (run_a, run_b):
+        # A run directory that does not exist contributes nothing.
+        if run_dir.exists():
+            candidates = (path.name for path in run_dir.glob("*_output.json"))
+            found.update(name for name in candidates if name not in workflow_files)
+
+    # Known evaluator files keep their declared priority order.
+    preferred = [name for name in EVALUATOR_FILES if name in found]
+
+    # Everything else follows in sorted order.
+    extras = [name for name in sorted(found) if name not in preferred]
+
+    return preferred + extras
+
+
+def _index_items_by_id(data: dict) -> tuple[dict[str, dict], int]:
+    """Index ``eval_output_items`` by stringified ``id``; also count skipped entries.
+
+    Entries that are not dicts, or that carry no ``id``, are skipped and counted.
+    """
+    indexed: dict[str, dict] = {}
+    skipped = 0
+    # `or []` tolerates an explicit JSON null for the item list.
+    for item in data.get("eval_output_items") or []:
+        item_id = item.get("id") if isinstance(item, dict) else None
+        if item_id is None:
+            skipped += 1
+            continue
+        indexed[str(item_id)] = item
+    return indexed, skipped
+
+
+def compare_evaluator(run_a: Path, run_b: Path, file_name: str, show_item_diffs: bool) -> None:
+    """Compare a single evaluator output file across two runs and print a summary.
+
+    Args:
+        run_a: Path to the first run output directory.
+        run_b: Path to the second run output directory.
+        file_name: Evaluator output JSON file name to compare.
+        show_item_diffs: Whether to print per-item score differences.
+
+    Returns:
+        None. Results are printed to stdout.
+    """
+    path_a = run_a / file_name
+    path_b = run_b / file_name
+
+    if not path_a.exists() or not path_b.exists():
+        print(f"- {file_name}: missing in one/both runs")
+        return
+
+    loaded: list[dict] = []
+    for label, path in (("run_a", path_a), ("run_b", path_b)):
+        try:
+            data = _read_json(path)
+        except (OSError, json.JSONDecodeError, ValueError) as e:
+            print(f"- {file_name}: unreadable in {label} ({path}): {e}")
+            return
+        # A top-level array or scalar has no `.get`; report it instead of crashing.
+        if not isinstance(data, dict):
+            print(f"- {file_name}: unreadable in {label} ({path}): top-level JSON value is not an object")
+            return
+        loaded.append(data)
+    data_a, data_b = loaded
+
+    avg_a = data_a.get("average_score")
+    avg_b = data_b.get("average_score")
+    delta = _score_delta(avg_a, avg_b)
+
+    items_a, skipped_a = _index_items_by_id(data_a)
+    items_b, skipped_b = _index_items_by_id(data_b)
+
+    # Shorter ids sort first, so that e.g. "2" comes before "10".
+    all_ids = sorted(set(items_a) | set(items_b), key=lambda x: (len(x), x))
+
+    # An item missing from one run is compared against a score of None.
+    changed_ids: list[str] = []
+    for item_id in all_ids:
+        score_a = items_a.get(item_id, {}).get("score")
+        score_b = items_b.get(item_id, {}).get("score")
+        if score_a != score_b:
+            changed_ids.append(item_id)
+
+    print(f"\n{file_name}")
+    print(f"  avg_score run_a={_fmt_score(avg_a)} run_b={_fmt_score(avg_b)}", end="")
+    if delta is not None:
+        print(f" delta={delta:+.6f}")
+    else:
+        print(" delta=N/A")
+    print(f"  item_count run_a={len(items_a)} run_b={len(items_b)} changed_items={len(changed_ids)}")
+    if skipped_a or skipped_b:
+        print(f"  skipped_items run_a={skipped_a} run_b={skipped_b}")
+
+    if show_item_diffs and changed_ids:
+        for item_id in changed_ids:
+            score_a = items_a.get(item_id, {}).get("score")
+            score_b = items_b.get(item_id, {}).get("score")
+            print(f"    id={item_id} run_a={_fmt_score(score_a)} run_b={_fmt_score(score_b)}")
+
+
+def main() -> int:
+    """Command-line entry point: compare evaluator outputs of two run directories.
+
+    Takes two positional run directories plus an optional ``--show-item-diffs``
+    flag, and compares every evaluator output file discovered in either one.
+
+    Returns:
+        0 once the comparison completes, including when no output files are found.
+    """
+    cli = argparse.ArgumentParser(description="Compare evaluator outputs between two eval runs.")
+    cli.add_argument("run_a", type=Path, help="Path to first run output directory")
+    cli.add_argument("run_b", type=Path, help="Path to second run output directory")
+    cli.add_argument("--show-item-diffs", action="store_true", help="Print per-item score deltas for changed items")
+    opts = cli.parse_args()
+    run_a, run_b = opts.run_a, opts.run_b
+
+    print(f"Run A: {run_a}")
+    print(f"Run B: {run_b}")
+
+    names = _discover_evaluator_files(run_a, run_b)
+    if names:
+        for name in names:
+            compare_evaluator(run_a, run_b, name, opts.show_item_diffs)
+    else:
+        print("\nNo evaluator output files found in either run directory.")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/cli/evaluate.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/cli/evaluate.py
index 1b3f6645a8..918c6cf5b7 100644
--- a/packages/nvidia_nat_eval/src/nat/plugins/eval/cli/evaluate.py
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/cli/evaluate.py
@@ -113,10 +113,15 @@ def write_tabular_output(eval_run_output: EvaluationRunOutput):
     # Print header with workflow status and runtime
     workflow_status = "INTERRUPTED" if eval_run_output.workflow_interrupted else "COMPLETED"
     total_runtime = eval_run_output.usage_stats.total_runtime if eval_run_output.usage_stats else 0.0
+    workflow_output_files = ["workflow_output.json"]
+    if eval_run_output.workflow_output_file:
+        atif_workflow_output = eval_run_output.workflow_output_file.parent / "workflow_output_atif.json"
+        if atif_workflow_output.exists():
+            workflow_output_files.append("workflow_output_atif.json")
 
     click.echo("")
     click.echo(click.style("=== EVALUATION SUMMARY ===", fg="bright_blue", bold=True))
-    click.echo(f"Workflow Status: {workflow_status} (workflow_output.json)")
+    click.echo(f"Workflow Status: {workflow_status} ({', '.join(workflow_output_files)})")
     click.echo(f"Total Runtime: {total_runtime:.2f}s")
 
     # Include profiler stats if available
@@ -213,7 +218,7 @@ def _build_eval_callback_manager(config: EvaluationRunConfig):
 
 
 async def run_and_evaluate(config: EvaluationRunConfig):
-    from nat.eval.eval_callbacks import EvalCallbackManager
+    from nat.plugins.eval.eval_callbacks import EvalCallbackManager
     from nat.plugins.eval.exporters.file_eval_callback import FileEvalCallback
 
     callback_manager = _build_eval_callback_manager(config) or EvalCallbackManager()
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/eval_callbacks.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/eval_callbacks.py
index 81fd5cdb13..b5ef0f9e4b 100644
--- a/packages/nvidia_nat_eval/src/nat/plugins/eval/eval_callbacks.py
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/eval_callbacks.py
@@ -61,6 +61,7 @@ class EvalResult:
 
     evaluation_outputs: list[tuple[str, Any]] = field(default_factory=list)
     workflow_output_json: str | None = None
+    atif_workflow_output_json: str | None = None
     run_config: Any | None = None
     effective_config: Any | None = None
     output_dir: Path | None = None
@@ -74,6 +75,7 @@ def build_eval_result(
     usage_stats: Any | None = None,
     item_span_ids: dict[str, int] | None = None,
     workflow_output_json: str | None = None,
+    atif_workflow_output_json: str | None = None,
     run_config: Any | None = None,
     effective_config: Any | None = None,
     output_dir: Path | None = None,
@@ -118,6 +120,7 @@ def build_eval_result(
         items=cb_items,
         evaluation_outputs=evaluation_results,
         workflow_output_json=workflow_output_json,
+        atif_workflow_output_json=atif_workflow_output_json,
         run_config=run_config,
         effective_config=effective_config,
         output_dir=output_dir,
@@ -186,8 +189,11 @@ def needs_root_span_ids(self) -> bool:
 
     def on_dataset_loaded(self, *, dataset_name: str, items: list[EvalInputItem]) -> None:
         for cb in self._callbacks:
+            fn = getattr(cb, "on_dataset_loaded", None)
+            if not fn:
+                continue
             try:
-                cb.on_dataset_loaded(dataset_name=dataset_name, items=items)
+                fn(dataset_name=dataset_name, items=items)
             except Exception:
                 logger.exception("EvalCallback %s.on_dataset_loaded failed", type(cb).__name__)
 
@@ -266,8 +272,11 @@ def evaluation_context(self):
 
     def on_eval_complete(self, result: EvalResult) -> None:
         for cb in self._callbacks:
+            fn = getattr(cb, "on_eval_complete", None)
+            if not fn:
+                continue
             try:
-                cb.on_eval_complete(result)
+                fn(result)
             except Exception:
                 logger.exception("EvalCallback %s.on_eval_complete failed", type(cb).__name__)
 
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/evaluator/atif_evaluator.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/evaluator/atif_evaluator.py
new file mode 100644
index 0000000000..ea7c780d86
--- /dev/null
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/evaluator/atif_evaluator.py
@@ -0,0 +1,59 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ATIF-native evaluator protocol definitions."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+from typing import Protocol
+from typing import runtime_checkable
+
+from pydantic import BaseModel
+from pydantic import Field
+
+from nat.data_models.atif import ATIFTrajectory
+from nat.data_models.evaluator import EvalOutput
+
+
+class AtifEvalSample(BaseModel):
+    """ATIF-native evaluation sample used by ATIF-backed evaluators."""
+
+    item_id: Any = Field(description="Identifier matching the source EvalInputItem.")
+    trajectory: ATIFTrajectory = Field(description="Canonical ATIF trajectory.")
+    expected_output_obj: Any = Field(default=None, description="Optional expected output reference.")
+    output_obj: Any = Field(default=None, description="Optional workflow output reference.")
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Optional evaluator metadata.")
+
+
+AtifEvalSampleList = Sequence[AtifEvalSample]
+
+
+@runtime_checkable
+class AtifEvaluator(Protocol):
+    """Protocol for evaluators that consume ATIF-native samples."""
+
+    async def evaluate_atif_fn(self, atif_samples: AtifEvalSampleList) -> EvalOutput:
+        """Evaluate using ATIF-native sample payloads."""
+        ...
+
+
+@runtime_checkable
+class LegacyEvaluator(Protocol):
+    """Protocol for evaluators that consume legacy `EvalInput` payloads."""
+
+    async def evaluate_fn(self, eval_input) -> EvalOutput:
+        """Evaluate using legacy eval input payloads."""
+        ...
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/exporters/file_eval_callback.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/exporters/file_eval_callback.py
index a83398eda7..b405a16445 100644
--- a/packages/nvidia_nat_eval/src/nat/plugins/eval/exporters/file_eval_callback.py
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/exporters/file_eval_callback.py
@@ -44,6 +44,7 @@ class FileEvalCallback:
 
     def __init__(self) -> None:
         self.workflow_output_file: Path | None = None
+        self.atif_workflow_output_file: Path | None = None
         self.evaluator_output_files: list[Path] = []
         self.config_original_file: Path | None = None
         self.config_effective_file: Path | None = None
@@ -149,14 +150,21 @@ def _build_run_metadata(run_config: Any) -> dict[str, Any]:
 
     def _write_workflow_output(self, result: EvalResult, output_dir: Path) -> None:
         """Write the serialized workflow output JSON."""
-        if result.workflow_output_json is None:
+        if result.workflow_output_json is not None:
+            workflow_output_file = output_dir / "workflow_output.json"
+            with open(workflow_output_file, "w", encoding="utf-8") as f:
+                f.write(result.workflow_output_json)
+            self.workflow_output_file = workflow_output_file
+            logger.info("Workflow output written to %s", workflow_output_file)
+
+        if result.atif_workflow_output_json is None:
             return
 
-        workflow_output_file = output_dir / "workflow_output.json"
-        with open(workflow_output_file, "w", encoding="utf-8") as f:
-            f.write(result.workflow_output_json)
-        self.workflow_output_file = workflow_output_file
-        logger.info("Workflow output written to %s", workflow_output_file)
+        atif_workflow_output_file = output_dir / "workflow_output_atif.json"
+        with open(atif_workflow_output_file, "w", encoding="utf-8") as f:
+            f.write(result.atif_workflow_output_json)
+        self.atif_workflow_output_file = atif_workflow_output_file
+        logger.info("ATIF workflow output written to %s", atif_workflow_output_file)
 
     def _write_evaluator_outputs(self, result: EvalResult, output_dir: Path) -> None:
         """Write per-evaluator result files."""
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/__init__.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/__init__.py
index e69de29bb2..a0a2fd31fe 100644
--- a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/__init__.py
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/__init__.py
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .eval_harness import EvaluationHarness
+
+__all__ = ["EvaluationHarness"]
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/atif_adapter.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/atif_adapter.py
new file mode 100644
index 0000000000..c0a8500daf
--- /dev/null
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/atif_adapter.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ATIF adapter utilities for eval runtime ingress.
+
+This module provides a single-conversion adapter layer from ``EvalInputItem``
+trajectory data to ``ATIFTrajectory`` objects. Runtime code uses this to avoid
+per-evaluator conversion and to keep ATIF as the canonical internal trace shape.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from nat.data_models.atif import ATIFTrajectory
+from nat.data_models.evaluator import EvalInput
+from nat.data_models.evaluator import EvalInputItem
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
+from nat.utils.atif_converter import IntermediateStepToATIFConverter
+
+
+class EvalAtifAdapter:
+    """Build and cache ATIF trajectories for eval items."""
+
+    def __init__(self, converter: IntermediateStepToATIFConverter | None = None) -> None:
+        self._converter = converter or IntermediateStepToATIFConverter()
+        self._cache: dict[str, ATIFTrajectory] = {}
+
+    @staticmethod
+    def _cache_key(item_id: Any) -> str:
+        item_type = type(item_id)
+        return f"{item_type.__module__}.{item_type.__qualname__}:{item_id!r}"
+
+    def _coerce_trajectory(self, value: Any) -> ATIFTrajectory:
+        if isinstance(value, ATIFTrajectory):
+            return value
+        if isinstance(value, Mapping):
+            return ATIFTrajectory.model_validate(value)
+        raise TypeError(f"Unsupported ATIF trajectory payload type: {type(value)}")
+
+    def get_trajectory(self,
+                       item: EvalInputItem,
+                       prebuilt: ATIFTrajectory | Mapping[str, Any] | None = None) -> ATIFTrajectory:
+        """Return the item's ATIF trajectory, converting at most once; a cached entry wins over `prebuilt`."""
+        key = self._cache_key(item.id)
+        if key in self._cache:
+            return self._cache[key]
+
+        if prebuilt is not None:
+            trajectory = self._coerce_trajectory(prebuilt)
+        else:
+            trajectory = self._converter.convert(steps=item.trajectory, session_id=key)
+        self._cache[key] = trajectory
+        return trajectory
+
+    def _ensure_cache(self,
+                      eval_input: EvalInput,
+                      prebuilt_trajectories: Mapping[str, ATIFTrajectory | Mapping[str, Any]] | None = None) -> None:
+        """Populate cache for all eval items."""
+        for item in eval_input.eval_input_items:
+            prebuilt = None
+            if prebuilt_trajectories is not None:
+                # Prefer type-aware cache keys but allow legacy string keys.
+                prebuilt = prebuilt_trajectories.get(self._cache_key(item.id))
+                if prebuilt is None:
+                    prebuilt = prebuilt_trajectories.get(str(item.id))
+            self.get_trajectory(item=item, prebuilt=prebuilt)
+
+    def build_samples(
+            self,
+            eval_input: EvalInput,
+            prebuilt_trajectories: Mapping[str, ATIFTrajectory | Mapping[str, Any]] | None = None
+    ) -> AtifEvalSampleList:
+        """Build ATIF-native samples for all eval input items, in input order."""
+        self._ensure_cache(eval_input=eval_input, prebuilt_trajectories=prebuilt_trajectories)
+        samples: list[AtifEvalSample] = []
+        for item in eval_input.eval_input_items:
+            trajectory = self._cache[self._cache_key(item.id)]
+            samples.append(
+                AtifEvalSample(
+                    item_id=item.id,
+                    trajectory=trajectory,
+                    expected_output_obj=item.expected_output_obj,
+                    output_obj=item.output_obj,
+                    metadata={},
+                ))
+        return samples
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/eval_harness.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/eval_harness.py
new file mode 100644
index 0000000000..431b705e84
--- /dev/null
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/eval_harness.py
@@ -0,0 +1,88 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Lightweight ATIF-only evaluator harness.
+
+This harness is intentionally narrow in scope:
+- it evaluates ATIF-native evaluators only (`evaluate_atif_fn`)
+- it runs evaluators concurrently
+- it returns per-evaluator `EvalOutput` objects
+
+Example:
+    ```python
+    harness = EvaluationHarness()
+    results = await harness.evaluate(
+        evaluators={"trajectory": trajectory_evaluator},
+        atif_samples=atif_samples,
+    )
+    ```
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+
+from nat.data_models.evaluator import EvalOutput
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvaluator
+
+logger = logging.getLogger(__name__)
+
+
+class EvaluationHarness:
+    """Run ATIF-native evaluators against a shared sample list."""
+
+    def __init__(self, logger_instance: logging.Logger | None = None):
+        self._logger = logger_instance or logger
+
+    async def _evaluate_single(self, evaluator_name: str, evaluator: AtifEvaluator,
+                               atif_samples: AtifEvalSampleList) -> tuple[str, EvalOutput] | None:
+        """Run one evaluator on the ATIF lane, logging (not raising) ordinary failures.
+
+        Returns:
+            ``(evaluator_name, result)`` on success; ``None`` if `evaluate_atif_fn` is missing or raised.
+        """
+        if not callable(getattr(evaluator, "evaluate_atif_fn", None)):
+            self._logger.warning("Skipping evaluator %s: missing callable evaluate_atif_fn", evaluator_name)
+            return None
+
+        try:
+            eval_output = await evaluator.evaluate_atif_fn(atif_samples)
+            return evaluator_name, eval_output
+        except Exception:
+            # Best-effort policy: log per-evaluator failure and continue.
+            self._logger.exception("An error occurred while running evaluator %s", evaluator_name)
+            return None
+
+    async def evaluate(self, evaluators: dict[str, AtifEvaluator],
+                       atif_samples: AtifEvalSampleList) -> dict[str, EvalOutput]:
+        """Evaluate ATIF-native evaluators concurrently.
+
+        Args:
+            evaluators: Evaluators keyed by evaluator name; falsy entries are ignored.
+            atif_samples: Pre-built ATIF samples shared by all evaluators.
+
+        Returns:
+            A mapping of evaluator name to `EvalOutput` for successful evaluators.
+        """
+        tasks = [
+            self._evaluate_single(evaluator_name=name, evaluator=evaluator, atif_samples=atif_samples)
+            for name, evaluator in evaluators.items() if evaluator
+        ]
+        if not tasks:
+            return {}
+
+        results = await asyncio.gather(*tasks)
+        return dict(result for result in results if result is not None)
diff --git a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/evaluate.py b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/evaluate.py
index 8256c8b8f5..46e2cf1390 100644
--- a/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/evaluate.py
+++ b/packages/nvidia_nat_eval/src/nat/plugins/eval/runtime/evaluate.py
@@ -14,9 +14,11 @@
 # limitations under the License.
 
 import asyncio
+import inspect
 import json
 import logging
 import shutil
+from collections.abc import Awaitable
 from contextlib import nullcontext
 from datetime import UTC
 from datetime import datetime
@@ -45,11 +47,16 @@
 from nat.data_models.intermediate_step import IntermediateStepType
 from nat.plugins.eval.dataset_handler.dataset_handler import DatasetHandler
 from nat.plugins.eval.eval_callbacks import EvalCallbackManager
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvaluator
+from nat.plugins.eval.evaluator.atif_evaluator import LegacyEvaluator
+from nat.plugins.eval.runtime.eval_harness import EvaluationHarness
 from nat.plugins.eval.runtime.llm_validator import validate_llm_endpoints
 from nat.plugins.eval.utils.output_uploader import OutputUploader
 from nat.runtime.session import SessionManager
 
 if TYPE_CHECKING:
+    from nat.plugins.eval.eval_callbacks import EvalCallbackManager
+    from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
     from nat.plugins.eval.exporters.file_eval_callback import FileEvalCallback
 
 logger = logging.getLogger(__name__)
@@ -73,13 +80,22 @@ def __init__(self, config: EvaluationRunConfig, callback_manager: "EvalCallbackM
         # Run-specific configuration
         self.config: EvaluationRunConfig = config
         self.callback_manager: EvalCallbackManager = callback_manager or EvalCallbackManager()
+        if self.config.write_output:
+            from nat.plugins.eval.exporters.file_eval_callback import FileEvalCallback
+            if not any(isinstance(cb, FileEvalCallback) for cb in self.callback_manager._callbacks):
+                # Keep direct `EvaluationRun(...)` behavior consistent with CLI usage.
+                self.callback_manager.register(FileEvalCallback())
         self.eval_config: EvalConfig | None = None
         self.effective_config: Config | None = None  # Stores the complete config after applying overrides
 
         # Helpers
         self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
+        from nat.plugins.eval.runtime.atif_adapter import EvalAtifAdapter
+        self.atif_adapter = EvalAtifAdapter()
+        self.evaluation_harness = EvaluationHarness()
         # Metadata
         self.eval_input: EvalInput | None = None
+        self.atif_eval_samples: AtifEvalSampleList = []
         self.workflow_interrupted: bool = False
 
         # evaluation_results is list of tuples (evaluator_name, EvalOutput)
@@ -477,6 +493,15 @@ def write_output(self, dataset_handler: DatasetHandler, profiler_results: Profil
         self.workflow_output_file = workflow_output_file
         logger.info("Workflow output written to %s", workflow_output_file)
 
+        output_config = self.eval_config.general.output
+        if output_config and output_config.write_atif_workflow_output:
+            atif_workflow_output_file = self.eval_config.general.output_dir / "workflow_output_atif.json"
+            atif_workflow_output = json.dumps([sample.model_dump(mode="json") for sample in self.atif_eval_samples],
+                                              indent=2)
+            with open(atif_workflow_output_file, "w", encoding="utf-8") as f:
+                f.write(atif_workflow_output)
+            logger.info("ATIF workflow output written to %s", atif_workflow_output_file)
+
         # Write the output of each evaluator to a separate json file
         for evaluator_name, eval_output in self.evaluation_results:
             output_file = self.eval_config.general.output_dir / f"{evaluator_name}_output.json"
@@ -506,8 +531,31 @@ def publish_output(self, dataset_handler: DatasetHandler, profiler_results: Prof
 
     async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
         """Run a single evaluator and store its results."""
+        if not isinstance(evaluator, AtifEvaluator):
+            # Anything that is not ATIF-native goes through the legacy `evaluate_fn` lane.
+            await self._run_single_legacy_evaluator(evaluator_name, evaluator)
+            return
+        if self.eval_input is not None and not self.atif_eval_samples:
+            # No cached samples yet (called outside run_and_evaluate): build them on demand.
+            self.atif_eval_samples = self.atif_adapter.build_samples(self.eval_input)
+        outputs_by_name = await self.evaluation_harness.evaluate({evaluator_name: evaluator}, self.atif_eval_samples)
+        atif_output = outputs_by_name.get(evaluator_name)
+        if atif_output is None:
+            return
+        self.evaluation_results.append((evaluator_name, atif_output))
+        if self.callback_manager:
+            await self.callback_manager.a_on_evaluator_score(eval_output=atif_output, evaluator_name=evaluator_name)
+
+    async def _run_single_legacy_evaluator(self, evaluator_name: str, evaluator: Any):
+        """Run one evaluator through the legacy `evaluate_fn` lane."""
         try:
-            eval_output = await evaluator.evaluate_fn(self.eval_input)
+            evaluate_fn = getattr(evaluator, "evaluate_fn", None)
+            if not isinstance(evaluator, LegacyEvaluator):
+                raise TypeError(f"Evaluator '{evaluator_name}' is missing callable evaluate_fn and evaluate_atif_fn")
+            eval_result = evaluate_fn(self.eval_input)
+            if not inspect.isawaitable(eval_result):
+                raise TypeError(f"Evaluator '{evaluator_name}' evaluate_fn must return an awaitable")
+            eval_output = await eval_result
             self.evaluation_results.append((evaluator_name, eval_output))
             if self.callback_manager:
                 await self.callback_manager.a_on_evaluator_score(eval_output=eval_output, evaluator_name=evaluator_name)
@@ -516,14 +564,40 @@ async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
 
     async def run_evaluators(self, evaluators: dict[str, Any]):
         """Run all configured evaluators asynchronously."""
-        tasks = [self.run_single_evaluator(name, evaluator) for name, evaluator in evaluators.items() if evaluator]
+        atif_evaluators: dict[str, AtifEvaluator] = {}
+        legacy_evaluators: dict[str, LegacyEvaluator] = {}
+        for name, evaluator in evaluators.items():
+            if not evaluator:
+                continue
+            if isinstance(evaluator, AtifEvaluator):
+                atif_evaluators[name] = evaluator
+            elif isinstance(evaluator, LegacyEvaluator):
+                legacy_evaluators[name] = evaluator
+            else:
+                logger.warning("Skipping evaluator %s: missing ATIF and legacy evaluator interfaces", name)
 
-        if not tasks:
+        if not atif_evaluators and not legacy_evaluators:
             logger.warning("All evaluators were empty or invalid.")
             return
 
         try:
-            await asyncio.gather(*tasks)
+            if atif_evaluators:
+                if not self.atif_eval_samples and self.eval_input is not None:
+                    # No cached samples yet (direct call to run_evaluators): build them from eval_input.
+                    self.atif_eval_samples = self.atif_adapter.build_samples(self.eval_input)
+                harness_results = await self.evaluation_harness.evaluate(atif_evaluators, self.atif_eval_samples)
+                for evaluator_name, eval_output in harness_results.items():
+                    self.evaluation_results.append((evaluator_name, eval_output))
+                    if self.callback_manager:
+                        await self.callback_manager.a_on_evaluator_score(eval_output=eval_output,
+                                                                         evaluator_name=evaluator_name)
+
+            if legacy_evaluators:
+                tasks: list[Awaitable[None]] = [
+                    self._run_single_legacy_evaluator(evaluator_name=name, evaluator=evaluator)
+                    for name, evaluator in legacy_evaluators.items()
+                ]
+                await asyncio.gather(*tasks)
         except Exception as e:
             logger.error("An error occurred while running evaluators: %s", e)
             raise
@@ -590,10 +664,14 @@ def _on_eval_complete(self, dataset_handler: DatasetHandler | None = None) -> No
             from nat.plugins.eval.eval_callbacks import build_eval_result
 
             workflow_output_json: str | None = None
+            atif_workflow_output_json: str | None = None
             if dataset_handler is not None and self.eval_input is not None:
                 step_filter = (self.eval_config.general.output.workflow_output_step_filter
                                if self.eval_config and self.eval_config.general.output else None)
                 workflow_output_json = dataset_handler.publish_eval_input(self.eval_input, step_filter)
+                if self.eval_config.general.output and self.eval_config.general.output.write_atif_workflow_output:
+                    atif_workflow_output_json = json.dumps(
+                        [sample.model_dump(mode="json") for sample in self.atif_eval_samples], indent=2)
 
             scores = {name: output.average_score for name, output in self.evaluation_results}
             result = build_eval_result(
@@ -603,6 +681,7 @@ def _on_eval_complete(self, dataset_handler: DatasetHandler | None = None) -> No
                 usage_stats=self.usage_stats,
                 item_span_ids=self._item_span_ids,
                 workflow_output_json=workflow_output_json,
+                atif_workflow_output_json=atif_workflow_output_json,
                 run_config=self.config,
                 effective_config=self.effective_config,
                 output_dir=(self.eval_config.general.output_dir if self.eval_config else None),
@@ -746,9 +825,20 @@ async def run_and_evaluate(self,
 
                     # Pre-evaluation process the workflow output
                     self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
+                    evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
+                    needs_atif_samples = any(
+                        callable(getattr(evaluator, "evaluate_atif_fn", None)) for evaluator in evaluators.values()
+                        if evaluator is not None)
+                    write_atif_workflow_output = bool(self.eval_config.general.output
+                                                      and self.eval_config.general.output.write_atif_workflow_output)
+                    if needs_atif_samples or write_atif_workflow_output:
+                        # Build and cache ATIF trajectories when ATIF evaluators are present or ATIF workflow export is
+                        # explicitly requested.
+                        self.atif_eval_samples = self.atif_adapter.build_samples(self.eval_input)
+                    else:
+                        self.atif_eval_samples = []
 
                     # Evaluate
-                    evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
                     await self.run_evaluators(evaluators)
 
                     # Wait for all trace export tasks to complete (local workflows only)
diff --git a/packages/nvidia_nat_eval/tests/eval/test_atif_adapter.py b/packages/nvidia_nat_eval/tests/eval/test_atif_adapter.py
new file mode 100644
index 0000000000..b0488022c7
--- /dev/null
+++ b/packages/nvidia_nat_eval/tests/eval/test_atif_adapter.py
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nat.data_models.atif import ATIFAgentConfig
+from nat.data_models.atif import ATIFTrajectory
+from nat.data_models.evaluator import EvalInput
+from nat.data_models.evaluator import EvalInputItem
+from nat.data_models.intermediate_step import IntermediateStep
+from nat.data_models.intermediate_step import IntermediateStepPayload
+from nat.data_models.intermediate_step import IntermediateStepType
+from nat.data_models.intermediate_step import InvocationNode
+from nat.data_models.intermediate_step import StreamEventData
+from nat.plugins.eval.runtime.atif_adapter import EvalAtifAdapter
+
+
+def _make_eval_input_item(item_id: str = "item-1") -> EvalInputItem:
+    """Build an `EvalInputItem` whose trajectory holds a single LLM_END step."""
+    ancestry = InvocationNode(function_name="llm_test", function_id="llm-test")
+    event_data = StreamEventData(input="input", output="output")
+    payload = IntermediateStepPayload(event_type=IntermediateStepType.LLM_END, data=event_data)
+    llm_end_step = IntermediateStep(parent_id="root", function_ancestry=ancestry, payload=payload)
+    return EvalInputItem(id=item_id,
+                         input_obj="input",
+                         expected_output_obj="expected", output_obj="actual",
+                         trajectory=[llm_end_step],
+                         full_dataset_entry={"id": item_id})
+
+
+class _CountingConverter:
+    """Converter stand-in that counts `convert` calls and returns a minimal trajectory."""
+    def __init__(self) -> None:
+        self.calls = 0
+
+    def convert(self, steps: list[IntermediateStep], *, session_id: str | None = None, agent_name: str | None = None):
+        self.calls = self.calls + 1
+        agent_config = ATIFAgentConfig(name=agent_name or "nat-agent", version="0.0.0")
+        return ATIFTrajectory(session_id=session_id or "sid", agent=agent_config)
+
+
+def test_private_ensure_cache_converts_once_per_item():
+    """Repeated `_ensure_cache` calls on the same input convert the item only once."""
+    counting_converter = _CountingConverter()
+    adapter_under_test = EvalAtifAdapter(converter=counting_converter)
+    single_item_input = EvalInput(eval_input_items=[_make_eval_input_item("1")])
+
+    for _ in range(2):
+        adapter_under_test._ensure_cache(single_item_input)
+    assert counting_converter.calls == 1
+
+
+def test_build_samples_uses_prebuilt_trajectory_without_conversion():
+    """A trajectory supplied via `prebuilt_trajectories` is used without calling the converter."""
+    counting_converter = _CountingConverter()
+    adapter_under_test = EvalAtifAdapter(converter=counting_converter)
+    eval_input = EvalInput(eval_input_items=[_make_eval_input_item("sample-a")])
+    prebuilt_agent = ATIFAgentConfig(name="prebuilt-agent", version="0.0.0")
+    prebuilt = ATIFTrajectory(session_id="sample-a", agent=prebuilt_agent)
+
+    built = adapter_under_test.build_samples(eval_input, prebuilt_trajectories={"sample-a": prebuilt})
+
+    assert counting_converter.calls == 0
+    assert len(built) == 1
+    assert (built[0].trajectory.agent.name, built[0].item_id) == ("prebuilt-agent", "sample-a")
diff --git a/packages/nvidia_nat_eval/tests/eval/test_eval_harness.py b/packages/nvidia_nat_eval/tests/eval/test_eval_harness.py
new file mode 100644
index 0000000000..ed9be89738
--- /dev/null
+++ b/packages/nvidia_nat_eval/tests/eval/test_eval_harness.py
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest.mock import AsyncMock
+from unittest.mock import patch
+
+from nat.data_models.evaluator import EvalOutput
+from nat.data_models.evaluator import EvalOutputItem
+from nat.plugins.eval.runtime.eval_harness import EvaluationHarness
+
+
+async def test_evaluate_returns_per_evaluator_outputs():
+    """Every successful evaluator contributes its own output, keyed by name in input order."""
+    harness = EvaluationHarness()
+    samples = [object()]
+
+    expected = {
+        "A": EvalOutput(average_score=1.0, eval_output_items=[EvalOutputItem(id=1, score=1.0, reasoning={})]),
+        "B": EvalOutput(average_score=0.5, eval_output_items=[EvalOutputItem(id=1, score=0.5, reasoning={})]),
+    }
+    evaluators = {}
+    for name, output in expected.items():
+        evaluators[name] = AsyncMock()
+        evaluators[name].evaluate_atif_fn = AsyncMock(return_value=output)
+
+    results = await harness.evaluate(evaluators, samples)
+
+    assert list(results) == ["A", "B"]
+    assert results == expected
+    for evaluator in evaluators.values():
+        evaluator.evaluate_atif_fn.assert_awaited_once_with(samples)
+
+
+async def test_evaluate_best_effort_when_one_evaluator_fails():
+    """A raising evaluator is logged and dropped while the remaining outputs are still returned."""
+    harness = EvaluationHarness()
+    samples = [object()]
+
+    good_output = EvalOutput(average_score=0.7, eval_output_items=[EvalOutputItem(id=1, score=0.7, reasoning={})])
+    good_evaluator, bad_evaluator = AsyncMock(), AsyncMock()
+    good_evaluator.evaluate_atif_fn = AsyncMock(return_value=good_output)
+    bad_evaluator.evaluate_atif_fn = AsyncMock(side_effect=RuntimeError("boom"))
+    evaluators = {"good": good_evaluator, "bad": bad_evaluator}
+
+    with patch("nat.plugins.eval.runtime.eval_harness.logger.exception") as mock_log_exception:
+        results = await harness.evaluate(evaluators, samples)
+
+    assert results == {"good": good_output}
+    mock_log_exception.assert_called_once()
+    good_evaluator.evaluate_atif_fn.assert_awaited_once_with(samples)
+    bad_evaluator.evaluate_atif_fn.assert_awaited_once_with(samples)
+
+
+async def test_evaluate_skips_none_evaluator_entry():
+    """A `None` evaluator entry is dropped silently: empty result and no warning logged."""
+    harness = EvaluationHarness()
+    evaluators_with_gap = {"missing": None}
+
+    with patch("nat.plugins.eval.runtime.eval_harness.logger.warning") as warning_spy:
+        outputs = await harness.evaluate(evaluators_with_gap, [object()])
+
+    warning_spy.assert_not_called()
+    assert outputs == {}
diff --git a/packages/nvidia_nat_eval/tests/eval/test_evaluate.py b/packages/nvidia_nat_eval/tests/eval/test_evaluate.py
index cec19718bd..8777f3d12f 100644
--- a/packages/nvidia_nat_eval/tests/eval/test_evaluate.py
+++ b/packages/nvidia_nat_eval/tests/eval/test_evaluate.py
@@ -15,12 +15,15 @@
 
 import asyncio
 import inspect
+import json
 import os
 import time
 from contextlib import asynccontextmanager
 from pathlib import Path
+from types import SimpleNamespace
 from unittest.mock import AsyncMock
 from unittest.mock import MagicMock
+from unittest.mock import mock_open
 from unittest.mock import patch
 from uuid import UUID
 from uuid import uuid4
@@ -44,6 +47,7 @@
 from nat.data_models.intermediate_step import IntermediateStepType
 from nat.data_models.intermediate_step import StreamEventData
 from nat.data_models.invocation_node import InvocationNode
+from nat.plugins.eval.exporters.file_eval_callback import FileEvalCallback
 from nat.plugins.eval.runtime.evaluate import EvaluationRun
 from nat.runtime.session import SessionManager
 
@@ -93,6 +97,12 @@ def generated_answer():
     return "Generated answer"
 
 
+def test_evaluation_run_registers_file_callback_by_default(default_eval_run_config):
+    """Constructing `EvaluationRun` with the default run config registers a `FileEvalCallback`."""
+    registered = EvaluationRun(default_eval_run_config).callback_manager._callbacks
+    assert any(isinstance(callback, FileEvalCallback) for callback in registered)
+
+
 @pytest.fixture
 def tool_end_intermediate_step():
     """Fixture to create a valid TOOL_END IntermediateStep."""
@@ -129,14 +139,18 @@ def eval_output(average_score):
 def mock_evaluator(eval_output):
     """Fixture to create a mock evaluator."""
 
-    async def mock_evaluate_fn(_eval_input):
-        return eval_output
+    class LegacyEvaluatorDouble:
 
-    # Create a mock evaluator
-    mock_evaluator = AsyncMock()
-    mock_evaluator.evaluate_fn = AsyncMock(side_effect=mock_evaluate_fn)
+        def __init__(self, output):
 
-    return mock_evaluator
+            async def mock_evaluate_fn(_eval_input):
+                return output
+
+            self.evaluate_fn = AsyncMock(side_effect=mock_evaluate_fn)
+            # Explicitly disable ATIF lane for legacy evaluator fixture.
+            self.evaluate_atif_fn = None
+
+    return LegacyEvaluatorDouble(eval_output)
 
 
 @pytest.fixture
@@ -504,6 +518,37 @@ async def test_run_single_evaluator_success(evaluation_run, mock_evaluator, eval
     assert result.average_score == average_score, f"Expected average score to be {average_score}"
 
 
+async def test_run_single_evaluator_atif_lane(evaluation_run, eval_output):
+    """An ATIF evaluator is run through the harness via `evaluate_atif_fn`, never via `evaluate_fn`."""
+    harness = evaluation_run.evaluation_harness
+    atif_evaluator = AsyncMock()
+    atif_evaluator.evaluate_atif_fn = AsyncMock(return_value=eval_output)
+    atif_evaluator.evaluate_fn = AsyncMock(side_effect=AssertionError("legacy path should not be called"))
+
+    with patch.object(harness, "evaluate", wraps=harness.evaluate) as harness_spy:
+        await evaluation_run.run_single_evaluator("AtifEvaluator", atif_evaluator)
+
+    harness_spy.assert_awaited_once()
+    atif_evaluator.evaluate_atif_fn.assert_awaited_once()
+    atif_evaluator.evaluate_fn.assert_not_called()
+    # The newest recorded result must be this evaluator's name paired with its output.
+    assert evaluation_run.evaluation_results[-1] == ("AtifEvaluator", eval_output)
+
+
+async def test_run_single_evaluator_atif_lane_lazy_builds_samples(evaluation_run, eval_output):
+    """Calling `run_single_evaluator` directly builds the ATIF samples exactly once, on demand."""
+    adapter = evaluation_run.atif_adapter
+    atif_evaluator = AsyncMock()
+    atif_evaluator.evaluate_atif_fn = AsyncMock(return_value=eval_output)
+    atif_evaluator.evaluate_fn = AsyncMock(side_effect=AssertionError("legacy path should not be called"))
+
+    with patch.object(adapter, "build_samples", wraps=adapter.build_samples) as build_spy:
+        await evaluation_run.run_single_evaluator("AtifEvaluator", atif_evaluator)
+
+    build_spy.assert_called_once()
+    atif_evaluator.evaluate_atif_fn.assert_awaited_once()
+
+
 async def test_run_evaluators_success(evaluation_run, mock_evaluator, eval_output, average_score):
     """Test for running multiple evaluators successfully."""
 
@@ -525,6 +570,26 @@ async def test_run_evaluators_success(evaluation_run, mock_evaluator, eval_outpu
         assert result.average_score == average_score, f"Expected average score to be {average_score}"
 
 
+async def test_run_evaluators_uses_harness_for_atif_evaluators(evaluation_run, eval_output):
+    """Two ATIF evaluators are handled by a single harness call without touching `evaluate_fn`."""
+    harness = evaluation_run.evaluation_harness
+    atif_evaluators = {}
+    # Both evaluators expose the ATIF lane; the legacy lane raises if it is ever awaited.
+    for name in ("Atif1", "Atif2"):
+        evaluator = AsyncMock()
+        evaluator.evaluate_atif_fn = AsyncMock(return_value=eval_output)
+        evaluator.evaluate_fn = AsyncMock(side_effect=AssertionError("legacy path should not be called"))
+        atif_evaluators[name] = evaluator
+
+    with patch.object(harness, "evaluate", wraps=harness.evaluate) as harness_spy:
+        await evaluation_run.run_evaluators(atif_evaluators)
+
+    harness_spy.assert_awaited_once()
+    for evaluator in atif_evaluators.values():
+        evaluator.evaluate_fn.assert_not_called()
+    assert len(evaluation_run.evaluation_results) == 2
+
+
 async def test_run_evaluators_partial_failure(evaluation_run, mock_evaluator, eval_output, average_score):
     """
     Test run_evaluators where one evaluator fails but others succeed.
@@ -536,8 +601,13 @@ async def test_run_evaluators_partial_failure(evaluation_run, mock_evaluator, ev
     bad_evaluator_name = "BadEvaluator"
 
     # Create a failing evaluator
-    mock_failing_evaluator = AsyncMock()
-    mock_failing_evaluator.evaluate_fn.side_effect = RuntimeError("Evaluator failed")
+    class LegacyFailingEvaluatorDouble:
+
+        def __init__(self):
+            self.evaluate_fn = AsyncMock(side_effect=RuntimeError("Evaluator failed"))
+            self.evaluate_atif_fn = None
+
+    mock_failing_evaluator = LegacyFailingEvaluatorDouble()
 
     evaluators = {good_evaluator_name: mock_evaluator, bad_evaluator_name: mock_failing_evaluator}
 
@@ -562,6 +632,229 @@ async def test_run_evaluators_partial_failure(evaluation_run, mock_evaluator, ev
         "Error message should indicate evaluator failure"
 
 
+# Batch-3: Tests for running eval and writing results
+def test_write_output(evaluation_run, default_eval_config, eval_input, eval_output, generated_answer):
+    """Test writing the workflow and evaluation results."""
+    # Populate each item's output, then mock the dataset handler's formatted workflow output
+    for eval_input_item in eval_input.eval_input_items:
+        eval_input_item.output_obj = generated_answer
+
+    mock_dataset_handler = MagicMock()
+    workflow_output = json.dumps([item.model_dump() for item in eval_input.eval_input_items])
+    mock_dataset_handler.publish_eval_input.return_value = workflow_output
+
+    # Seed one evaluator result so that a per-evaluator output file is expected
+    evaluator_name = "MockEvaluator"
+    evaluation_run.evaluation_results = [(evaluator_name, eval_output)]
+
+    # Use the default eval config; its output_dir is the base for the expected file paths
+    evaluation_run.eval_config = default_eval_config
+    output_dir = default_eval_config.general.output_dir
+
+    # Workflow output must be written to workflow_output.json
+    workflow_output_path = output_dir / "workflow_output.json"
+
+    # Evaluator results must be written to {evaluator_name}_output.json
+    evaluator_output_path = output_dir / f"{evaluator_name}_output.json"
+
+    # Use an empty ProfilerResults instance (a real object, not a mock)
+    mock_profiler_results = ProfilerResults()
+
+    # Patch file I/O and logging. The log format is asserted below to keep it frozen for users.
+    with patch("builtins.open", mock_open()) as mock_file, \
+         patch("pathlib.Path.mkdir") as mock_mkdir, \
+         patch("nat.plugins.eval.runtime.evaluate.logger.info") as mock_logger:
+
+        # Call write_output while the patches above are active
+        evaluation_run.write_output(mock_dataset_handler, mock_profiler_results)
+
+        # Ensure directories are created
+        mock_mkdir.assert_called()
+
+        # Ensure the workflow output is written
+        mock_file.assert_any_call(workflow_output_path, "w", encoding="utf-8")
+        mock_file().write.assert_any_call(workflow_output)
+
+        # Ensure the evaluator output is written
+        mock_file.assert_any_call(evaluator_output_path, "w", encoding="utf-8")
+        eval_output_dict = eval_output.model_dump_json(indent=2)
+        mock_file().write.assert_any_call(eval_output_dict)
+
+        # Ensure log format has not changed
+        mock_logger.assert_any_call("Workflow output written to %s", workflow_output_path)
+        mock_logger.assert_any_call("Evaluation results written to %s", evaluator_output_path)
+
+
+def test_write_output_writes_atif_workflow_output_when_enabled(evaluation_run,
+                                                               default_eval_config,
+                                                               eval_input,
+                                                               eval_output):
+    """With `write_atif_workflow_output` enabled, `write_output` also writes `workflow_output_atif.json`."""
+    # One fake ATIF sample whose `model_dump` returns a fixed payload.
+    sample_payload = {"item_id": 1, "trajectory": {"steps": []}}
+    fake_sample = MagicMock(model_dump=MagicMock(return_value=sample_payload))
+    serialized_items = json.dumps([item.model_dump() for item in eval_input.eval_input_items])
+
+    dataset_handler = MagicMock()
+    dataset_handler.publish_eval_input.return_value = serialized_items
+
+    # Enable the optional ATIF export on the run under test.
+    evaluator_name = "MockEvaluator"
+    evaluation_run.evaluation_results = [(evaluator_name, eval_output)]
+    evaluation_run.eval_config = default_eval_config
+    evaluation_run.eval_config.general.output.write_atif_workflow_output = True
+    evaluation_run.atif_eval_samples = [fake_sample]
+
+    # Expected target path and the exact JSON text `write_output` should write there.
+    expected_path = default_eval_config.general.output_dir / "workflow_output_atif.json"
+    expected_text = json.dumps([sample_payload], indent=2)
+
+    profiler_results = ProfilerResults()
+    # Patch file I/O and info logging so the calls can be inspected.
+    with patch("builtins.open", mock_open()) as open_spy, \
+         patch("pathlib.Path.mkdir"), \
+         patch("nat.plugins.eval.runtime.evaluate.logger.info") as info_spy:
+        evaluation_run.write_output(dataset_handler, profiler_results)
+
+        open_spy.assert_any_call(expected_path, "w", encoding="utf-8")
+        open_spy().write.assert_any_call(expected_text)
+        info_spy.assert_any_call("ATIF workflow output written to %s", expected_path)
+
+
+def test_write_output_handles_none_output(evaluation_run, eval_input):
+    """`write_output` must tolerate `eval_config.general.output` being None."""
+    # Minimal stand-in config: only the attributes set here exist, and `output` is None.
+    general_stub = SimpleNamespace(output=None, output_dir=Path(".tmp/nat/examples/mock/"))
+    evaluation_run.eval_config = SimpleNamespace(general=general_stub)
+    evaluation_run.eval_input = eval_input
+
+    dataset_handler = MagicMock()
+    dataset_handler.publish_eval_input.return_value = "[]"
+    profiler_results = ProfilerResults()
+
+    # File I/O and info logging are patched for the duration of the call.
+    with patch("builtins.open", mock_open()), \
+         patch("pathlib.Path.mkdir"), \
+         patch("nat.plugins.eval.runtime.evaluate.logger.info"):
+        try:
+            evaluation_run.write_output(dataset_handler, profiler_results)
+        except AttributeError:
+            # An AttributeError here means `.output` was dereferenced without a None check.
+            pytest.fail("write_output should not access .output without a None check")
+
+
+@pytest.mark.filterwarnings("ignore:.*Pydantic serializer warnings.*:UserWarning")
+def test_write_configuration_with_path_config(evaluation_run, default_eval_config, tmp_path):
+    """Test that write_configuration correctly saves config files when config_file is a Path."""
+    # Write a small YAML config file under the tmp_path fixture directory
+    config_file = tmp_path / "test_config.yml"
+    config_file.write_text("""workflow:
+  type: test
+eval:
+  general:
+    max_concurrency: 1
+""")
+    # Point the run at that file, add one override, and send output under tmp_path
+    evaluation_run.config.config_file = config_file
+    evaluation_run.config.override = (("eval.general.max_concurrency", "5"), )
+    evaluation_run.eval_config = default_eval_config
+    evaluation_run.eval_config.evaluators = {}
+    evaluation_run.eval_config.general.output_dir = tmp_path / "output"
+
+    # Build a real Config (not a mock) to act as the effective config
+    mock_effective_config = Config()
+    mock_effective_config.eval = default_eval_config
+    evaluation_run.effective_config = mock_effective_config
+
+    # Call write_configuration with info-level logging patched
+    with patch("nat.plugins.eval.runtime.evaluate.logger.info") as mock_logger:
+        evaluation_run.write_configuration()
+
+    # Verify that all three files were created
+    config_original_file = evaluation_run.eval_config.general.output_dir / "config_original.yml"
+    config_effective_file = evaluation_run.eval_config.general.output_dir / "config_effective.yml"
+    config_metadata_file = evaluation_run.eval_config.general.output_dir / "config_metadata.json"
+
+    assert config_original_file.exists(), "config_original.yml should be created"
+    assert config_effective_file.exists(), "config_effective.yml should be created"
+    assert config_metadata_file.exists(), "config_metadata.json should be created"
+
+    # Verify metadata content
+    with open(config_metadata_file, encoding="utf-8") as f:
+        metadata = json.load(f)
+    assert metadata["config_file"] == str(config_file)
+    assert metadata["config_file_type"] == "Path"
+    assert len(metadata["overrides"]) == 1
+    assert metadata["overrides"][0]["path"] == "eval.general.max_concurrency"
+    assert metadata["overrides"][0]["value"] == "5"
+
+    # Expect at least three info log calls (the test assumes one per written file)
+    assert mock_logger.call_count >= 3, "Should log for all three config files"
+
+
+@pytest.mark.filterwarnings("ignore:.*Pydantic serializer warnings.*:UserWarning")
+def test_write_configuration_with_basemodel_config(evaluation_run, default_eval_config, tmp_path):
+    """Test that write_configuration correctly saves config files when config_file is a BaseModel."""
+    # Set up the evaluation run with a Config instance (rather than a Path) as config_file
+    mock_config = Config()
+    default_eval_config.evaluators = {}
+    mock_config.eval = default_eval_config
+    evaluation_run.config.config_file = mock_config
+    evaluation_run.config.override = ()  # No overrides
+    evaluation_run.eval_config = default_eval_config
+    evaluation_run.eval_config.general.output_dir = tmp_path / "output"
+    evaluation_run.effective_config = mock_config
+
+    # Run the function with logger.info patched out
+    with patch("nat.plugins.eval.runtime.evaluate.logger.info"):
+        evaluation_run.write_configuration()
+
+    # Verify that all three files were created
+    config_original_file = evaluation_run.eval_config.general.output_dir / "config_original.yml"
+    config_effective_file = evaluation_run.eval_config.general.output_dir / "config_effective.yml"
+    config_metadata_file = evaluation_run.eval_config.general.output_dir / "config_metadata.json"
+
+    assert config_original_file.exists(), "config_original.yml should be created"
+    assert config_effective_file.exists(), "config_effective.yml should be created"
+    assert config_metadata_file.exists(), "config_metadata.json should be created"
+
+    # Verify metadata content: the recorded type label and an empty overrides list
+    with open(config_metadata_file, encoding="utf-8") as f:
+        metadata = json.load(f)
+    assert metadata["config_file_type"] == "BaseModel"
+    assert len(metadata["overrides"]) == 0, "Should have no overrides"
+
+
+def test_write_configuration_handles_missing_effective_config(evaluation_run, default_eval_config, tmp_path):
+    """Test that write_configuration handles a missing (None) effective_config gracefully."""
+    # Create a temporary config file
+    config_file = tmp_path / "test_config.yml"
+    config_file.write_text("workflow:\n  type: test\n")
+
+    # Set up the evaluation run with effective_config left as None
+    evaluation_run.config.config_file = config_file
+    evaluation_run.eval_config = default_eval_config
+    evaluation_run.eval_config.general.output_dir = tmp_path / "output"
+    evaluation_run.effective_config = None  # This is the key test condition
+
+    # Run the function - it should not crash
+    with patch("nat.plugins.eval.runtime.evaluate.logger.info"), \
+         patch("nat.plugins.eval.runtime.evaluate.logger.warning") as mock_warning:
+        evaluation_run.write_configuration()
+
+    # Verify warning was logged
+    mock_warning.assert_any_call("Effective config not available, skipping config_effective.yml")
+
+    # Verify that the original and metadata files were created but the effective one was not
+    config_original_file = evaluation_run.eval_config.general.output_dir / "config_original.yml"
+    config_effective_file = evaluation_run.eval_config.general.output_dir / "config_effective.yml"
+    config_metadata_file = evaluation_run.eval_config.general.output_dir / "config_metadata.json"
+
+    assert config_original_file.exists(), "config_original.yml should be created"
+    assert not config_effective_file.exists(), "config_effective.yml should NOT be created without an effective config"
+    assert config_metadata_file.exists(), "config_metadata.json should be created"
+
+
 # Batch-3: Tests for running eval via run_and_evaluate
 @pytest.mark.parametrize("skip_workflow", [True, False])
 async def test_run_and_evaluate(evaluation_run, default_eval_config, session_manager, mock_evaluator, skip_workflow):
diff --git a/packages/nvidia_nat_eval/tests/eval/test_file_eval_callback.py b/packages/nvidia_nat_eval/tests/eval/test_file_eval_callback.py
index d3b491e0ad..0111c50511 100644
--- a/packages/nvidia_nat_eval/tests/eval/test_file_eval_callback.py
+++ b/packages/nvidia_nat_eval/tests/eval/test_file_eval_callback.py
@@ -88,6 +88,19 @@ def test_file_eval_callback_writes_workflow_output(eval_result, tmp_path):
     assert callback.workflow_output_file == output_file
 
 
+def test_file_eval_callback_writes_atif_workflow_output(eval_result, tmp_path):
+    """Test that FileEvalCallback writes workflow_output_atif.json when atif_workflow_output_json is set."""
+    eval_result.atif_workflow_output_json = '[{"item_id": 1, "trajectory": {"steps": []}}]'
+
+    callback = FileEvalCallback()
+    callback.on_eval_complete(eval_result)
+
+    output_file = tmp_path / "output" / "workflow_output_atif.json"
+    assert output_file.exists()
+    assert output_file.read_text() == eval_result.atif_workflow_output_json
+    assert callback.atif_workflow_output_file == output_file
+
+
 def test_file_eval_callback_writes_evaluator_outputs(eval_result, tmp_path):
     """Test that FileEvalCallback writes per-evaluator output files."""
     callback = FileEvalCallback()
diff --git a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/trajectory_evaluator.py b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/trajectory_evaluator.py
index 42c63ff9a1..54308544be 100644
--- a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/trajectory_evaluator.py
+++ b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/trajectory_evaluator.py
@@ -13,21 +13,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import asyncio
 import logging
+from collections.abc import Mapping
 
 from langchain_classic.evaluation import TrajectoryEvalChain
 from langchain_core.agents import AgentAction
 from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
+from pydantic import Field
 
 from nat.builder.builder import EvalBuilder
 from nat.builder.evaluator import EvaluatorInfo
 from nat.cli.register_workflow import register_evaluator
 from nat.data_models.evaluator import EvalInputItem
+from nat.data_models.evaluator import EvalOutput
 from nat.data_models.evaluator import EvalOutputItem
 from nat.data_models.evaluator import EvaluatorLLMConfig
 from nat.data_models.intermediate_step import IntermediateStep
 from nat.data_models.intermediate_step import IntermediateStepType
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
 from nat.plugins.eval.evaluator.base_evaluator import BaseEvaluator
 from nat.utils.exception_handlers.automatic_retries import patch_with_retry
 
@@ -36,10 +42,20 @@
 _DEFAULT_EVENT_FILTER = [IntermediateStepType.LLM_END, IntermediateStepType.TOOL_END]
 
 
+def _coerce_text(value) -> str:
+    """Coerce a judge-chain input to text; ``None`` becomes the empty string, strings pass through."""
+    if isinstance(value, str):
+        return value
+    return "" if value is None else str(value)
+
+
 class TrajectoryEvaluatorConfig(EvaluatorLLMConfig, name="trajectory"):
     """Agent trajectory evaluator configuration."""
 
-    pass
+    enable_atif_evaluator: bool = Field(
+        default=False,
+        description="Enable ATIF-native trajectory evaluator lane. Disabled by default during migration.",
+    )
 
 
 def _to_agent_actions(intermediate_steps: list[IntermediateStep]) -> list[tuple[AgentAction, str]]:
@@ -63,6 +79,89 @@ def _to_agent_actions(intermediate_steps: list[IntermediateStep]) -> list[tuple[
     return agent_actions
 
 
+def _message_to_text(message) -> str:
+    """Flatten an ATIF message payload into newline-joined text for LangChain trajectory scoring.
+
+    ``None`` yields an empty string and a plain string is returned unchanged. A dict contributes its
+    ``parts`` entry (or itself as the only part when ``parts`` is missing or ``None``); any other value
+    is iterated as a sequence of parts. Text parts add their non-empty ``text``; image parts add their
+    non-empty ``source.path``. Every other part is ignored.
+    """
+    def _field(obj, key):
+        # Attribute lookup first; for dicts the mapping entry (when present) wins over the attribute.
+        fallback = getattr(obj, key, None)
+        return obj.get(key, fallback) if isinstance(obj, dict) else fallback
+
+    if message is None:
+        return ""
+    if isinstance(message, str):
+        return message
+
+    parts = message
+    if isinstance(message, dict):
+        parts = message.get("parts")
+        if parts is None:
+            parts = [message]
+
+    collected: list[str] = []
+    for part in parts:
+        kind = _field(part, "type")
+        if kind == "text":
+            candidate = _field(part, "text")
+        elif kind == "image":
+            candidate = _field(_field(part, "source"), "path")
+        else:
+            continue
+        if isinstance(candidate, str) and candidate:
+            collected.append(candidate)
+    return "\n".join(collected)
+
+
+def _atif_to_agent_actions(trajectory) -> list[tuple[AgentAction, str]]:
+    """Translate the agent steps of an ATIF trajectory into LangChain `agent_trajectory` tuples.
+
+    Each agent step with a model name or message text yields one pseudo-action carrying that text as
+    its output; each tool call then yields an action paired with the observation text recorded for
+    its call id (empty when none was recorded). Non-agent steps are skipped.
+    """
+    actions: list[tuple[AgentAction, str]] = []
+    for step in (s for s in trajectory.steps if s.source == "agent"):
+        step_text = _message_to_text(step.message)
+        if step.model_name or step_text:
+            actions.append((AgentAction(tool=step.model_name or "", tool_input="", log=""), step_text))
+
+        if not step.tool_calls:
+            continue
+
+        # Map tool-call ids to their observation text so each call can be paired with its result.
+        outputs_by_call_id: dict[str, str] = {}
+        if step.observation:
+            outputs_by_call_id = {
+                result.source_call_id: _message_to_text(result.content)
+                for result in step.observation.results if result.source_call_id
+            }
+
+        for call in step.tool_calls:
+            arguments = call.arguments
+            if isinstance(arguments, Mapping):
+                payload = arguments if isinstance(arguments, dict) else dict(arguments)
+            else:
+                payload = str(arguments)
+            tool_action = AgentAction(tool=call.function_name, tool_input=payload, log=step_text)
+            actions.append((tool_action, outputs_by_call_id.get(call.tool_call_id, "")))
+    return actions
+
+
+def _atif_to_user_input(trajectory) -> str:
+    """Return the text of the first user step whose message yields non-empty text.
+
+    Falls back to an empty string when no user step produces any text.
+    """
+    user_steps = (step for step in trajectory.steps if step.source == "user")
+    user_texts = (_message_to_text(step.message) for step in user_steps)
+    return next((text for text in user_texts if text), "")
+
+
 class TrajectoryEvaluator(BaseEvaluator):
 
     def __init__(self, llm: BaseChatModel, tools: list[BaseTool] | None = None, max_concurrency: int = 8):
@@ -72,24 +171,62 @@ def __init__(self, llm: BaseChatModel, tools: list[BaseTool] | None = None, max_
                                                             return_reasoning=True,
                                                             requires_reference=True)
 
-    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
-        question = item.input_obj
-        generated_answer = item.output_obj
-        agent_trajectory = _to_agent_actions(item.trajectory)
-
+    async def _evaluate_with_trajectory(self,
+                                        item_id,
+                                        lane: str,
+                                        question: str,
+                                        generated_answer: str,
+                                        agent_trajectory: list[tuple[AgentAction, str]]) -> EvalOutputItem:
+        """Run trajectory scoring for one item regardless of input lane."""
+        question_text = _coerce_text(question)
+        generated_answer_text = _coerce_text(generated_answer)
         try:
-            eval_result = await self.traj_eval_chain.aevaluate_agent_trajectory(input=question,
+            eval_result = await self.traj_eval_chain.aevaluate_agent_trajectory(input=question_text,
                                                                                 agent_trajectory=agent_trajectory,
-                                                                                prediction=generated_answer)
+                                                                                prediction=generated_answer_text)
         except Exception as e:
-            logger.exception("Error evaluating trajectory for question: %s, Error: %s", question, e)
-            return EvalOutputItem(id=item.id, score=0.0, reasoning={}, error=str(e))
+            # Some judge models occasionally miss the strict "Score: " suffix
+            # expected by LangChain's legacy trajectory parser.
+            if isinstance(e, ValueError) and "not enough values to unpack" in str(e):
+                logger.warning("Trajectory judge output parsing failed [lane=%s item_id=%s]: %s", lane, item_id, e)
+            else:
+                logger.exception("Error evaluating trajectory [lane=%s item_id=%s]", lane, item_id)
+            return EvalOutputItem(id=item_id, score=0.0, reasoning={}, error=str(e))
 
         reasoning = {
             "reasoning": eval_result["reasoning"],
             "trajectory": [(action.model_dump(), output) for (action, output) in agent_trajectory],
         }
-        return EvalOutputItem(id=item.id, score=eval_result["score"], reasoning=reasoning)
+        return EvalOutputItem(id=item_id, score=eval_result["score"], reasoning=reasoning)
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
+        question = item.input_obj
+        generated_answer = item.output_obj
+        agent_trajectory = _to_agent_actions(item.trajectory)
+        return await self._evaluate_with_trajectory(item.id, "legacy", question, generated_answer, agent_trajectory)
+
+    async def evaluate_atif_item(self, sample: AtifEvalSample) -> EvalOutputItem:
+        """Evaluate a single ATIF-native sample."""
+        question = _atif_to_user_input(sample.trajectory)
+        generated_answer = sample.output_obj if sample.output_obj is not None else ""
+        agent_trajectory = _atif_to_agent_actions(sample.trajectory)
+        return await self._evaluate_with_trajectory(sample.item_id,
+                                                    "atif",
+                                                    question,
+                                                    generated_answer,
+                                                    agent_trajectory)
+
+    async def evaluate_atif_fn(self, atif_samples: AtifEvalSampleList) -> EvalOutput:
+        """Score every ATIF sample concurrently under ``self.semaphore`` and average the numeric scores."""
+
+        async def _score_one(sample: AtifEvalSample) -> EvalOutputItem:
+            async with self.semaphore:
+                return await self.evaluate_atif_item(sample)
+
+        items = await asyncio.gather(*(_score_one(sample) for sample in atif_samples))
+        scores = [item.score for item in items if isinstance(item.score, (int, float))]
+        mean_score = round(sum(scores) / len(scores), 2) if scores else None
+        return EvalOutput(average_score=mean_score, eval_output_items=items)
 
 
 @register_evaluator(config_type=TrajectoryEvaluatorConfig)
@@ -107,4 +244,7 @@ async def register_trajectory_evaluator(config: TrajectoryEvaluatorConfig, build
 
     tools = await builder.get_all_tools(wrapper_type=LLMFrameworkEnum.LANGCHAIN)
     evaluator = TrajectoryEvaluator(llm=llm, tools=tools, max_concurrency=builder.get_max_concurrency())
-    yield EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Trajectory Evaluator")
+    evaluator_info = EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Trajectory Evaluator")
+    if config.enable_atif_evaluator:
+        evaluator_info.evaluate_atif_fn = evaluator.evaluate_atif_fn
+    yield evaluator_info
diff --git a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/tunable_rag_evaluator.py b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/tunable_rag_evaluator.py
index 8427c9d111..323fe3762b 100644
--- a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/tunable_rag_evaluator.py
+++ b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/eval/tunable_rag_evaluator.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import asyncio
 import logging
 from collections.abc import Callable
 
@@ -28,11 +29,19 @@
 from nat.builder.evaluator import EvaluatorInfo
 from nat.builder.framework_enum import LLMFrameworkEnum
 from nat.cli.register_workflow import register_evaluator
+from nat.data_models.atif import ATIFContentPart
+from nat.data_models.atif import ATIFTrajectory
 from nat.data_models.component_ref import LLMRef
 from nat.data_models.evaluator import EvalInputItem
+from nat.data_models.evaluator import EvalOutput
 from nat.data_models.evaluator import EvalOutputItem
 from nat.data_models.evaluator import EvaluatorBaseConfig
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
 from nat.plugins.eval.evaluator.base_evaluator import BaseEvaluator
+from nat.utils.atif_message_utils import content_part_to_text
+from nat.utils.atif_message_utils import message_to_text
+from nat.utils.atif_message_utils import trajectory_to_user_input
 
 logger = logging.getLogger(__name__)
 
@@ -52,6 +61,10 @@ class TunableRagEvaluatorConfig(EvaluatorBaseConfig, name="tunable_rag_evaluator
         },
         description="Weights for different scoring components when using default scoring",
     )
+    enable_atif_evaluator: bool = Field(
+        default=False,
+        description="Enable ATIF-native tunable RAG evaluator lane. Disabled by default during migration.",
+    )
 
 
 def evaluation_prompt(judge_llm_prompt: str,
@@ -139,10 +152,8 @@ def __init__(self,
             "relevance": 1 / 3,
         }
 
-    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
-        question = item.input_obj
-        answer_description = item.expected_output_obj
-        generated_answer = item.output_obj
+    async def _evaluate_item_core(self, item_id, question: str, answer_description: str,
+                                  generated_answer: str) -> EvalOutputItem:
         score = 0.0
 
         default_evaluation_schema = [
@@ -246,7 +257,42 @@ async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
                 "reasoning": reasoning,
             }
 
-        return EvalOutputItem(id=item.id, score=score, reasoning=reasoning_obj)
+        return EvalOutputItem(id=item_id, score=score, reasoning=reasoning_obj)
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
+        question = str(item.input_obj) if item.input_obj is not None else ""
+        answer_description = str(item.expected_output_obj) if item.expected_output_obj is not None else ""
+        generated_answer = str(item.output_obj) if item.output_obj is not None else ""
+        return await self._evaluate_item_core(item.id, question, answer_description, generated_answer)
+
+    @staticmethod
+    def _content_part_to_text(part: ATIFContentPart) -> str:
+        return content_part_to_text(part)
+
+    @classmethod
+    def _message_to_text(cls, message: str | list[ATIFContentPart] | None) -> str:
+        return message_to_text(message)
+
+    @classmethod
+    def _trajectory_to_user_input(cls, trajectory: ATIFTrajectory) -> str:
+        return trajectory_to_user_input(trajectory)
+
+    async def evaluate_atif_item(self, sample: AtifEvalSample) -> EvalOutputItem:
+        question = self._trajectory_to_user_input(sample.trajectory)
+        answer_description = str(sample.expected_output_obj) if sample.expected_output_obj is not None else ""
+        generated_answer = str(sample.output_obj) if sample.output_obj is not None else ""
+        return await self._evaluate_item_core(sample.item_id, question, answer_description, generated_answer)
+
+    async def evaluate_atif_fn(self, atif_samples: AtifEvalSampleList) -> EvalOutput:
+
+        async def wrapped(sample: AtifEvalSample) -> EvalOutputItem:
+            async with self.semaphore:
+                return await self.evaluate_atif_item(sample)
+
+        output_items = await asyncio.gather(*[wrapped(sample) for sample in atif_samples])
+        numeric_scores = [item.score for item in output_items if isinstance(item.score, int | float)]
+        avg_score = round(sum(numeric_scores) / len(numeric_scores), 2) if numeric_scores else None
+        return EvalOutput(average_score=avg_score, eval_output_items=output_items)
 
 
 @register_evaluator(config_type=TunableRagEvaluatorConfig)
@@ -259,4 +305,7 @@ async def register_tunable_rag_evaluator(config: TunableRagEvaluatorConfig, buil
                                     max_concurrency=builder.get_max_concurrency(),
                                     default_scoring=config.default_scoring,
                                     default_score_weights=config.default_score_weights)
-    yield EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Tunable RAG Evaluator")
+    evaluator_info = EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Tunable RAG Evaluator")
+    if config.enable_atif_evaluator:
+        evaluator_info.evaluate_atif_fn = evaluator.evaluate_atif_fn
+    yield evaluator_info
diff --git a/packages/nvidia_nat_langchain/tests/eval/test_trajectory_evaluate.py b/packages/nvidia_nat_langchain/tests/eval/test_trajectory_evaluate.py
index 4bd9fcec68..b6e910a2a8 100644
--- a/packages/nvidia_nat_langchain/tests/eval/test_trajectory_evaluate.py
+++ b/packages/nvidia_nat_langchain/tests/eval/test_trajectory_evaluate.py
@@ -21,10 +21,24 @@
 from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 
+from nat.data_models.atif import ATIFAgentConfig
+from nat.data_models.atif import ATIFObservation
+from nat.data_models.atif import ATIFObservationResult
+from nat.data_models.atif import ATIFStep
+from nat.data_models.atif import ATIFToolCall
+from nat.data_models.atif import ATIFTrajectory
 from nat.data_models.evaluator import EvalInput
 from nat.data_models.evaluator import EvalInputItem
 from nat.data_models.evaluator import EvalOutput
+from nat.data_models.intermediate_step import IntermediateStep
+from nat.data_models.intermediate_step import IntermediateStepPayload
+from nat.data_models.intermediate_step import IntermediateStepType
+from nat.data_models.intermediate_step import StreamEventData
+from nat.data_models.invocation_node import InvocationNode
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
 from nat.plugins.langchain.eval.trajectory_evaluator import TrajectoryEvaluator
+from nat.plugins.langchain.eval.trajectory_evaluator import TrajectoryEvaluatorConfig
+from nat.plugins.langchain.eval.trajectory_evaluator import register_trajectory_evaluator
 
 
 @pytest.fixture(name="mock_llm")
@@ -118,3 +132,167 @@ async def test_trajectory_evaluate_failure(trajectory_evaluator, rag_eval_input)
         assert error_message in failed_item.error
         assert successful_item.score == pytest.approx(0.8)
         assert successful_item.reasoning["reasoning"] == "LGTM"
+
+
+@pytest.fixture(name="atif_samples")
+def fixture_atif_samples():
+    return [
+        AtifEvalSample(
+            item_id="1",
+            trajectory=ATIFTrajectory(
+                session_id="atif-1",
+                agent=ATIFAgentConfig(name="test-agent", version="0.0.0"),
+                steps=[
+                    ATIFStep(step_id=1, source="user", message="What is AI?"),
+                    ATIFStep(
+                        step_id=2,
+                        source="agent",
+                        model_name="mock-llm",
+                        message="AI is artificial intelligence.",
+                        tool_calls=[
+                            ATIFToolCall(
+                                tool_call_id="call-1",
+                                function_name="web_search",
+                                arguments={"query": "artificial intelligence"},
+                            )
+                        ],
+                        observation=ATIFObservation(
+                            results=[ATIFObservationResult(source_call_id="call-1", content="Search results context")]),
+                    ),
+                ],
+            ),
+            expected_output_obj="Artificial intelligence.",
+            output_obj="AI is artificial intelligence.",
+            metadata={},
+        ),
+        AtifEvalSample(
+            item_id="2",
+            trajectory=ATIFTrajectory(
+                session_id="atif-2",
+                agent=ATIFAgentConfig(name="test-agent", version="0.0.0"),
+                steps=[
+                    ATIFStep(step_id=1, source="user", message="What is ML?"),
+                    ATIFStep(step_id=2, source="agent", model_name="mock-llm", message="ML is a subset of AI."),
+                ],
+            ),
+            expected_output_obj="Machine learning.",
+            output_obj="ML is a subset of AI.",
+            metadata={},
+        ),
+    ]
+
+
+async def test_trajectory_evaluate_atif_success(trajectory_evaluator, atif_samples):
+    scores = [
+        {
+            "score": 0.9, "reasoning": "atif-1"
+        },
+        {
+            "score": 0.8, "reasoning": "atif-2"
+        },
+    ]
+    expected_average = (0.9 + 0.8) / 2
+
+    with patch.object(trajectory_evaluator, "traj_eval_chain") as mock_traj_eval_chain:
+        mock_traj_eval_chain.aevaluate_agent_trajectory = AsyncMock(side_effect=scores)
+        eval_output = await trajectory_evaluator.evaluate_atif_fn(atif_samples)
+
+    assert isinstance(eval_output, EvalOutput)
+    assert len(eval_output.eval_output_items) == 2
+    assert eval_output.average_score == pytest.approx(expected_average)
+    assert eval_output.eval_output_items[0].score == pytest.approx(0.9)
+    assert eval_output.eval_output_items[1].score == pytest.approx(0.8)
+    assert eval_output.eval_output_items[0].reasoning["reasoning"] == "atif-1"
+    assert eval_output.eval_output_items[1].reasoning["reasoning"] == "atif-2"
+    assert mock_traj_eval_chain.aevaluate_agent_trajectory.call_count == 2
+
+
+async def test_trajectory_legacy_and_atif_lane_parity_with_tolerance(trajectory_evaluator):
+    llm_end_step = IntermediateStep(parent_id="root",
+                                    function_ancestry=InvocationNode(function_name="llm_test",
+                                                                     function_id="test-llm-end"),
+                                    payload=IntermediateStepPayload(event_type=IntermediateStepType.LLM_END,
+                                                                    name="mock-llm",
+                                                                    data=StreamEventData(input="What is AI?",
+                                                                                         output="AI answer")))
+    tool_end_step = IntermediateStep(parent_id="root",
+                                     function_ancestry=InvocationNode(function_name="tool_test",
+                                                                      function_id="test-tool-end"),
+                                     payload=IntermediateStepPayload(event_type=IntermediateStepType.TOOL_END,
+                                                                     name="web_search",
+                                                                     data=StreamEventData(
+                                                                         input={"query": "What is AI?"},
+                                                                         output="Search results context")))
+    legacy_eval_input = EvalInput(eval_input_items=[
+        EvalInputItem(id="1",
+                      input_obj="What is AI?",
+                      expected_output_obj="Artificial intelligence.",
+                      output_obj="AI answer",
+                      expected_trajectory=[],
+                      trajectory=[llm_end_step, tool_end_step],
+                      full_dataset_entry={})
+    ])
+
+    atif_samples = [
+        AtifEvalSample(
+            item_id="1",
+            trajectory=ATIFTrajectory(
+                session_id="atif-parity-1",
+                agent=ATIFAgentConfig(name="test-agent", version="0.0.0"),
+                steps=[
+                    ATIFStep(step_id=1, source="user", message="What is AI?"),
+                    ATIFStep(
+                        step_id=2,
+                        source="agent",
+                        model_name="mock-llm",
+                        message="AI answer",
+                        tool_calls=[
+                            ATIFToolCall(tool_call_id="call-1",
+                                         function_name="web_search",
+                                         arguments={"query": "What is AI?"})
+                        ],
+                        observation=ATIFObservation(
+                            results=[ATIFObservationResult(source_call_id="call-1", content="Search results context")]),
+                    ),
+                ],
+            ),
+            expected_output_obj="Artificial intelligence.",
+            output_obj="AI answer",
+            metadata={},
+        )
+    ]
+
+    async def score_from_trajectory(*, input, agent_trajectory, prediction):  # noqa: ARG001
+        return {"score": float(len(agent_trajectory)), "reasoning": "trajectory-size"}
+
+    with patch.object(trajectory_evaluator, "traj_eval_chain") as mock_traj_eval_chain:
+        mock_traj_eval_chain.aevaluate_agent_trajectory = AsyncMock(side_effect=score_from_trajectory)
+        legacy_output = await trajectory_evaluator.evaluate(legacy_eval_input)
+        atif_output = await trajectory_evaluator.evaluate_atif_fn(atif_samples)
+
+    assert legacy_output.average_score == pytest.approx(atif_output.average_score, abs=0.01)
+    assert legacy_output.eval_output_items[0].score == pytest.approx(atif_output.eval_output_items[0].score, abs=0.01)
+
+
+async def test_register_trajectory_evaluator_exposes_legacy_lane_by_default(mock_llm, mock_tools):
+    config = TrajectoryEvaluatorConfig(llm_name="judge_llm")
+    builder = MagicMock(spec=["get_llm", "get_max_concurrency", "get_all_tools"])
+    builder.get_llm = AsyncMock(return_value=mock_llm)
+    builder.get_all_tools = AsyncMock(return_value=mock_tools)
+    builder.get_max_concurrency.return_value = 2
+
+    async with register_trajectory_evaluator(config, builder) as info:
+        assert callable(info.evaluate_fn)
+        assert not callable(getattr(info, "evaluate_atif_fn", None))
+
+
+async def test_register_trajectory_evaluator_exposes_atif_lane_when_enabled(mock_llm, mock_tools):
+    config = TrajectoryEvaluatorConfig(llm_name="judge_llm", enable_atif_evaluator=True)
+    builder = MagicMock(spec=["get_llm", "get_max_concurrency", "get_all_tools"])
+    builder.get_llm = AsyncMock(return_value=mock_llm)
+    builder.get_all_tools = AsyncMock(return_value=mock_tools)
+    builder.get_max_concurrency.return_value = 2
+
+    async with register_trajectory_evaluator(config, builder) as info:
+        assert callable(info.evaluate_fn)
+        assert callable(getattr(info, "evaluate_atif_fn", None))
diff --git a/packages/nvidia_nat_langchain/tests/eval/test_tunable_rag_evaluate.py b/packages/nvidia_nat_langchain/tests/eval/test_tunable_rag_evaluate.py
index 9252882f9b..09110c3e7a 100644
--- a/packages/nvidia_nat_langchain/tests/eval/test_tunable_rag_evaluate.py
+++ b/packages/nvidia_nat_langchain/tests/eval/test_tunable_rag_evaluate.py
@@ -19,10 +19,16 @@
 import pytest
 from langchain_core.language_models import BaseChatModel
 
+from nat.data_models.atif import ATIFAgentConfig
+from nat.data_models.atif import ATIFStep
+from nat.data_models.atif import ATIFTrajectory
 from nat.data_models.evaluator import EvalInput
 from nat.data_models.evaluator import EvalInputItem
 from nat.data_models.evaluator import EvalOutput
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
 from nat.plugins.langchain.eval.tunable_rag_evaluator import TunableRagEvaluator
+from nat.plugins.langchain.eval.tunable_rag_evaluator import TunableRagEvaluatorConfig
+from nat.plugins.langchain.eval.tunable_rag_evaluator import register_tunable_rag_evaluator
 
 
 @pytest.fixture
@@ -157,3 +163,98 @@ async def test_evaluate_custom_scoring():
     assert len(output.eval_output_items) == 1
     assert output.eval_output_items[0].score == 0.75
     assert output.eval_output_items[0].reasoning["reasoning"] == "Fair explanation."
+
+
+@pytest.fixture(name="atif_samples")
+def fixture_atif_samples():
+    """Two ATIF eval samples, each a user question followed by one agent answer."""
+
+    def _sample(item_id: str, question: str, answer: str, expected: str) -> AtifEvalSample:
+        # The agent's message doubles as the sample's output_obj, and the
+        # session id is derived from the item id ("atif-<item_id>").
+        return AtifEvalSample(
+            item_id=item_id,
+            trajectory=ATIFTrajectory(
+                session_id=f"atif-{item_id}",
+                agent=ATIFAgentConfig(name="test-agent", version="0.0.0"),
+                steps=[
+                    ATIFStep(step_id=1, source="user", message=question),
+                    ATIFStep(step_id=2, source="agent", message=answer),
+                ],
+            ),
+            expected_output_obj=expected,
+            output_obj=answer,
+            metadata={},
+        )
+
+    return [
+        _sample("1",
+                question="What is AI?",
+                answer="AI is the simulation of human intelligence.",
+                expected="AI is artificial intelligence."),
+        _sample("2",
+                question="Define ML",
+                answer="ML helps machines learn.",
+                expected="Machine Learning is a subset of AI."),
+    ]
+
+
+async def test_evaluate_atif_success(evaluator, atif_samples):
+    """The ATIF lane returns one positively scored item per sample and a positive average."""
+    judge_replies = [
+        MagicMock(content='{"coverage_score": 0.9, "correctness_score": 0.8,'
+                  '"relevance_score": 0.7, "reasoning": "ATIF sample 1"}'),
+        MagicMock(content='{"coverage_score": 0.6, "correctness_score": 0.7,'
+                  '"relevance_score": 0.8, "reasoning": "ATIF sample 2"}'),
+    ]
+    evaluator.llm.ainvoke = AsyncMock(side_effect=judge_replies)
+    result: EvalOutput = await evaluator.evaluate_atif_fn(atif_samples)
+    assert isinstance(result, EvalOutput)
+    assert len(result.eval_output_items) == 2
+    assert all(item.score > 0 for item in result.eval_output_items)
+    assert result.average_score > 0
+
+
+async def test_legacy_and_atif_lane_parity_with_tolerance(evaluator, rag_eval_input, atif_samples):
+    """Legacy and ATIF lanes agree (within 0.01) when the judge returns identical per-item outputs."""
+    per_item_payloads = [
+        '{"coverage_score": 0.9, "correctness_score": 0.8,'
+        '"relevance_score": 0.7, "reasoning": "shared-1"}',
+        '{"coverage_score": 0.6, "correctness_score": 0.7,'
+        '"relevance_score": 0.8, "reasoning": "shared-2"}',
+    ]
+    # The same two payloads are queued twice: once for the legacy run, once for the ATIF run.
+    judge_replies = [MagicMock(content=payload) for payload in per_item_payloads * 2]
+    evaluator.llm.ainvoke = AsyncMock(side_effect=judge_replies)
+
+    legacy_output = await evaluator.evaluate(rag_eval_input)
+    atif_output = await evaluator.evaluate_atif_fn(atif_samples)
+
+    assert legacy_output.average_score == pytest.approx(atif_output.average_score, abs=0.01)
+    for index in range(2):
+        legacy_score = legacy_output.eval_output_items[index].score
+        assert legacy_score == pytest.approx(atif_output.eval_output_items[index].score, abs=0.01)
+
+
+async def test_register_tunable_rag_evaluator_exposes_legacy_lane_by_default(mock_llm):
+    """Default config registers the legacy lane only; no callable ATIF lane is exposed."""
+    stub_builder = MagicMock(spec=["get_llm", "get_max_concurrency"])
+    stub_builder.get_max_concurrency.return_value = 2
+    stub_builder.get_llm = AsyncMock(return_value=mock_llm)
+    default_config = TunableRagEvaluatorConfig(llm_name="judge_llm", judge_llm_prompt="Score this answer.")
+    async with register_tunable_rag_evaluator(default_config, stub_builder) as registered:
+        assert callable(registered.evaluate_fn)
+        assert not callable(getattr(registered, "evaluate_atif_fn", None))
+
+
+async def test_register_tunable_rag_evaluator_exposes_atif_lane_when_enabled(mock_llm):
+    """Setting ``enable_atif_evaluator=True`` exposes a callable ATIF lane next to the legacy one."""
+    stub_builder = MagicMock(spec=["get_llm", "get_max_concurrency"])
+    stub_builder.get_max_concurrency.return_value = 2
+    stub_builder.get_llm = AsyncMock(return_value=mock_llm)
+    atif_config = TunableRagEvaluatorConfig(llm_name="judge_llm",
+                                            judge_llm_prompt="Score this answer.",
+                                            enable_atif_evaluator=True)
+    async with register_tunable_rag_evaluator(atif_config, stub_builder) as registered:
+        assert callable(registered.evaluate_fn)
+        assert callable(getattr(registered, "evaluate_atif_fn", None))
diff --git a/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/atif_evaluate.py b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/atif_evaluate.py
new file mode 100644
index 0000000000..44a727061e
--- /dev/null
+++ b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/atif_evaluate.py
@@ -0,0 +1,113 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing
+from collections.abc import Sequence
+
+from tqdm import tqdm
+
+from nat.data_models.atif import ATIFObservationResult
+from nat.data_models.atif import ATIFTrajectory
+from nat.data_models.evaluator import EvalOutput
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
+from nat.plugins.eval.utils.tqdm_position_registry import TqdmPositionRegistry
+from nat.utils.atif_message_utils import message_to_text
+from nat.utils.atif_message_utils import trajectory_to_user_input
+
+from .evaluate import _ragas_results_to_eval_output
+
+if typing.TYPE_CHECKING:
+    from ragas import EvaluationDataset
+    from ragas.llms import LangchainLLMWrapper
+    from ragas.metrics import Metric
+
+logger = logging.getLogger(__name__)
+
+
+def _observation_result_to_text(result: ATIFObservationResult) -> str:
+    """Render one observation result's content as text via ``message_to_text``."""
+    return message_to_text(result.content)
+
+
+def _trajectory_to_retrieved_contexts(trajectory: ATIFTrajectory) -> list[str]:
+    """Return the non-empty observation-result texts of the trajectory, in step order."""
+    rendered_texts = (
+        _observation_result_to_text(result)
+        for step in trajectory.steps
+        if step.observation
+        for result in step.observation.results
+    )
+    return [text for text in rendered_texts if text]
+
+
+class RAGAtifEvaluator:
+    """Runs Ragas metrics over ATIF-native evaluation samples."""
+
+    def __init__(self, evaluator_llm: "LangchainLLMWrapper", metrics: Sequence["Metric"], max_concurrency=8):
+        self.evaluator_llm = evaluator_llm
+        self.metrics = metrics
+        self.max_concurrency = max_concurrency
+
+    def atif_samples_to_ragas(self, atif_samples: AtifEvalSampleList) -> "EvaluationDataset":
+        """Build a Ragas EvaluationDataset with one SingleTurnSample per ATIF sample."""
+        from ragas import EvaluationDataset
+        from ragas import SingleTurnSample
+
+        # reference_contexts is always a single empty string; retrieved contexts come from
+        # the observation results recorded on the trajectory's steps.
+        ragas_samples = [
+            SingleTurnSample(
+                user_input=trajectory_to_user_input(atif_sample.trajectory),
+                reference=atif_sample.expected_output_obj,
+                response=atif_sample.output_obj,
+                reference_contexts=[""],
+                retrieved_contexts=_trajectory_to_retrieved_contexts(atif_sample.trajectory),
+            ) for atif_sample in atif_samples
+        ]
+        return EvaluationDataset(samples=ragas_samples)
+
+    async def evaluate(self, atif_samples: AtifEvalSampleList) -> EvalOutput:
+        """Score ATIF samples with the configured Ragas metrics and return a NAT EvalOutput."""
+        from ragas import evaluate as ragas_evaluate
+        from ragas.run_config import RunConfig
+
+        ragas_dataset = self.atif_samples_to_ragas(atif_samples)
+        tqdm_position = TqdmPositionRegistry.claim()
+        progress_label = f"Evaluating Ragas {self.metrics[0].name if self.metrics else 'no-metrics'}"
+        pbar = tqdm(total=len(ragas_dataset), desc=progress_label, position=tqdm_position)
+        # Stays None when no metrics are configured or the Ragas run raises.
+        results_dataset = None
+        try:
+            if self.metrics:
+                results_dataset = ragas_evaluate(
+                    dataset=ragas_dataset,
+                    metrics=self.metrics,
+                    show_progress=True,
+                    llm=self.evaluator_llm,
+                    run_config=RunConfig(max_workers=self.max_concurrency),
+                    _pbar=pbar,
+                )
+            else:
+                logger.warning("No RAGAS metrics configured for ATIF evaluator; returning empty metric results.")
+        except Exception:
+            # A failed run is logged here and then reported through the helper's no-results path.
+            logger.exception("Error evaluating ATIF ragas metric")
+        finally:
+            pbar.close()
+            TqdmPositionRegistry.release(tqdm_position)
+
+        return _ragas_results_to_eval_output(results_dataset=results_dataset,
+                                             ids=[sample.item_id for sample in atif_samples])
diff --git a/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/evaluate.py b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/evaluate.py
index 52535cb281..edf94104a7 100644
--- a/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/evaluate.py
+++ b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/evaluate.py
@@ -17,6 +17,7 @@
 import math
 import typing
 from collections.abc import Sequence
+from typing import Any
 
 from pydantic import BaseModel
 from tqdm import tqdm
@@ -39,6 +40,49 @@
 logger = logging.getLogger(__name__)
 
 
+def _nan_to_zero(v: float | None) -> float:
+    """Map ``None`` and float NaN to 0.0; pass every other value through unchanged."""
+    return v if v is not None and not (isinstance(v, float) and math.isnan(v)) else 0.0
+
+
+def _ragas_results_to_eval_output(results_dataset: "EvaluationResult | None",
+                                  ids: list[Any] | None = None) -> EvalOutput:
+    """Convert a ragas EvaluationResult to NAT EvalOutput; the first metric supplies scores and the average."""
+    if not results_dataset:
+        logger.error("Ragas evaluation failed with no results")
+        return EvalOutput(average_score=0.0, eval_output_items=[])
+
+    scores: list[dict[str, float]] = results_dataset.scores
+    if not scores:
+        logger.warning("Ragas returned empty score list")
+        return EvalOutput(average_score=0.0, eval_output_items=[])
+    # Raw scores (NaN/None preserved) go on the output items; cleaned scores (NaN/None -> 0.0) feed the averages.
+    original_scores_dict = {metric: [score.get(metric) for score in scores] for metric in scores[0]}
+    scores_dict = {metric: [_nan_to_zero(score.get(metric)) for score in scores] for metric in scores[0]}
+    first_metric_name = next(iter(scores_dict.keys()), None)
+
+    average_scores = {metric: (sum(values) / len(values) if values else 0.0) for metric, values in scores_dict.items()}
+    first_avg_score = average_scores.get(first_metric_name, 0.0)
+    if isinstance(first_avg_score, float) and math.isnan(first_avg_score):
+        first_avg_score = 0.0
+
+    df = results_dataset.to_pandas()
+    # Caller IDs win when they cover every row; "user_input" is read only as the fallback, so it may be absent.
+    output_ids = ids if ids and len(ids) >= len(df) else df["user_input"].tolist()
+
+    eval_output_items = [
+        EvalOutputItem(
+            id=output_ids[i],
+            score=original_scores_dict[first_metric_name][i] if first_metric_name else None,
+            reasoning={
+                key: getattr(row, key, None)
+                for key in ["user_input", "reference", "response", "retrieved_contexts"]
+            },
+        ) for i, row in enumerate(df.itertuples(index=False))
+    ]
+    return EvalOutput(average_score=first_avg_score, eval_output_items=eval_output_items)
+
+
 class RAGEvaluator:
 
     def __init__(self,
@@ -106,59 +150,8 @@ def eval_input_to_ragas(self, eval_input: EvalInput) -> "EvaluationDataset":
 
     def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: "EvaluationResult | None") -> EvalOutput:
         """Converts the ragas EvaluationResult to nat EvalOutput"""
-
-        if not results_dataset:
-            logger.error("Ragas evaluation failed with no results", exc_info=True)
-            return EvalOutput(average_score=0.0, eval_output_items=[])
-
-        scores: list[dict[str, float]] = results_dataset.scores
-
-        # If Ragas returned no scores, return empty output to avoid downstream errors
-        if not scores:
-            logger.warning("Ragas returned empty score list")
-            return EvalOutput(average_score=0.0, eval_output_items=[])
-
-        def _nan_to_zero(v: float | None) -> float:
-            """Convert NaN or None to 0.0 for safe arithmetic/serialization."""
-            return 0.0 if v is None or (isinstance(v, float) and math.isnan(v)) else v
-
-        # Keep original scores (preserving NaN/None) for output
-        original_scores_dict = {metric: [score.get(metric) for score in scores] for metric in scores[0]}
-
-        # Convert from list of dicts to dict of lists, coercing NaN/None to 0.0 for average calculation
-        scores_dict = {metric: [_nan_to_zero(score.get(metric)) for score in scores] for metric in scores[0]}
-        first_metric_name = list(scores_dict.keys())[0] if scores_dict else None
-
-        # Compute the average of each metric using cleaned scores (NaN/None -> 0.0)
-        average_scores = {
-            metric: (sum(values) / len(values) if values else 0.0)
-            for metric, values in scores_dict.items()
-        }
-
-        first_avg_score = average_scores.get(list(scores_dict.keys())[0], 0.0)
-        if isinstance(first_avg_score, float) and math.isnan(first_avg_score):
-            first_avg_score = 0.0
-
-        df = results_dataset.to_pandas()
-        # Get id from eval_input if df size matches number of eval_input_items
-        if len(eval_input.eval_input_items) >= len(df):
-            ids = [item.id for item in eval_input.eval_input_items]  # Extract IDs
-        else:
-            ids = df["user_input"].tolist()  # Use "user_input" as ID fallback
-
-        # Construct EvalOutputItem list using original scores (preserving NaN/None)
-        eval_output_items = [
-            EvalOutputItem(
-                id=ids[i],
-                score=original_scores_dict[first_metric_name][i] if first_metric_name else None,
-                reasoning={
-                    key:
-                        getattr(row, key, None)  # Use getattr to safely access attributes
-                    for key in ["user_input", "reference", "response", "retrieved_contexts"]
-                }) for i, row in enumerate(df.itertuples(index=False))
-        ]
-        # Return EvalOutput
-        return EvalOutput(average_score=first_avg_score, eval_output_items=eval_output_items)
+        ids = [item.id for item in eval_input.eval_input_items]
+        return _ragas_results_to_eval_output(results_dataset=results_dataset, ids=ids)
 
     async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
         """Run Ragas metrics evaluation on the provided EvalInput"""
diff --git a/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/register.py b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/register.py
index eea82966db..9f4b75ab41 100644
--- a/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/register.py
+++ b/packages/nvidia_nat_ragas/src/nat/plugins/ragas/rag_evaluator/register.py
@@ -26,6 +26,7 @@
 from nat.data_models.evaluator import EvalInput
 from nat.data_models.evaluator import EvalOutput
 from nat.data_models.evaluator import EvaluatorLLMConfig
+from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSampleList
 from nat.utils.exception_handlers.automatic_retries import patch_with_retry
 
 logger = logging.getLogger(__name__)
@@ -49,6 +50,10 @@ class RagasEvaluatorConfig(EvaluatorLLMConfig, name="ragas"):
         default=None,
         description="The field in the input object that contains the content to evaluate.",
     )
+    enable_atif_evaluator: bool = Field(
+        default=False,
+        description="Enable ATIF-native RAGAS evaluator lane. Disabled by default until rollout stabilization.",
+    )
 
     @model_validator(mode="before")
     @classmethod
@@ -112,6 +117,14 @@ async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
             return EvalOutput(average_score=0.0, eval_output_items=[])
         return await evaluator.evaluate(eval_input)
 
+    async def evaluate_atif_fn(atif_samples: AtifEvalSampleList) -> EvalOutput:
+        """Run ATIF-native RAGAS evaluation and return NAT eval output."""
+        if not atif_evaluator:
+            logger.warning("No ATIF evaluator found for RAGAS metrics.")
+            return EvalOutput(average_score=0.0, eval_output_items=[])
+        return await atif_evaluator.evaluate(atif_samples)
+
+    from .atif_evaluate import RAGAtifEvaluator
     from .evaluate import RAGEvaluator
 
     llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
@@ -136,4 +149,11 @@ async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
                              metrics=metrics,
                              max_concurrency=builder.get_max_concurrency(),
                              input_obj_field=config.input_obj_field) if metrics else None
-    yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
+    atif_evaluator = RAGAtifEvaluator(
+        evaluator_llm=llm, metrics=metrics,
+        max_concurrency=builder.get_max_concurrency()) if (metrics and config.enable_atif_evaluator) else None
+
+    evaluator_info = EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
+    if config.enable_atif_evaluator:
+        evaluator_info.evaluate_atif_fn = evaluate_atif_fn
+    yield evaluator_info
diff --git a/packages/nvidia_nat_ragas/tests/test_rag_evaluate.py b/packages/nvidia_nat_ragas/tests/test_rag_evaluate.py
index dadda3418f..1bc7579b16 100644
--- a/packages/nvidia_nat_ragas/tests/test_rag_evaluate.py
+++ b/packages/nvidia_nat_ragas/tests/test_rag_evaluate.py
@@ -46,6 +46,41 @@ class ExampleModel(BaseModel):
     other: str
 
 
+@pytest.fixture(name="atif_samples")
+def fixture_atif_samples(rag_user_inputs, rag_expected_outputs, rag_generated_outputs):
+    """Build one ATIF eval sample per (user input, expected output, generated output) fixture triple."""
+    from nat.data_models.atif import ATIFAgentConfig
+    from nat.data_models.atif import ATIFObservation
+    from nat.data_models.atif import ATIFObservationResult
+    from nat.data_models.atif import ATIFStep
+    from nat.data_models.atif import ATIFTrajectory
+    from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+    # Item ids and session ids are 1-based; every agent step carries one fixed "retrieved context" observation.
+    samples = []
+    for index, (user_input, expected_output,
+                generated_output) in enumerate(zip(rag_user_inputs, rag_expected_outputs, rag_generated_outputs)):
+        trajectory = ATIFTrajectory(
+            session_id=str(index + 1),
+            agent=ATIFAgentConfig(name="nat-agent", version="0.0.0"),
+            steps=[
+                ATIFStep(step_id=1, source="user", message=user_input),
+                ATIFStep(step_id=2,
+                         source="agent",
+                         message=str(generated_output),
+                         observation=ATIFObservation(results=[ATIFObservationResult(content="retrieved context")])),
+            ],
+        )
+        samples.append(
+            AtifEvalSample(
+                item_id=index + 1,
+                trajectory=trajectory,
+                expected_output_obj=expected_output,
+                output_obj=generated_output,
+                metadata={},
+            ))
+    return samples
+
+
 @pytest.fixture
 def ragas_judge_llm() -> "LangchainLLMWrapper":
     """Fixture providing a mocked LangchainLLMWrapper."""
@@ -310,6 +345,204 @@ async def test_rag_evaluate_failure(rag_evaluator, rag_eval_input, ragas_judge_l
         assert output.eval_output_items == []  # No results due to failure
 
 
+def test_atif_samples_to_ragas(ragas_judge_llm, ragas_metrics, atif_samples):
+    """Each ATIF sample maps to one SingleTurnSample carrying its observation text as retrieved context."""
+    from ragas.evaluation import EvaluationDataset
+    from ragas.evaluation import SingleTurnSample
+
+    from nat.plugins.ragas.rag_evaluator.atif_evaluate import RAGAtifEvaluator
+
+    converter = RAGAtifEvaluator(evaluator_llm=ragas_judge_llm, metrics=ragas_metrics)
+    ragas_dataset = converter.atif_samples_to_ragas(atif_samples)
+
+    assert isinstance(ragas_dataset, EvaluationDataset)
+    assert len(ragas_dataset.samples) == len(atif_samples)
+    for converted in ragas_dataset.samples:
+        assert isinstance(converted, SingleTurnSample)
+        assert converted.retrieved_contexts == ["retrieved context"]
+
+
+async def test_rag_atif_evaluate_success(ragas_judge_llm, ragas_metrics, atif_samples):
+    """ATIF evaluate() forwards dataset, metrics, and LLM to ragas.evaluate and returns the converted output."""
+    from nat.plugins.ragas.rag_evaluator.atif_evaluate import RAGAtifEvaluator
+    # Placeholders suffice: dataset conversion, ragas.evaluate, and result conversion are all patched below.
+    mock_results_dataset = MagicMock()
+    dataset = "mock_dataset"
+    mock_output = "mock_output"
+    atif_evaluator = RAGAtifEvaluator(evaluator_llm=ragas_judge_llm, metrics=ragas_metrics)
+
+    with patch.object(atif_evaluator, "atif_samples_to_ragas", return_value=dataset) as mock_to_ragas, \
+         patch("ragas.evaluate", new_callable=MagicMock) as mock_ragas_evaluate, \
+         patch("nat.plugins.ragas.rag_evaluator.atif_evaluate._ragas_results_to_eval_output",
+               return_value=mock_output) as mock_to_output:
+        mock_ragas_evaluate.return_value = mock_results_dataset
+        output = await atif_evaluator.evaluate(atif_samples)
+
+        mock_to_ragas.assert_called_once_with(atif_samples)
+        mock_ragas_evaluate.assert_called_once()
+        called_kwargs = mock_ragas_evaluate.call_args.kwargs
+        assert called_kwargs["dataset"] == dataset
+        assert called_kwargs["metrics"] == ragas_metrics
+        assert called_kwargs["show_progress"] is True
+        assert called_kwargs["llm"] == ragas_judge_llm
+        mock_to_output.assert_called_once()
+        assert output == mock_output
+
+
+def test_rag_legacy_and_atif_dataset_parity(rag_evaluator,
+                                            ragas_judge_llm,
+                                            ragas_metrics,
+                                            rag_eval_input,
+                                            intermediate_step_adapter):
+    """Legacy and ATIF lanes must build ragas samples with equal user_input, reference, response, and contexts."""
+    from nat.data_models.atif import ATIFAgentConfig
+    from nat.data_models.atif import ATIFObservation
+    from nat.data_models.atif import ATIFObservationResult
+    from nat.data_models.atif import ATIFStep
+    from nat.data_models.atif import ATIFTrajectory
+    from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+    from nat.plugins.ragas.rag_evaluator.atif_evaluate import RAGAtifEvaluator
+    # Mirror every legacy item as an ATIF sample whose observation results hold the legacy contexts.
+    atif_samples = []
+    for item in rag_eval_input.eval_input_items:
+        contexts = intermediate_step_adapter.get_context(item.trajectory,
+                                                         intermediate_step_adapter.DEFAULT_EVENT_FILTER)
+        trajectory = ATIFTrajectory(
+            session_id=str(item.id),
+            agent=ATIFAgentConfig(name="nat-agent", version="0.0.0"),
+            steps=[
+                ATIFStep(step_id=1, source="user", message=str(item.input_obj)),
+                ATIFStep(step_id=2,
+                         source="agent",
+                         message=str(item.output_obj),
+                         observation=ATIFObservation(
+                             results=[ATIFObservationResult(content=context) for context in contexts])),
+            ],
+        )
+        atif_samples.append(
+            AtifEvalSample(item_id=item.id,
+                           trajectory=trajectory,
+                           expected_output_obj=item.expected_output_obj,
+                           output_obj=item.output_obj,
+                           metadata={}))
+
+    atif_evaluator = RAGAtifEvaluator(evaluator_llm=ragas_judge_llm, metrics=ragas_metrics)
+    legacy_dataset = rag_evaluator.eval_input_to_ragas(rag_eval_input)
+    atif_dataset = atif_evaluator.atif_samples_to_ragas(atif_samples)
+
+    assert len(legacy_dataset.samples) == len(atif_dataset.samples)
+    for legacy_sample, atif_sample in zip(legacy_dataset.samples, atif_dataset.samples):
+        assert legacy_sample.user_input == atif_sample.user_input
+        assert legacy_sample.reference == atif_sample.reference
+        assert legacy_sample.response == atif_sample.response
+        assert legacy_sample.retrieved_contexts == atif_sample.retrieved_contexts
+
+
+@pytest.mark.parametrize(
+    "atif_trajectory_steps, expected_user_input, expected_contexts",
+    [
+        ([], "", []),
+        ([{
+            "step_id": 1, "source": "user", "message": "question only"
+        }], "question only", []),
+    ],
+)
+def test_atif_samples_to_ragas_edge_cases(ragas_judge_llm,
+                                          ragas_metrics,
+                                          atif_trajectory_steps,
+                                          expected_user_input,
+                                          expected_contexts):
+    """An empty trajectory yields an empty user input; a user-only trajectory yields no retrieved contexts."""
+    from nat.data_models.atif import ATIFAgentConfig
+    from nat.data_models.atif import ATIFTrajectory
+    from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+    from nat.plugins.ragas.rag_evaluator.atif_evaluate import RAGAtifEvaluator
+    # The parametrized step dicts are passed to ATIFTrajectory as-is; a single sample wraps the trajectory.
+    trajectory = ATIFTrajectory(session_id="edge-case-1",
+                                agent=ATIFAgentConfig(name="nat-agent", version="0.0.0"),
+                                steps=atif_trajectory_steps)
+    atif_samples = [
+        AtifEvalSample(item_id=1, trajectory=trajectory, expected_output_obj="ref", output_obj="resp", metadata={})
+    ]
+
+    atif_evaluator = RAGAtifEvaluator(evaluator_llm=ragas_judge_llm, metrics=ragas_metrics)
+    dataset = atif_evaluator.atif_samples_to_ragas(atif_samples)
+
+    assert len(dataset.samples) == 1
+    assert dataset.samples[0].user_input == expected_user_input
+    assert dataset.samples[0].retrieved_contexts == expected_contexts
+
+
+async def test_rag_legacy_and_atif_score_parity(rag_evaluator,
+                                                ragas_judge_llm,
+                                                ragas_metrics,
+                                                rag_eval_input,
+                                                intermediate_step_adapter):
+    """With ragas.evaluate faked, legacy and ATIF lanes must return equal averages, item ids, and item scores."""
+    from nat.data_models.atif import ATIFAgentConfig
+    from nat.data_models.atif import ATIFObservation
+    from nat.data_models.atif import ATIFObservationResult
+    from nat.data_models.atif import ATIFStep
+    from nat.data_models.atif import ATIFTrajectory
+    from nat.plugins.eval.evaluator.atif_evaluator import AtifEvalSample
+    from nat.plugins.ragas.rag_evaluator.atif_evaluate import RAGAtifEvaluator
+    # The fake ragas.evaluate below scores 1.0 for samples with retrieved contexts and 0.5 otherwise.
+    metric_name = "AnswerAccuracy"
+
+    def _mock_ragas_evaluate(*_args, **kwargs):
+        dataset = kwargs["dataset"]
+        rows = []
+        scores = []
+        for sample in dataset.samples:
+            score = 0.5 + (0.5 if sample.retrieved_contexts else 0.0)
+            scores.append({metric_name: score})
+            rows.append({
+                "user_input": sample.user_input,
+                "reference": sample.reference,
+                "response": sample.response,
+                "retrieved_contexts": sample.retrieved_contexts,
+                metric_name: score,
+            })
+        result = MagicMock()
+        result.scores = scores
+        result.to_pandas.return_value = pd.DataFrame(rows)
+        return result
+    # Mirror every legacy item as an ATIF sample whose observation results hold the legacy contexts.
+    atif_samples = []
+    for item in rag_eval_input.eval_input_items:
+        contexts = intermediate_step_adapter.get_context(item.trajectory,
+                                                         intermediate_step_adapter.DEFAULT_EVENT_FILTER)
+        trajectory = ATIFTrajectory(
+            session_id=str(item.id),
+            agent=ATIFAgentConfig(name="nat-agent", version="0.0.0"),
+            steps=[
+                ATIFStep(step_id=1, source="user", message=str(item.input_obj)),
+                ATIFStep(step_id=2,
+                         source="agent",
+                         message=str(item.output_obj),
+                         observation=ATIFObservation(
+                             results=[ATIFObservationResult(content=context) for context in contexts])),
+            ],
+        )
+        atif_samples.append(
+            AtifEvalSample(item_id=item.id,
+                           trajectory=trajectory,
+                           expected_output_obj=item.expected_output_obj,
+                           output_obj=item.output_obj,
+                           metadata={}))
+
+    atif_evaluator = RAGAtifEvaluator(evaluator_llm=ragas_judge_llm, metrics=ragas_metrics)
+    with patch("ragas.evaluate", side_effect=_mock_ragas_evaluate):
+        legacy_output = await rag_evaluator.evaluate(rag_eval_input)
+        atif_output = await atif_evaluator.evaluate(atif_samples)
+
+    assert legacy_output.average_score == pytest.approx(atif_output.average_score, abs=1e-9)
+    assert len(legacy_output.eval_output_items) == len(atif_output.eval_output_items)
+    for legacy_item, atif_item in zip(legacy_output.eval_output_items, atif_output.eval_output_items):
+        assert legacy_item.id == atif_item.id
+        assert legacy_item.score == pytest.approx(atif_item.score, abs=1e-9)
+
+
 def test_extract_input_obj_base_model_with_field(rag_evaluator_content):
     """Ensure extract_input_obj returns the specified field from a Pydantic BaseModel."""
     model_obj = ExampleModel(content="hello world", other="ignore me")
@@ -343,3 +576,41 @@ def test_extract_input_obj_base_model_without_field(rag_evaluator, rag_evaluator
     assert extracted_with_field == "json hello"
     assert extracted_default != extracted_with_field
     assert '"content":"json hello"' in extracted_default  # basic sanity check on JSON output
+
+
+async def test_register_ragas_evaluator_atif_lane_disabled_by_default():
+    """By default the ATIF lane is off: `evaluate_fn` is exposed, `evaluate_atif_fn` is not."""
+    from nat.plugins.ragas.rag_evaluator.register import RagasEvaluatorConfig
+    from nat.plugins.ragas.rag_evaluator.register import register_ragas_evaluator
+
+    # Stub builder: awaitable LLM lookup plus a fixed concurrency limit.
+    mock_builder = MagicMock()
+    mock_builder.get_llm = AsyncMock(return_value=MagicMock())
+    mock_builder.get_max_concurrency = MagicMock(return_value=1)
+    default_config = RagasEvaluatorConfig(llm_name="judge", metric={"AnswerAccuracy": {"skip": True}})
+    async with register_ragas_evaluator(config=default_config, builder=mock_builder) as info:
+        assert hasattr(info, "evaluate_fn")
+        assert not hasattr(info, "evaluate_atif_fn")
+
+    mock_builder.get_llm.assert_awaited_once()
+
+
+async def test_register_ragas_evaluator_atif_lane_enabled():
+    """With `enable_atif_evaluator=True`, registration also exposes a callable `evaluate_atif_fn`."""
+    from nat.plugins.ragas.rag_evaluator.register import RagasEvaluatorConfig
+    from nat.plugins.ragas.rag_evaluator.register import register_ragas_evaluator
+
+    # Stub builder: awaitable LLM lookup plus a fixed concurrency limit.
+    mock_builder = MagicMock()
+    mock_builder.get_llm = AsyncMock(return_value=MagicMock())
+    mock_builder.get_max_concurrency = MagicMock(return_value=1)
+
+    # Opt in to the ATIF lane explicitly through the config flag.
+    metric_config = {"AnswerAccuracy": {"skip": True}}
+    atif_config = RagasEvaluatorConfig(llm_name="judge", metric=metric_config, enable_atif_evaluator=True)
+    async with register_ragas_evaluator(config=atif_config, builder=mock_builder) as info:
+        assert hasattr(info, "evaluate_fn")
+        atif_fn = getattr(info, "evaluate_atif_fn", None)
+        assert callable(atif_fn)
+
+    mock_builder.get_llm.assert_awaited_once()
diff --git a/pyproject.toml b/pyproject.toml
index 52de63d06a..b43e9dc10b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,6 +68,7 @@ opentelemetry = ["nvidia-nat-opentelemetry == {version}"]
 phoenix = ["nvidia-nat-phoenix == {version}"]
 profiler = ["nvidia-nat-profiler == {version}"]
 rag = ["nvidia-nat-rag == {version}"]
+ragas = ["nvidia-nat-ragas == {version}"]
 ragaai = ["nvidia-nat-ragaai == {version}"]
 mysql = ["nvidia-nat-mysql == {version}"]
 redis = ["nvidia-nat-redis == {version}"]
diff --git a/uv.lock b/uv.lock
index 0fb0c78fbb..34ee2e4d22 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1002,10 +1002,16 @@ sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db
 wheels = [
     { url = "https://files.pythonhosted.org/packages/ec/90/543f556fcfcfa270713eef906b6352ab048e1e557afec12925c991dc93c2/caio-0.9.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d6956d9e4a27021c8bd6c9677f3a59eb1d820cc32d0343cea7961a03b1371965", size = 36839, upload-time = "2025-12-26T15:21:40.267Z" },
     { url = "https://files.pythonhosted.org/packages/51/3b/36f3e8ec38dafe8de4831decd2e44c69303d2a3892d16ceda42afed44e1b/caio-0.9.25-cp311-cp311-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bf84bfa039f25ad91f4f52944452a5f6f405e8afab4d445450978cd6241d1478", size = 80255, upload-time = "2025-12-26T15:22:20.271Z" },
+    { url = "https://files.pythonhosted.org/packages/df/ce/65e64867d928e6aff1b4f0e12dba0ef6d5bf412c240dc1df9d421ac10573/caio-0.9.25-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:ae3d62587332bce600f861a8de6256b1014d6485cfd25d68c15caf1611dd1f7c", size = 80052, upload-time = "2026-03-04T22:08:20.402Z" },
+    { url = "https://files.pythonhosted.org/packages/46/90/e278863c47e14ec58309aa2e38a45882fbe67b4cc29ec9bc8f65852d3e45/caio-0.9.25-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:fc220b8533dcf0f238a6b1a4a937f92024c71e7b10b5a2dfc1c73604a25709bc", size = 78273, upload-time = "2026-03-04T22:08:21.368Z" },
     { url = "https://files.pythonhosted.org/packages/d3/25/79c98ebe12df31548ba4eaf44db11b7cad6b3e7b4203718335620939083c/caio-0.9.25-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fb7ff95af4c31ad3f03179149aab61097a71fd85e05f89b4786de0359dffd044", size = 36983, upload-time = "2025-12-26T15:21:36.075Z" },
     { url = "https://files.pythonhosted.org/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012, upload-time = "2025-12-26T15:22:20.983Z" },
+    { url = "https://files.pythonhosted.org/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502, upload-time = "2026-03-04T22:08:22.381Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200, upload-time = "2026-03-04T22:08:23.382Z" },
     { url = "https://files.pythonhosted.org/packages/31/57/5e6ff127e6f62c9f15d989560435c642144aa4210882f9494204bc892305/caio-0.9.25-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d6c2a3411af97762a2b03840c3cec2f7f728921ff8adda53d7ea2315a8563451", size = 36979, upload-time = "2025-12-26T15:21:35.484Z" },
     { url = "https://files.pythonhosted.org/packages/a3/9f/f21af50e72117eb528c422d4276cbac11fb941b1b812b182e0a9c70d19c5/caio-0.9.25-cp313-cp313-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0998210a4d5cd5cb565b32ccfe4e53d67303f868a76f212e002a8554692870e6", size = 81900, upload-time = "2025-12-26T15:22:21.919Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/12/c39ae2a4037cb10ad5eb3578eb4d5f8c1a2575c62bba675f3406b7ef0824/caio-0.9.25-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:1a177d4777141b96f175fe2c37a3d96dec7911ed9ad5f02bac38aaa1c936611f", size = 81523, upload-time = "2026-03-04T22:08:25.187Z" },
+    { url = "https://files.pythonhosted.org/packages/22/59/f8f2e950eb4f1a5a3883e198dca514b9d475415cb6cd7b78b9213a0dd45a/caio-0.9.25-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:9ed3cfb28c0e99fec5e208c934e5c157d0866aa9c32aa4dc5e9b6034af6286b7", size = 80243, upload-time = "2026-03-04T22:08:26.449Z" },
     { url = "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" },
 ]
 
@@ -5675,7 +5681,7 @@ source = { editable = "examples/evaluation_and_profiling/email_phishing_analyzer
 dependencies = [
     { name = "beautifulsoup4" },
     { name = "networkx" },
-    { name = "nvidia-nat", extra = ["eval", "langchain", "phoenix", "profiler", "test"] },
+    { name = "nvidia-nat", extra = ["eval", "langchain", "phoenix", "profiler", "ragas", "test"] },
     { name = "openinference-instrumentation-langchain" },
 ]
 
@@ -5808,7 +5814,7 @@ dependencies = [
     { name = "nat-alert-triage-agent" },
     { name = "nat-simple-calculator" },
     { name = "nbclient" },
-    { name = "nvidia-nat", extra = ["langchain", "llama-index", "mcp", "profiler", "test"] },
+    { name = "nvidia-nat", extra = ["langchain", "llama-index", "mcp", "profiler", "ragas", "test"] },
     { name = "python-dotenv", extra = ["cli"] },
 ]
 
@@ -6163,7 +6169,7 @@ name = "nat-simple-web-query-eval"
 source = { editable = "examples/evaluation_and_profiling/simple_web_query_eval" }
 dependencies = [
     { name = "nat-simple-web-query" },
-    { name = "nvidia-nat", extra = ["eval", "langchain", "profiler", "test"] },
+    { name = "nvidia-nat", extra = ["eval", "langchain", "profiler", "ragas", "test"] },
 ]
 
 [package.metadata]
@@ -6592,6 +6598,9 @@ rag = [
 ragaai = [
     { name = "nvidia-nat-ragaai" },
 ]
+ragas = [
+    { name = "nvidia-nat-ragas" },
+]
 redis = [
     { name = "nvidia-nat-redis" },
 ]
@@ -6744,6 +6753,7 @@ requires-dist = [
     { name = "nvidia-nat-rag", marker = "extra == 'rag'", editable = "packages/nvidia_nat_rag" },
     { name = "nvidia-nat-ragaai", marker = "extra == 'ragaai'", editable = "packages/nvidia_nat_ragaai" },
     { name = "nvidia-nat-ragas", marker = "extra == 'most'", editable = "packages/nvidia_nat_ragas" },
+    { name = "nvidia-nat-ragas", marker = "extra == 'ragas'", editable = "packages/nvidia_nat_ragas" },
     { name = "nvidia-nat-redis", marker = "extra == 'most'", editable = "packages/nvidia_nat_redis" },
     { name = "nvidia-nat-redis", marker = "extra == 'redis'", editable = "packages/nvidia_nat_redis" },
     { name = "nvidia-nat-s3", marker = "extra == 'most'", editable = "packages/nvidia_nat_s3" },
@@ -6764,7 +6774,7 @@ requires-dist = [
     { name = "nvidia-nat-zep-cloud", marker = "extra == 'zep-cloud'", editable = "packages/nvidia_nat_zep_cloud" },
     { name = "text-file-ingest", marker = "extra == 'examples'", editable = "examples/documentation_guides/workflows/text_file_ingest" },
 ]
-provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
+provides-extras = ["a2a", "adk", "agno", "app", "autogen", "core", "crewai", "eval", "data-flywheel", "fastmcp", "langchain", "llama-index", "mcp", "mem0ai", "nemo-customizer", "openpipe-art", "opentelemetry", "phoenix", "profiler", "rag", "ragas", "ragaai", "mysql", "redis", "s3", "security", "semantic-kernel", "strands", "test", "vanna", "weave", "zep-cloud", "async-endpoints", "gunicorn", "pii-defense", "most", "examples"]
 
 [package.metadata.requires-dev]
 dev = [