3 changes: 2 additions & 1 deletion requirements-runners.txt
@@ -1,4 +1,5 @@
ARS_Test_Runner==0.2.4
# ARS_Test_Runner==0.2.4
# benchmarks-runner==0.1.3
# ui-test-runner==0.0.2
# graph-validation-test-runners==0.1.5
locust==2.38.1
2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@

setup(
name="sri-test-harness",
version="0.4.1",
version="0.5.0",
author="Max Wang",
author_email="max@covar.com",
url="https://github.com/TranslatorSRI/TestHarness",
123 changes: 123 additions & 0 deletions test_harness/acceptance_test_runner.py
@@ -0,0 +1,123 @@
"""Acceptance Test Pass Fail Analysis Runner."""

from typing import Any, Dict, List


def run_acceptance_pass_fail_analysis(
report: Dict[str, Any],
agent: str,
results: List[Dict[str, Any]],
out_curie: str,
expect_output: str,
):
    """Run pass/fail analysis on individual results."""
# get the top_n result's ids
try:
all_ids = []
for res in results:
for res_node, res_value in res["node_bindings"].items():
for val in res_value:
ids = str(val["id"])
if ids not in all_ids:
all_ids.append(ids)
if expect_output == "TopAnswer":
n_perc_res = results[0:30]
elif expect_output == "Acceptable":
n_perc_res = results[0 : int(len(results) * (float(50) / 100))]
elif expect_output == "BadButForgivable":
n_perc_res = results[int(len(results) * (float(50) / 100)) :]
elif expect_output == "NeverShow":
n_perc_res = results
else:
error_mesg = {
"error": "You have indicated a wrong category for expected output",
}
return error_mesg
n_perc_ids = []
for res in n_perc_res:
for res_value in res["node_bindings"].values():
for val in res_value:
ids = str(val["id"])
if ids not in n_perc_ids:
n_perc_ids.append(ids)
# get the sugeno score & rank
for idx, res in enumerate(results):
node_bindings = res.get("node_bindings", {})
for k in node_bindings.keys():
nb = node_bindings[k]
the_id = None
for c in nb:
the_id = c.get("id")
if the_id == out_curie:
if "sugeno" in res.keys() and "rank" in res.keys():
ars_score = res["sugeno"]
ars_rank = res["rank"]
ara_score = None
ara_rank = None
else:
ars_score = None
ars_rank = None
for anal in res["analyses"]:
if "score" in anal.keys():
ara_score = anal["score"]
else:
ara_score = None
ara_rank = idx + 1

report[agent]["actual_output"] = {}
if ars_score is not None and ars_rank is not None:
report[agent]["actual_output"]["ars_score"] = ars_score
report[agent]["actual_output"]["ars_rank"] = ars_rank

if ara_score is not None and ara_rank is not None:
report[agent]["actual_output"]["ara_score"] = ara_score
report[agent]["actual_output"]["ara_rank"] = ara_rank

if expect_output in ["TopAnswer", "Acceptable"]:
if out_curie in n_perc_ids:
report[agent]["status"] = "PASSED"
elif out_curie not in n_perc_ids:
if out_curie in all_ids:
report[agent]["status"] = "FAILED"
else:
report[agent]["status"] = "FAILED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None

elif expect_output == "BadButForgivable":
if out_curie in n_perc_ids:
report[agent]["status"] = "PASSED"
elif out_curie not in n_perc_ids and out_curie in all_ids:
report[agent]["status"] = "FAILED"
elif out_curie not in n_perc_ids and out_curie not in all_ids:
report[agent]["status"] = "PASSED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None

elif expect_output == "NeverShow":
if out_curie in n_perc_ids:
report[agent]["status"] = "FAILED"
elif out_curie not in all_ids:
report[agent]["status"] = "PASSED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None
except Exception as e:
report[agent]["status"] = "FAILED"
report[agent]["message"] = f"An exception happened: {type(e), str(e)}"

return report
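
Not part of the diff: a minimal sketch of how this runner might be invoked, using a simplified, hypothetical TRAPI-style result list (the query-node keys, CURIEs, and scores below are illustrative only). The expect_output category selects the slice of results searched for the expected CURIE: "TopAnswer" looks at the first 30 results, "Acceptable" at the top half, "BadButForgivable" at the bottom half, and "NeverShow" at all of them.

from test_harness.acceptance_test_runner import run_acceptance_pass_fail_analysis

# Two hypothetical results, ordered best-first as the ARS would return them.
results = [
    {
        "node_bindings": {"sn": [{"id": "MONDO:0005148"}], "on": [{"id": "CHEBI:6801"}]},
        "sugeno": 0.91,  # ARS-level score/rank, when present
        "rank": 1,
        "analyses": [{"score": 0.87}],
    },
    {
        "node_bindings": {"sn": [{"id": "MONDO:0005148"}], "on": [{"id": "CHEBI:17234"}]},
        "analyses": [{"score": 0.42}],
    },
]

report = {"ars": {}}
report = run_acceptance_pass_fail_analysis(
    report,
    "ars",
    results,
    out_curie="CHEBI:6801",
    expect_output="TopAnswer",
)
print(report["ars"]["status"])  # PASSED: the expected CURIE is bound in the top slice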
22 changes: 16 additions & 6 deletions test_harness/download.py
@@ -1,25 +1,25 @@
"""Download tests."""

import glob
import httpx
import io
import json
import logging
from pathlib import Path
import tempfile
from typing import List, Union, Dict
import zipfile
from pathlib import Path
from typing import Dict, List, Union

import httpx
from translator_testing_model.datamodel.pydanticmodel import (
TestCase,
PathfinderTestCase,
TestCase,
TestSuite,
)


def download_tests(
suite: Union[str, List[str]],
url: Path,
url: str,
logger: logging.Logger,
) -> Dict[str, Union[TestCase, PathfinderTestCase]]:
"""Download tests from specified location."""
@@ -87,5 +87,15 @@ def download_tests(
# test.test_case_type = "acceptance"
# tests = all_tests
# tests = list(filter((lambda x: x for x in all_tests for asset in x.test_assets if asset.output_id), all_tests))
logger.info(f"Passing along {len(test_suite.test_cases)} queries")
logger.info(f"Passing along {len(test_suite.test_cases.keys())} queries")
return test_suite.test_cases


if __name__ == "__main__":
tests = download_tests(
"performance_tests",
"https://github.com/NCATSTranslator/Tests/archive/refs/heads/performance_tests.zip",
logging.Logger("tester"),
)
for test_case_id, test in tests.items():
print(type(test))
67 changes: 56 additions & 11 deletions test_harness/main.py
@@ -1,16 +1,22 @@
"""Translator SRI Automated Test Harness."""

from argparse import ArgumentParser
import asyncio
from gevent import monkey

monkey.patch_all()

import json
from setproctitle import setproctitle
from argparse import ArgumentParser
import time
from urllib.parse import urlparse
from uuid import uuid4

from test_harness.run import run_tests
from setproctitle import setproctitle

from test_harness.download import download_tests
from test_harness.logger import get_logger, setup_logger
from test_harness.reporter import Reporter
from test_harness.result_collector import ResultCollector
from test_harness.run import run_tests
from test_harness.slacker import Slacker

setproctitle("TestHarness")
@@ -24,7 +30,7 @@ def url_type(arg):
raise TypeError("Invalid URL")


async def main(args):
def main(args):
"""Main Test Harness entrypoint."""
qid = str(uuid4())[:8]
logger = get_logger(qid, args["log_level"])
@@ -47,18 +53,57 @@ async def main(args):
refresh_token=args.get("reporter_access_token"),
logger=logger,
)
await reporter.get_auth()
await reporter.create_test_run(next(iter(tests.values())).test_env, args["suite"])
reporter.get_auth()
reporter.create_test_run(next(iter(tests.values())).test_env, args["suite"])
slacker = Slacker()
report = await run_tests(reporter, slacker, tests, logger, args)
collector = ResultCollector(logger)
queried_envs = set()
for test in tests.values():
queried_envs.add(test.test_env)
slacker.post_notification(
messages=[
f"Running {args['suite']} ({sum([len(test.test_assets) for test in tests.values()])} tests, {len(tests.values())} queries)...\n<{reporter.base_path}/test-runs/{reporter.test_run_id}|View in the Information Radiator>"
]
)
start_time = time.time()
run_tests(tests, reporter, collector, logger, args)

slacker.post_notification(
messages=[
"""Test Suite: {test_suite}\nDuration: {duration} | Environment(s): {envs}\n<{ir_url}|View in the Information Radiator>\n{result_summary}""".format(
test_suite=args["suite"],
duration=round(time.time() - start_time, 2),
envs=(",").join(list(queried_envs)),
ir_url=f"{reporter.base_path}/test-runs/{reporter.test_run_id}",
result_summary=collector.dump_result_summary(),
)
]
)
if collector.has_acceptance_results:
slacker.upload_test_results_file(
reporter.test_name,
"json",
collector.acceptance_stats,
)
slacker.upload_test_results_file(
reporter.test_name,
"csv",
collector.acceptance_csv,
)
if collector.has_performance_results:
slacker.upload_test_results_file(
reporter.test_name,
"json",
collector.performance_stats,
)

logger.info("Finishing up test run...")
await reporter.finish_test_run()
reporter.finish_test_run()

if args["json_output"]:
# logger.info("Saving report as JSON...")
with open("test_report.json", "w") as f:
json.dump(report, f)
json.dump(collector.acceptance_report, f)

return logger.info("All tests have completed!")

@@ -135,7 +180,7 @@ def cli():
)

args = parser.parse_args()
asyncio.run(main(vars(args)))
main(vars(args))


if __name__ == "__main__":
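
Not part of the diff: main() is now synchronous and the harness relies on gevent (presumably pulled in via the new locust dependency), which is why monkey.patch_all() runs before any other import. A minimal sketch of why that ordering matters, assuming nothing beyond gevent and the standard library:

from gevent import monkey

# Patch the standard library before anything else is imported; modules
# imported earlier would hold references to the unpatched, blocking
# primitives (sockets, time.sleep, ...) and stall every other greenlet.
monkey.patch_all()

import time

import gevent


def probe(n: int) -> int:
    time.sleep(0.1)  # cooperative after patching: other greenlets keep running
    return n * n


jobs = [gevent.spawn(probe, n) for n in range(5)]
gevent.joinall(jobs)
print([job.value for job in jobs])  # [0, 1, 4, 9, 16]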
10 changes: 5 additions & 5 deletions test_harness/pathfinder_test_runner.py
@@ -1,13 +1,13 @@
from typing import Dict, Union, List
from typing import Any, Dict, List


async def pathfinder_pass_fail_analysis(
report: Dict[str, any],
def pathfinder_pass_fail_analysis(
report: Dict[str, Any],
agent: str,
message: Dict[str, any],
message: Dict[str, Any],
path_nodes: List[List[str]],
minimum_required_path_nodes: int,
) -> Dict[str, any]:
) -> Dict[str, Any]:
found_path_nodes = set()
unmatched_paths = set()
for analysis in message["results"][0]["analyses"]:
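
Not part of the diff: besides dropping async, this hunk replaces the lowercase builtin any with typing.Any in the annotations. The builtin any() is a function, not a type, so Dict[str, any] is rejected by static type checkers; typing.Any is the intended escape hatch. A small sketch of the difference:

from typing import Any, Dict


def annotated_ok(report: Dict[str, Any]) -> Dict[str, Any]:
    # Accepted by type checkers such as mypy.
    return report


# def annotated_bad(report: Dict[str, any]) -> Dict[str, any]:
#     # Rejected: mypy reports that builtins.any is not valid as a type.
#     return report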