3 changes: 2 additions & 1 deletion requirements-runners.txt
@@ -1,4 +1,5 @@
ARS_Test_Runner==0.2.4
# ARS_Test_Runner==0.2.4
# benchmarks-runner==0.1.3
# ui-test-runner==0.0.2
# graph-validation-test-runners==0.1.5
locust==2.38.1
2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@

setup(
name="sri-test-harness",
version="0.4.1",
version="0.5.0",
author="Max Wang",
author_email="max@covar.com",
url="https://github.com/TranslatorSRI/TestHarness",
123 changes: 123 additions & 0 deletions test_harness/acceptance_test_runner.py
@@ -0,0 +1,123 @@
"""Acceptance Test Pass Fail Analysis Runner."""

from typing import Any, Dict, List


def run_acceptance_pass_fail_analysis(
report: Dict[str, Any],
agent: str,
results: List[Dict[str, Any]],
out_curie: str,
expect_output: str,
):
    """Run pass/fail analysis on individual results."""
# get the top_n result's ids
try:
all_ids = []
for res in results:
for res_node, res_value in res["node_bindings"].items():
for val in res_value:
ids = str(val["id"])
if ids not in all_ids:
all_ids.append(ids)
if expect_output == "TopAnswer":
n_perc_res = results[0:30]
elif expect_output == "Acceptable":
n_perc_res = results[0 : int(len(results) * (float(50) / 100))]
elif expect_output == "BadButForgivable":
n_perc_res = results[int(len(results) * (float(50) / 100)) :]
elif expect_output == "NeverShow":
n_perc_res = results
else:
error_mesg = {
"error": "You have indicated a wrong category for expected output",
}
return error_mesg
n_perc_ids = []
for res in n_perc_res:
for res_value in res["node_bindings"].values():
for val in res_value:
ids = str(val["id"])
if ids not in n_perc_ids:
n_perc_ids.append(ids)
# get the sugeno score & rank
for idx, res in enumerate(results):
node_bindings = res.get("node_bindings", {})
for k in node_bindings.keys():
nb = node_bindings[k]
the_id = None
for c in nb:
the_id = c.get("id")
if the_id == out_curie:
if "sugeno" in res.keys() and "rank" in res.keys():
ars_score = res["sugeno"]
ars_rank = res["rank"]
ara_score = None
ara_rank = None
else:
ars_score = None
ars_rank = None
for anal in res["analyses"]:
if "score" in anal.keys():
ara_score = anal["score"]
else:
ara_score = None
ara_rank = idx + 1

report[agent]["actual_output"] = {}
if ars_score is not None and ars_rank is not None:
report[agent]["actual_output"]["ars_score"] = ars_score
report[agent]["actual_output"]["ars_rank"] = ars_rank

if ara_score is not None and ara_rank is not None:
report[agent]["actual_output"]["ara_score"] = ara_score
report[agent]["actual_output"]["ara_rank"] = ara_rank

if expect_output in ["TopAnswer", "Acceptable"]:
if out_curie in n_perc_ids:
report[agent]["status"] = "PASSED"
elif out_curie not in n_perc_ids:
if out_curie in all_ids:
report[agent]["status"] = "FAILED"
else:
report[agent]["status"] = "FAILED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None

elif expect_output == "BadButForgivable":
if out_curie in n_perc_ids:
report[agent]["status"] = "PASSED"
elif out_curie not in n_perc_ids and out_curie in all_ids:
report[agent]["status"] = "FAILED"
elif out_curie not in n_perc_ids and out_curie not in all_ids:
report[agent]["status"] = "PASSED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None

elif expect_output == "NeverShow":
if out_curie in n_perc_ids:
report[agent]["status"] = "FAILED"
elif out_curie not in all_ids:
report[agent]["status"] = "PASSED"
report[agent]["actual_output"] = {}
if agent == "ars":
report[agent]["actual_output"]["ars_score"] = None
report[agent]["actual_output"]["ars_rank"] = None
else:
report[agent]["actual_output"]["ara_score"] = None
report[agent]["actual_output"]["ara_rank"] = None
except Exception as e:
report[agent]["status"] = "FAILED"
report[agent]["message"] = f"An exception happened: {type(e), str(e)}"

return report
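
Not part of the diff: a minimal sketch of how this runner might be invoked, using a simplified, hypothetical TRAPI-style result list (the query-node keys, CURIEs, and scores below are illustrative only). The expect_output category selects the slice of results searched for the expected CURIE: "TopAnswer" looks at the first 30 results, "Acceptable" at the top half, "BadButForgivable" at the bottom half, and "NeverShow" at all of them.

from test_harness.acceptance_test_runner import run_acceptance_pass_fail_analysis

# Two hypothetical results, ordered best-first as the ARS would return them.
results = [
    {
        "node_bindings": {"sn": [{"id": "MONDO:0005148"}], "on": [{"id": "CHEBI:6801"}]},
        "sugeno": 0.91,  # ARS-level score/rank, when present
        "rank": 1,
        "analyses": [{"score": 0.87}],
    },
    {
        "node_bindings": {"sn": [{"id": "MONDO:0005148"}], "on": [{"id": "CHEBI:17234"}]},
        "analyses": [{"score": 0.42}],
    },
]

report = {"ars": {}}
report = run_acceptance_pass_fail_analysis(
    report,
    "ars",
    results,
    out_curie="CHEBI:6801",
    expect_output="TopAnswer",
)
print(report["ars"]["status"])  # PASSED: the expected CURIE is bound in the top slice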
22 changes: 16 additions & 6 deletions test_harness/download.py
@@ -1,25 +1,25 @@
"""Download tests."""

import glob
import httpx
import io
import json
import logging
from pathlib import Path
import tempfile
from typing import List, Union, Dict
import zipfile
from pathlib import Path
from typing import Dict, List, Union

import httpx
from translator_testing_model.datamodel.pydanticmodel import (
TestCase,
PathfinderTestCase,
TestCase,
TestSuite,
)


def download_tests(
suite: Union[str, List[str]],
url: Path,
url: str,
logger: logging.Logger,
) -> Dict[str, Union[TestCase, PathfinderTestCase]]:
"""Download tests from specified location."""
@@ -87,5 +87,15 @@ def download_tests(
# test.test_case_type = "acceptance"
# tests = all_tests
# tests = list(filter((lambda x: x for x in all_tests for asset in x.test_assets if asset.output_id), all_tests))
logger.info(f"Passing along {len(test_suite.test_cases)} queries")
logger.info(f"Passing along {len(test_suite.test_cases.keys())} queries")
return test_suite.test_cases


if __name__ == "__main__":
tests = download_tests(
"performance_tests",
"https://github.com/NCATSTranslator/Tests/archive/refs/heads/performance_tests.zip",
logging.Logger("tester"),
)
for test_case_id, test in tests.items():
print(type(test))
67 changes: 56 additions & 11 deletions test_harness/main.py
@@ -1,16 +1,22 @@
"""Translator SRI Automated Test Harness."""

from argparse import ArgumentParser
import asyncio
from gevent import monkey

monkey.patch_all()

import json
from setproctitle import setproctitle
from argparse import ArgumentParser
import time
from urllib.parse import urlparse
from uuid import uuid4

from test_harness.run import run_tests
from setproctitle import setproctitle

from test_harness.download import download_tests
from test_harness.logger import get_logger, setup_logger
from test_harness.reporter import Reporter
from test_harness.result_collector import ResultCollector
from test_harness.run import run_tests
from test_harness.slacker import Slacker

setproctitle("TestHarness")
@@ -24,7 +30,7 @@ def url_type(arg):
raise TypeError("Invalid URL")


async def main(args):
def main(args):
"""Main Test Harness entrypoint."""
qid = str(uuid4())[:8]
logger = get_logger(qid, args["log_level"])
@@ -47,18 +53,57 @@ async def main(args):
refresh_token=args.get("reporter_access_token"),
logger=logger,
)
await reporter.get_auth()
await reporter.create_test_run(next(iter(tests.values())).test_env, args["suite"])
reporter.get_auth()
reporter.create_test_run(next(iter(tests.values())).test_env, args["suite"])
slacker = Slacker()
report = await run_tests(reporter, slacker, tests, logger, args)
collector = ResultCollector(logger)
queried_envs = set()
for test in tests.values():
queried_envs.add(test.test_env)
slacker.post_notification(
messages=[
f"Running {args['suite']} ({sum([len(test.test_assets) for test in tests.values()])} tests, {len(tests.values())} queries)...\n<{reporter.base_path}/test-runs/{reporter.test_run_id}|View in the Information Radiator>"
]
)
start_time = time.time()
run_tests(tests, reporter, collector, logger, args)

slacker.post_notification(
messages=[
"""Test Suite: {test_suite}\nDuration: {duration} | Environment(s): {envs}\n<{ir_url}|View in the Information Radiator>\n{result_summary}""".format(
test_suite=args["suite"],
duration=round(time.time() - start_time, 2),
envs=(",").join(list(queried_envs)),
ir_url=f"{reporter.base_path}/test-runs/{reporter.test_run_id}",
result_summary=collector.dump_result_summary(),
)
]
)
if collector.has_acceptance_results:
slacker.upload_test_results_file(
reporter.test_name,
"json",
collector.acceptance_stats,
)
slacker.upload_test_results_file(
reporter.test_name,
"csv",
collector.acceptance_csv,
)
if collector.has_performance_results:
slacker.upload_test_results_file(
reporter.test_name,
"json",
collector.performance_stats,
)

logger.info("Finishing up test run...")
await reporter.finish_test_run()
reporter.finish_test_run()

if args["json_output"]:
# logger.info("Saving report as JSON...")
with open("test_report.json", "w") as f:
json.dump(report, f)
json.dump(collector.acceptance_report, f)

return logger.info("All tests have completed!")

@@ -135,7 +180,7 @@ def cli():
)

args = parser.parse_args()
asyncio.run(main(vars(args)))
main(vars(args))


if __name__ == "__main__":
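
Not part of the diff: main() is now synchronous and the harness relies on gevent (presumably pulled in via the new locust dependency), which is why monkey.patch_all() runs before any other import. A minimal sketch of why that ordering matters, assuming nothing beyond gevent and the standard library:

from gevent import monkey

# Patch the standard library before anything else is imported; modules
# imported earlier would hold references to the unpatched, blocking
# primitives (sockets, time.sleep, ...) and stall every other greenlet.
monkey.patch_all()

import time

import gevent


def probe(n: int) -> int:
    time.sleep(0.1)  # cooperative after patching: other greenlets keep running
    return n * n


jobs = [gevent.spawn(probe, n) for n in range(5)]
gevent.joinall(jobs)
print([job.value for job in jobs])  # [0, 1, 4, 9, 16]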
10 changes: 5 additions & 5 deletions test_harness/pathfinder_test_runner.py
@@ -1,13 +1,13 @@
from typing import Dict, Union, List
from typing import Any, Dict, List


async def pathfinder_pass_fail_analysis(
report: Dict[str, any],
def pathfinder_pass_fail_analysis(
report: Dict[str, Any],
agent: str,
message: Dict[str, any],
message: Dict[str, Any],
path_nodes: List[List[str]],
minimum_required_path_nodes: int,
) -> Dict[str, any]:
) -> Dict[str, Any]:
found_path_nodes = set()
unmatched_paths = set()
for analysis in message["results"][0]["analyses"]:
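
Not part of the diff: besides dropping async, this hunk replaces the lowercase builtin any with typing.Any in the annotations. The builtin any() is a function, not a type, so Dict[str, any] is rejected by static type checkers; typing.Any is the intended escape hatch. A small sketch of the difference:

from typing import Any, Dict


def annotated_ok(report: Dict[str, Any]) -> Dict[str, Any]:
    # Accepted by type checkers such as mypy.
    return report


# def annotated_bad(report: Dict[str, any]) -> Dict[str, any]:
#     # Rejected: mypy reports that builtins.any is not valid as a type.
#     return report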