-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_result.py
More file actions
85 lines (68 loc) · 3.86 KB
/
get_result.py
File metadata and controls
85 lines (68 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import json
import re
import sys
from pathlib import Path
from itertools import product
QUESTION_TYPES = ["desc", "reason", "open"]
QUESTION_RELATIONS = ["irrel", "inexist", "contra", "normal"]
def process_eval(eval_data):
metrics = {
"total_queries": 0,
"total_correct": 0,
"overall_score": 0,
"exceptions": 0,
"score_per_type": {q_type: 0 for q_type in QUESTION_TYPES},
"score_per_relation": {q_relation: 0 for q_relation in QUESTION_RELATIONS},
"score_per_type_relation": {f"{q_type}_{q_relation}": 0 for q_type, q_relation in product(QUESTION_TYPES, QUESTION_RELATIONS)},
"ratio_per_type": {q_type: 0 for q_type in QUESTION_TYPES},
"ratio_per_relation": {q_relation: 0 for q_relation in QUESTION_RELATIONS},
"ratio_per_type_relation": {f"{q_type}_{q_relation}": 0 for q_type, q_relation in product(QUESTION_TYPES, QUESTION_RELATIONS)},
}
for item in eval_data.values():
metrics["total_queries"] += 1
if "score" not in item or item["score"] not in [0, 1]:
metrics["exceptions"] += 1
continue
metrics["total_correct"] += item["score"]
metrics["score_per_type"][item["q_type"]] += item["score"]
metrics["score_per_relation"][item["q_relation"]] += item["score"]
metrics["score_per_type_relation"][f"{item['q_type']}_{item['q_relation']}"] += item["score"]
metrics["ratio_per_type"][item["q_type"]] += 1
metrics["ratio_per_relation"][item["q_relation"]] += 1
metrics["ratio_per_type_relation"][f"{item['q_type']}_{item['q_relation']}"] += 1
# calculate ratios
metrics["overall_score"] = round(metrics["total_correct"] / (metrics["total_queries"] - metrics["exceptions"]) * 100, 2)
# express ratio in "score/total_queries"
for q_type in metrics["ratio_per_type"]:
metrics["ratio_per_type"][q_type] = f"{metrics['score_per_type'][q_type]}/{metrics['ratio_per_type'][q_type]}={metrics['score_per_type'][q_type] / metrics['ratio_per_type'][q_type] * 100:.2f}"
for q_relation in metrics["ratio_per_relation"]:
metrics["ratio_per_relation"][q_relation] = f"{metrics['score_per_relation'][q_relation]}/{metrics['ratio_per_relation'][q_relation]}={metrics['score_per_relation'][q_relation] / metrics['ratio_per_relation'][q_relation] * 100:.2f}"
for q_type_relation in metrics["ratio_per_type_relation"]:
metrics["ratio_per_type_relation"][q_type_relation] = f"{metrics['score_per_type_relation'][q_type_relation]}/{metrics['ratio_per_type_relation'][q_type_relation]}={metrics['score_per_type_relation'][q_type_relation] / metrics['ratio_per_type_relation'][q_type_relation] * 100:.2f}"
# 将所有的 ratio_per_type 和 ratio_per_relation 百分比指标用 & 连接起来,便于打印 Latex 表格
all_ratios = [v.split('=')[-1] for v in list(metrics["ratio_per_type"].values()) + list(metrics["ratio_per_relation"].values())]
all_ratios.append(str(metrics["overall_score"]))
metrics["latex_line"] = " & ".join(all_ratios)
tiny_results = {
"total_queries": metrics["total_queries"],
"total_correct": metrics["total_correct"],
"overall_score": metrics["overall_score"],
}
return metrics, tiny_results
if __name__ == "__main__":
eval_file = sys.argv[1]
save_file = eval_file.replace("eval.json", "score.json")
# open eval data
with open(eval_file, "r", encoding='utf-8') as f:
eval_data = json.load(f)
# scoring
scoring_result, tiny_results = process_eval(eval_data)
# save scoring result
with open(save_file, "w", encoding='utf-8') as f:
json.dump(scoring_result, f, ensure_ascii=False, indent=4)
print(f"Scoring results saved to {save_file}")
# print tiny results
#print("Tiny scoring results:")
for key, value in tiny_results.items():
print(f" {key}: {value}")