-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: metric.json
More file actions
89 lines (89 loc) · 3.06 KB
/
metric.json
File metadata and controls
89 lines (89 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
{
  "Overall": {
    "average_score": 0.403380940081282,
    "standard deviation": 0.2,
    "metric_type_averages": {
      "high_school_european_history": 0.37083333333333335,
      "business_ethics": 0.7933564007944811,
      "clinical_knowledge": 0.08666666666,
      "medical_genetics": 1.0,
      "high_school_us_history": 0.5699735449735449,
      "high_school_physics": 0.0,
      "high_school_world_history": 0.0028366348076148684,
      "virology": 0.0028366348076148684,
      "high_school_microeconomics": 0.0028366348076148684,
      "econometrics": 0.0028366348076148684,
      "college_computer_science": 0.0028366348076148684,
      "high_school_biology": 0.0028366348076148684,
      "abstract_algebra": 0.0028366348076148684,
      "professional_accounting": 0.0028366348076148684,
      "philosophy": 0.0028366348076148684,
      "professional_medicine": 0.0028366348076148684,
      "nutrition": 0.0028366348076148684,
      "global_facts": 0.0028366348076148684,
      "machine_learning": 0.0028366348076148684,
      "security_studies": 0.0028366348076148684,
      "public_relations": 0.0028366348076148684,
      "professional_psychology": 0.0028366348076148684,
      "prehistory": 0.0028366348076148684,
      "anatomy": 0.0028366348076148684,
      "human_sexuality": 0.0028366348076148684,
      "college_medicine": 0.0028366348076148684,
      "high_school_government_and_politics": 0.0028366348076148684,
      "college_chemistry": 0.0028366348076148684,
      "logical_fallacies": 0.0028366348076148684,
      "high_school_geography": 0.0028366348076148684,
      "elementary_mathematics": 0.0028366348076148684,
      "human_aging": 0.0028366348076148684,
      "college_mathematics": 0.0028366348076148684,
      "high_school_psychology": 0.0028366348076148684,
      "formal_logic": 0.0028366348076148684,
      "high_school_statistics": 0.0028366348076148684,
      "international_law": 0.0028366348076148684,
      "high_school_mathematics": 0.0028366348076148684,
      "high_school_computer_science": 0.8,
      "conceptual_physics": 0.8,
      "miscellaneous": 0.8,
      "high_school_chemistry": 0.8,
      "marketing": 0.8,
      "professional_law": 0.8,
      "management": 0.8,
      "college_physics": 0.8,
      "jurisprudence": 0.6,
      "world_religions": 0.6,
      "sociology": 0.6,
      "us_foreign_policy": 0.6,
      "high_school_macroeconomics": 0.6,
      "computer_security": 0.6,
      "moral_scenarios": 0.6,
      "moral_disputes": 0.6,
      "electrical_engineering": 0.6,
      "astronomy": 0.6,
      "college_biology": 0.6,
      "jailbreak_behavior Correctness": 0.6,
      "Toxicity": 0.0,
      "qmsum AnswerRelevancy": 0.8,
      "allanai AnswerRelevancy": 0.8,
      "equi AnswerRelevancy": 0.7,
      "Ragas": 0.7,
      "Contextual Relevancy": 0.9,
      "ContextualPrecision": 0.7,
      "Faithfulness": 0.9,
      "Bias": 0.0,
      "Hallucination": 0.5,
      "bleu & Rouge": 1.0,
      "bleu & Rouge L": 1.0,
      "gender stereotype": 0.0,
      "race stereotype": 0.0
    },
    "regular_metrics": [
      "jailbreak_behavior Correctness",
      "Toxicity",
      "qmsum AnswerRelevancy",
      "allanai AnswerRelevancy",
      "equi AnswerRelevancy",
      "Ragas",
      "Contextual Relevancy",
      "ContextualPrecision",
      "Faithfulness",
      "Bias",
      "Hallucination",
      "bleu & Rouge",
      "bleu & Rouge L",
      "gender stereotype",
      "race stereotype"
    ]
  }
}