Update README.md
Browse files
README.md
CHANGED
@@ -11,4 +11,66 @@ language:
|
|
11 |
|truthfulqa_mc2|Yaml |none | 0|acc |0.4457|± |0.0152|
|
12 |
|winogrande|Yaml |none | 5|acc |0.5154|± | 0.014|
|
13 |
|hellaswag|Yaml |none | 10|acc |0.2832|± |0.0045|
|
14 |
-
| | |none | 10|acc_norm|0.3024|± |0.0046|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|truthfulqa_mc2|Yaml |none | 0|acc |0.4457|± |0.0152|
|
12 |
|winogrande|Yaml |none | 5|acc |0.5154|± | 0.014|
|
13 |
|hellaswag|Yaml |none | 10|acc |0.2832|± |0.0045|
|
14 |
+
| | |none | 10|acc_norm|0.3024|± |0.0046|
|
15 |
+
|
16 |
+
### MMLU
|
17 |
+
|
18 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
19 |
+
|-----------------------------------|-------|------|-----:|------|-----:|---|-----:|
|
20 |
+
|abstract_algebra |Yaml |none | 5|acc |0.2200|± |0.0416|
|
21 |
+
|anatomy |Yaml |none | 5|acc |0.2222|± |0.0359|
|
22 |
+
|astronomy |Yaml |none | 5|acc |0.1776|± |0.0311|
|
23 |
+
|business_ethics |Yaml |none | 5|acc |0.2300|± |0.0423|
|
24 |
+
|clinical_knowledge |Yaml |none | 5|acc |0.2415|± |0.0263|
|
25 |
+
|college_biology |Yaml |none | 5|acc |0.3194|± |0.0390|
|
26 |
+
|college_chemistry |Yaml |none | 5|acc |0.2000|± |0.0402|
|
27 |
+
|college_computer_science |Yaml |none | 5|acc |0.2800|± |0.0451|
|
28 |
+
|college_mathematics |Yaml |none | 5|acc |0.2800|± |0.0451|
|
29 |
+
|college_medicine |Yaml |none | 5|acc |0.2254|± |0.0319|
|
30 |
+
|college_physics |Yaml |none | 5|acc |0.2157|± |0.0409|
|
31 |
+
|computer_security |Yaml |none | 5|acc |0.2200|± |0.0416|
|
32 |
+
|conceptual_physics |Yaml |none | 5|acc |0.2553|± |0.0285|
|
33 |
+
|econometrics |Yaml |none | 5|acc |0.2368|± |0.0400|
|
34 |
+
|electrical_engineering |Yaml |none | 5|acc |0.2345|± |0.0353|
|
35 |
+
|elementary_mathematics |Yaml |none | 5|acc |0.2646|± |0.0227|
|
36 |
+
|formal_logic |Yaml |none | 5|acc |0.2302|± |0.0376|
|
37 |
+
|global_facts |Yaml |none | 5|acc |0.1700|± |0.0378|
|
38 |
+
|high_school_biology |Yaml |none | 5|acc |0.2903|± |0.0258|
|
39 |
+
|high_school_chemistry |Yaml |none | 5|acc |0.2611|± |0.0309|
|
40 |
+
|high_school_computer_science |Yaml |none | 5|acc |0.2300|± |0.0423|
|
41 |
+
|high_school_european_history |Yaml |none | 5|acc |0.2788|± |0.0350|
|
42 |
+
|high_school_geography |Yaml |none | 5|acc |0.3081|± |0.0329|
|
43 |
+
|high_school_government_and_politics|Yaml |none | 5|acc |0.3731|± |0.0349|
|
44 |
+
|high_school_macroeconomics |Yaml |none | 5|acc |0.2923|± |0.0231|
|
45 |
+
|high_school_mathematics |Yaml |none | 5|acc |0.2630|± |0.0268|
|
46 |
+
|high_school_microeconomics |Yaml |none | 5|acc |0.3403|± |0.0308|
|
47 |
+
|high_school_physics |Yaml |none | 5|acc |0.2715|± |0.0363|
|
48 |
+
|high_school_psychology |Yaml |none | 5|acc |0.2881|± |0.0194|
|
49 |
+
|high_school_statistics |Yaml |none | 5|acc |0.4722|± |0.0340|
|
50 |
+
|high_school_us_history |Yaml |none | 5|acc |0.3529|± |0.0335|
|
51 |
+
|high_school_world_history |Yaml |none | 5|acc |0.2532|± |0.0283|
|
52 |
+
|human_aging |Yaml |none | 5|acc |0.2108|± |0.0274|
|
53 |
+
|human_sexuality |Yaml |none | 5|acc |0.2672|± |0.0388|
|
54 |
+
|international_law |Yaml |none | 5|acc |0.2479|± |0.0394|
|
55 |
+
|jurisprudence |Yaml |none | 5|acc |0.2500|± |0.0419|
|
56 |
+
|logical_fallacies |Yaml |none | 5|acc |0.2393|± |0.0335|
|
57 |
+
|machine_learning |Yaml |none | 5|acc |0.2946|± |0.0433|
|
58 |
+
|management |Yaml |none | 5|acc |0.1650|± |0.0368|
|
59 |
+
|marketing |Yaml |none | 5|acc |0.1923|± |0.0258|
|
60 |
+
|medical_genetics |Yaml |none | 5|acc |0.3000|± |0.0461|
|
61 |
+
|miscellaneous |Yaml |none | 5|acc |0.2720|± |0.0159|
|
62 |
+
|moral_disputes |Yaml |none | 5|acc |0.1936|± |0.0213|
|
63 |
+
|moral_scenarios |Yaml |none | 5|acc |0.2380|± |0.0142|
|
64 |
+
|nutrition |Yaml |none | 5|acc |0.2484|± |0.0247|
|
65 |
+
|philosophy |Yaml |none | 5|acc |0.2283|± |0.0238|
|
66 |
+
|prehistory |Yaml |none | 5|acc |0.2346|± |0.0236|
|
67 |
+
|professional_accounting |Yaml |none | 5|acc |0.2589|± |0.0261|
|
68 |
+
|professional_law |Yaml |none | 5|acc |0.2445|± |0.0110|
|
69 |
+
|professional_medicine |Yaml |none | 5|acc |0.4485|± |0.0302|
|
70 |
+
|professional_psychology |Yaml |none | 5|acc |0.2614|± |0.0178|
|
71 |
+
|public_relations |Yaml |none | 5|acc |0.2364|± |0.0407|
|
72 |
+
|security_studies |Yaml |none | 5|acc |0.4000|± |0.0314|
|
73 |
+
|sociology |Yaml |none | 5|acc |0.3035|± |0.0325|
|
74 |
+
|us_foreign_policy |Yaml |none | 5|acc |0.2800|± |0.0451|
|
75 |
+
|virology |Yaml |none | 5|acc |0.2048|± |0.0314|
|
76 |
+
|world_religions |Yaml |none | 5|acc |0.1988|± |0.0306|
|