crumb committed on
Commit
f470d9d
1 Parent(s): e23f2d7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +63 -1
README.md CHANGED
@@ -11,4 +11,66 @@ language:
11
  |truthfulqa_mc2|Yaml |none | 0|acc |0.4457|± |0.0152|
12
  |winogrande|Yaml |none | 5|acc |0.5154|± | 0.014|
13
  |hellaswag|Yaml |none | 10|acc |0.2832|± |0.0045|
14
- | | |none | 10|acc_norm|0.3024|± |0.0046|
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  |truthfulqa_mc2|Yaml |none | 0|acc |0.4457|± |0.0152|
12
  |winogrande|Yaml |none | 5|acc |0.5154|± | 0.014|
13
  |hellaswag|Yaml |none | 10|acc |0.2832|± |0.0045|
14
+ | | |none | 10|acc_norm|0.3024|± |0.0046|
15
+
16
+ ### MMLU
17
+
18
+ | Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
19
+ |-----------------------------------|-------|------|-----:|------|-----:|---|-----:|
20
+ |abstract_algebra |Yaml |none | 5|acc |0.2200|± |0.0416|
21
+ |anatomy |Yaml |none | 5|acc |0.2222|± |0.0359|
22
+ |astronomy |Yaml |none | 5|acc |0.1776|± |0.0311|
23
+ |business_ethics |Yaml |none | 5|acc |0.2300|± |0.0423|
24
+ |clinical_knowledge |Yaml |none | 5|acc |0.2415|± |0.0263|
25
+ |college_biology |Yaml |none | 5|acc |0.3194|± |0.0390|
26
+ |college_chemistry |Yaml |none | 5|acc |0.2000|± |0.0402|
27
+ |college_computer_science |Yaml |none | 5|acc |0.2800|± |0.0451|
28
+ |college_mathematics |Yaml |none | 5|acc |0.2800|± |0.0451|
29
+ |college_medicine |Yaml |none | 5|acc |0.2254|± |0.0319|
30
+ |college_physics |Yaml |none | 5|acc |0.2157|± |0.0409|
31
+ |computer_security |Yaml |none | 5|acc |0.2200|± |0.0416|
32
+ |conceptual_physics |Yaml |none | 5|acc |0.2553|± |0.0285|
33
+ |econometrics |Yaml |none | 5|acc |0.2368|± |0.0400|
34
+ |electrical_engineering |Yaml |none | 5|acc |0.2345|± |0.0353|
35
+ |elementary_mathematics |Yaml |none | 5|acc |0.2646|± |0.0227|
36
+ |formal_logic |Yaml |none | 5|acc |0.2302|± |0.0376|
37
+ |global_facts |Yaml |none | 5|acc |0.1700|± |0.0378|
38
+ |high_school_biology |Yaml |none | 5|acc |0.2903|± |0.0258|
39
+ |high_school_chemistry |Yaml |none | 5|acc |0.2611|± |0.0309|
40
+ |high_school_computer_science |Yaml |none | 5|acc |0.2300|± |0.0423|
41
+ |high_school_european_history |Yaml |none | 5|acc |0.2788|± |0.0350|
42
+ |high_school_geography |Yaml |none | 5|acc |0.3081|± |0.0329|
43
+ |high_school_government_and_politics|Yaml |none | 5|acc |0.3731|± |0.0349|
44
+ |high_school_macroeconomics |Yaml |none | 5|acc |0.2923|± |0.0231|
45
+ |high_school_mathematics |Yaml |none | 5|acc |0.2630|± |0.0268|
46
+ |high_school_microeconomics |Yaml |none | 5|acc |0.3403|± |0.0308|
47
+ |high_school_physics |Yaml |none | 5|acc |0.2715|± |0.0363|
48
+ |high_school_psychology |Yaml |none | 5|acc |0.2881|± |0.0194|
49
+ |high_school_statistics |Yaml |none | 5|acc |0.4722|± |0.0340|
50
+ |high_school_us_history |Yaml |none | 5|acc |0.3529|± |0.0335|
51
+ |high_school_world_history |Yaml |none | 5|acc |0.2532|± |0.0283|
52
+ |human_aging |Yaml |none | 5|acc |0.2108|± |0.0274|
53
+ |human_sexuality |Yaml |none | 5|acc |0.2672|± |0.0388|
54
+ |international_law |Yaml |none | 5|acc |0.2479|± |0.0394|
55
+ |jurisprudence |Yaml |none | 5|acc |0.2500|± |0.0419|
56
+ |logical_fallacies |Yaml |none | 5|acc |0.2393|± |0.0335|
57
+ |machine_learning |Yaml |none | 5|acc |0.2946|± |0.0433|
58
+ |management |Yaml |none | 5|acc |0.1650|± |0.0368|
59
+ |marketing |Yaml |none | 5|acc |0.1923|± |0.0258|
60
+ |medical_genetics |Yaml |none | 5|acc |0.3000|± |0.0461|
61
+ |miscellaneous |Yaml |none | 5|acc |0.2720|± |0.0159|
62
+ |moral_disputes |Yaml |none | 5|acc |0.1936|± |0.0213|
63
+ |moral_scenarios |Yaml |none | 5|acc |0.2380|± |0.0142|
64
+ |nutrition |Yaml |none | 5|acc |0.2484|± |0.0247|
65
+ |philosophy |Yaml |none | 5|acc |0.2283|± |0.0238|
66
+ |prehistory |Yaml |none | 5|acc |0.2346|± |0.0236|
67
+ |professional_accounting |Yaml |none | 5|acc |0.2589|± |0.0261|
68
+ |professional_law |Yaml |none | 5|acc |0.2445|± |0.0110|
69
+ |professional_medicine |Yaml |none | 5|acc |0.4485|± |0.0302|
70
+ |professional_psychology |Yaml |none | 5|acc |0.2614|± |0.0178|
71
+ |public_relations |Yaml |none | 5|acc |0.2364|± |0.0407|
72
+ |security_studies |Yaml |none | 5|acc |0.4000|± |0.0314|
73
+ |sociology |Yaml |none | 5|acc |0.3035|± |0.0325|
74
+ |us_foreign_policy |Yaml |none | 5|acc |0.2800|± |0.0451|
75
+ |virology |Yaml |none | 5|acc |0.2048|± |0.0314|
76
+ |world_religions |Yaml |none | 5|acc |0.1988|± |0.0306|