crumb committed on
Commit
cfc16d7
1 Parent(s): c09e155

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +66 -1
README.md CHANGED
@@ -13,7 +13,72 @@ tags: []
13
  | | |none | 25|acc_norm|0.2423|± |0.0125|
14
  |truthfulqa_mc2| 2|none | 0|acc |0.4356|± |0.0151|
15
  |winogrande| 1|none | 5|acc |0.5138|± | 0.014|
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  ## Model Details
19
 
 
13
  | | |none | 25|acc_norm|0.2423|± |0.0125|
14
  |truthfulqa_mc2| 2|none | 0|acc |0.4356|± |0.0151|
15
  |winogrande| 1|none | 5|acc |0.5138|± | 0.014|
16
+ |hellaswag| 1|none | 10|acc |0.2938|± |0.0045|
17
+ | | |none | 10|acc_norm|0.3242|± |0.0047|
18
+ |gsm8k| 3|strict-match | 5|exact_match|0.0129|± |0.0031|
19
+ | | |flexible-extract| 5|exact_match|0.0197|± |0.0038|
20
+
21
+ ### MMLU *(mean acc over the 57 tasks below: 0.2649701754385965, stderr: 0.004451753262466369)*
22
+
23
+ | Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
24
+ |-----------------------------------|------:|------|-----:|------|-----:|---|-----:|
25
+ |world_religions | 0|none | 5|acc |0.2281|± |0.0322|
26
+ |virology | 0|none | 5|acc |0.1747|± |0.0296|
27
+ |us_foreign_policy | 0|none | 5|acc |0.2600|± |0.0441|
28
+ |sociology | 0|none | 5|acc |0.2736|± |0.0315|
29
+ |security_studies | 0|none | 5|acc |0.4000|± |0.0314|
30
+ |public_relations | 0|none | 5|acc |0.2273|± |0.0401|
31
+ |professional_psychology | 0|none | 5|acc |0.2467|± |0.0174|
32
+ |professional_medicine | 0|none | 5|acc |0.4485|± |0.0302|
33
+ |professional_law | 0|none | 5|acc |0.2490|± |0.0110|
34
+ |professional_accounting | 0|none | 5|acc |0.2340|± |0.0253|
35
+ |prehistory | 0|none | 5|acc |0.2315|± |0.0235|
36
+ |philosophy | 0|none | 5|acc |0.2154|± |0.0234|
37
+ |nutrition | 0|none | 5|acc |0.2516|± |0.0248|
38
+ |moral_scenarios | 0|none | 5|acc |0.2536|± |0.0146|
39
+ |moral_disputes | 0|none | 5|acc |0.1879|± |0.0210|
40
+ |miscellaneous | 0|none | 5|acc |0.2197|± |0.0148|
41
+ |medical_genetics | 0|none | 5|acc |0.1900|± |0.0394|
42
+ |marketing | 0|none | 5|acc |0.1923|± |0.0258|
43
+ |management | 0|none | 5|acc |0.3301|± |0.0466|
44
+ |machine_learning | 0|none | 5|acc |0.1875|± |0.0370|
45
+ |logical_fallacies | 0|none | 5|acc |0.2577|± |0.0344|
46
+ |jurisprudence | 0|none | 5|acc |0.2222|± |0.0402|
47
+ |international_law | 0|none | 5|acc |0.3802|± |0.0443|
48
+ |human_sexuality | 0|none | 5|acc |0.2137|± |0.0360|
49
+ |human_aging | 0|none | 5|acc |0.1121|± |0.0212|
50
+ |high_school_world_history | 0|none | 5|acc |0.2743|± |0.0290|
51
+ |high_school_us_history | 0|none | 5|acc |0.2353|± |0.0298|
52
+ |high_school_statistics | 0|none | 5|acc |0.4722|± |0.0340|
53
+ |high_school_psychology | 0|none | 5|acc |0.3358|± |0.0202|
54
+ |high_school_physics | 0|none | 5|acc |0.3245|± |0.0382|
55
+ |high_school_microeconomics | 0|none | 5|acc |0.2605|± |0.0285|
56
+ |high_school_mathematics | 0|none | 5|acc |0.2741|± |0.0272|
57
+ |high_school_macroeconomics | 0|none | 5|acc |0.3615|± |0.0244|
58
+ |high_school_government_and_politics| 0|none | 5|acc |0.3679|± |0.0348|
59
+ |high_school_geography | 0|none | 5|acc |0.3535|± |0.0341|
60
+ |high_school_european_history | 0|none | 5|acc |0.2485|± |0.0337|
61
+ |high_school_computer_science | 0|none | 5|acc |0.1600|± |0.0368|
62
+ |high_school_chemistry | 0|none | 5|acc |0.2709|± |0.0313|
63
+ |high_school_biology | 0|none | 5|acc |0.3032|± |0.0261|
64
+ |global_facts | 0|none | 5|acc |0.2500|± |0.0435|
65
+ |formal_logic | 0|none | 5|acc |0.1587|± |0.0327|
66
+ |elementary_mathematics | 0|none | 5|acc |0.2857|± |0.0233|
67
+ |electrical_engineering | 0|none | 5|acc |0.2483|± |0.0360|
68
+ |econometrics | 0|none | 5|acc |0.2895|± |0.0427|
69
+ |conceptual_physics | 0|none | 5|acc |0.2894|± |0.0296|
70
+ |computer_security | 0|none | 5|acc |0.1900|± |0.0394|
71
+ |college_physics | 0|none | 5|acc |0.2451|± |0.0428|
72
+ |college_medicine | 0|none | 5|acc |0.2775|± |0.0341|
73
+ |college_mathematics | 0|none | 5|acc |0.2800|± |0.0451|
74
+ |college_computer_science | 0|none | 5|acc |0.2400|± |0.0429|
75
+ |college_chemistry | 0|none | 5|acc |0.3300|± |0.0473|
76
+ |college_biology | 0|none | 5|acc |0.2639|± |0.0369|
77
+ |clinical_knowledge | 0|none | 5|acc |0.3094|± |0.0285|
78
+ |business_ethics | 0|none | 5|acc |0.1900|± |0.0394|
79
+ |astronomy | 0|none | 5|acc |0.2303|± |0.0343|
80
+ |anatomy | 0|none | 5|acc |0.3259|± |0.0405|
81
+ |abstract_algebra | 0|none | 5|acc |0.2700|± |0.0446|
82
 
83
  ## Model Details
84