Update README.md
Browse files
README.md
CHANGED
@@ -48,7 +48,72 @@ print(text)
|
|
48 |
|
49 |
# Benchmark Scores
|
50 |
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
## Citations
|
54 |
|
|
|
48 |
|
49 |
# Benchmark Scores
|
50 |
|
51 |
+
| Test Name | Accuracy |
|
52 |
+
|------------------------------------------------------|----------------------|
|
53 |
+
| all | 0.6566791267920726 |
|
54 |
+
|arc:challenge | 0.7005119453924915 |
|
55 |
+
|hellaswag | 0.7103166699860586 |
|
56 |
+
|hendrycksTest-abstract_algebra | 0.34 |
|
57 |
+
|hendrycksTest-anatomy | 0.6666666666666666 |
|
58 |
+
|hendrycksTest-astronomy | 0.6907894736842105 |
|
59 |
+
|hendrycksTest-business_ethics | 0.65 |
|
60 |
+
|hendrycksTest-clinical_knowledge | 0.7132075471698113 |
|
61 |
+
|hendrycksTest-college_biology | 0.7708333333333334 |
|
62 |
+
|hendrycksTest-college_chemistry | 0.48 |
|
63 |
+
|hendrycksTest-college_computer_science | 0.53 |
|
64 |
+
|hendrycksTest-college_mathematics | 0.33 |
|
65 |
+
|hendrycksTest-college_medicine | 0.6705202312138728 |
|
66 |
+
|hendrycksTest-college_physics | 0.4019607843137255 |
|
67 |
+
|hendrycksTest-computer_security | 0.77 |
|
68 |
+
|hendrycksTest-conceptual_physics | 0.5787234042553191 |
|
69 |
+
|hendrycksTest-econometrics | 0.5 |
|
70 |
+
|hendrycksTest-electrical_engineering | 0.5517241379310345 |
|
71 |
+
|hendrycksTest-elementary_mathematics | 0.42592592592592593 |
|
72 |
+
|hendrycksTest-formal_logic | 0.48412698412698413 |
|
73 |
+
|hendrycksTest-global_facts | 0.37 |
|
74 |
+
|hendrycksTest-high_school_biology | 0.7806451612903226 |
|
75 |
+
|hendrycksTest-high_school_chemistry | 0.4975369458128079 |
|
76 |
+
|hendrycksTest-high_school_computer_science | 0.69 |
|
77 |
+
|hendrycksTest-high_school_european_history | 0.7757575757575758 |
|
78 |
+
|hendrycksTest-high_school_geography | 0.803030303030303 |
|
79 |
+
|hendrycksTest-high_school_government_and_politics | 0.8963730569948186 |
|
80 |
+
|hendrycksTest-high_school_macroeconomics | 0.6641025641025641 |
|
81 |
+
|hendrycksTest-high_school_mathematics | 0.36666666666666664 |
|
82 |
+
|hendrycksTest-high_school_microeconomics | 0.6890756302521008 |
|
83 |
+
|hendrycksTest-high_school_physics | 0.37748344370860926 |
|
84 |
+
|hendrycksTest-high_school_psychology | 0.8403669724770643 |
|
85 |
+
|hendrycksTest-high_school_statistics | 0.5 |
|
86 |
+
|hendrycksTest-high_school_us_history | 0.8480392156862745 |
|
87 |
+
|hendrycksTest-high_school_world_history | 0.8059071729957806 |
|
88 |
+
|hendrycksTest-human_aging | 0.6995515695067265 |
|
89 |
+
|hendrycksTest-human_sexuality | 0.7938931297709924 |
|
90 |
+
|hendrycksTest-international_law | 0.8099173553719008 |
|
91 |
+
|hendrycksTest-jurisprudence | 0.7870370370370371 |
|
92 |
+
|hendrycksTest-logical_fallacies | 0.7484662576687117 |
|
93 |
+
|hendrycksTest-machine_learning | 0.4375 |
|
94 |
+
|hendrycksTest-management | 0.7766990291262136 |
|
95 |
+
|hendrycksTest-marketing | 0.8888888888888888 |
|
96 |
+
|hendrycksTest-medical_genetics | 0.72 |
|
97 |
+
|hendrycksTest-miscellaneous | 0.8314176245210728 |
|
98 |
+
|hendrycksTest-moral_disputes | 0.7398843930635838 |
|
99 |
+
|hendrycksTest-moral_scenarios | 0.4324022346368715 |
|
100 |
+
|hendrycksTest-nutrition | 0.7189542483660131 |
|
101 |
+
|hendrycksTest-philosophy | 0.7041800643086816 |
|
102 |
+
|hendrycksTest-prehistory | 0.7469135802469136 |
|
103 |
+
|hendrycksTest-professional_accounting | 0.5035460992907801 |
|
104 |
+
|hendrycksTest-professional_law | 0.4758800521512386 |
|
105 |
+
|hendrycksTest-professional_medicine | 0.6727941176470589 |
|
106 |
+
|hendrycksTest-professional_psychology | 0.6666666666666666 |
|
107 |
+
|hendrycksTest-public_relations | 0.6727272727272727 |
|
108 |
+
|hendrycksTest-security_studies | 0.7183673469387755 |
|
109 |
+
|hendrycksTest-sociology | 0.8407960199004975 |
|
110 |
+
|hendrycksTest-us_foreign_policy | 0.85 |
|
111 |
+
|hendrycksTest-virology | 0.5542168674698795 |
|
112 |
+
|hendrycksTest-world_religions | 0.8421052631578947 |
|
113 |
+
|truthfulqa:mc | 0.6707176642401714 |
|
114 |
+
|winogrande | 0.8492501973164956 |
|
115 |
+
|gsm8k | 0.7050796057619408 |
|
116 |
+
|
117 |
|
118 |
## Citations
|
119 |
|