Commit
•
565b517
1
Parent(s):
18bacb2
Update README.md
Browse files
README.md
CHANGED
@@ -1 +1,246 @@
|
|
1 |
-
https://wandb.ai/alexwortega/tiny_llama/runs/n0je6urv?workspace=user-alexwortega
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
https://wandb.ai/alexwortega/tiny_llama/runs/n0je6urv?workspace=user-alexwortega
|
2 |
+
|
3 |
+
hf-causal (pretrained=../../tiny_3ep,dtype=float16), limit: None, provide_description: False, num_fewshot: 0, batch_size: 16
|
4 |
+
| Task |Version| Metric |Value | |Stderr|
|
5 |
+
|---------------------------------------------------|------:|--------|-----:|---|-----:|
|
6 |
+
|danetqa | 1|acc |0.5018|± |0.0175|
|
7 |
+
|hendrycksTest-abstract_algebra | 1|acc |0.2600|± |0.0441|
|
8 |
+
| | |acc_norm|0.2600|± |0.0441|
|
9 |
+
|hendrycksTest-anatomy | 1|acc |0.2741|± |0.0385|
|
10 |
+
| | |acc_norm|0.2741|± |0.0385|
|
11 |
+
|hendrycksTest-astronomy | 1|acc |0.1776|± |0.0311|
|
12 |
+
| | |acc_norm|0.1776|± |0.0311|
|
13 |
+
|hendrycksTest-business_ethics | 1|acc |0.2500|± |0.0435|
|
14 |
+
| | |acc_norm|0.2500|± |0.0435|
|
15 |
+
|hendrycksTest-clinical_knowledge | 1|acc |0.2604|± |0.0270|
|
16 |
+
| | |acc_norm|0.2604|± |0.0270|
|
17 |
+
|hendrycksTest-college_biology | 1|acc |0.2153|± |0.0344|
|
18 |
+
| | |acc_norm|0.2153|± |0.0344|
|
19 |
+
|hendrycksTest-college_chemistry | 1|acc |0.1700|± |0.0378|
|
20 |
+
| | |acc_norm|0.1700|± |0.0378|
|
21 |
+
|hendrycksTest-college_computer_science | 1|acc |0.2800|± |0.0451|
|
22 |
+
| | |acc_norm|0.2800|± |0.0451|
|
23 |
+
|hendrycksTest-college_mathematics | 1|acc |0.2700|± |0.0446|
|
24 |
+
| | |acc_norm|0.2700|± |0.0446|
|
25 |
+
|hendrycksTest-college_medicine | 1|acc |0.2543|± |0.0332|
|
26 |
+
| | |acc_norm|0.2543|± |0.0332|
|
27 |
+
|hendrycksTest-college_physics | 1|acc |0.1961|± |0.0395|
|
28 |
+
| | |acc_norm|0.1961|± |0.0395|
|
29 |
+
|hendrycksTest-computer_security | 1|acc |0.3000|± |0.0461|
|
30 |
+
| | |acc_norm|0.3000|± |0.0461|
|
31 |
+
|hendrycksTest-conceptual_physics | 1|acc |0.2766|± |0.0292|
|
32 |
+
| | |acc_norm|0.2766|± |0.0292|
|
33 |
+
|hendrycksTest-econometrics | 1|acc |0.2807|± |0.0423|
|
34 |
+
| | |acc_norm|0.2807|± |0.0423|
|
35 |
+
|hendrycksTest-electrical_engineering | 1|acc |0.2690|± |0.0370|
|
36 |
+
| | |acc_norm|0.2690|± |0.0370|
|
37 |
+
|hendrycksTest-elementary_mathematics | 1|acc |0.2434|± |0.0221|
|
38 |
+
| | |acc_norm|0.2434|± |0.0221|
|
39 |
+
|hendrycksTest-formal_logic | 1|acc |0.2698|± |0.0397|
|
40 |
+
| | |acc_norm|0.2698|± |0.0397|
|
41 |
+
|hendrycksTest-global_facts | 1|acc |0.2700|± |0.0446|
|
42 |
+
| | |acc_norm|0.2700|± |0.0446|
|
43 |
+
|hendrycksTest-high_school_biology | 1|acc |0.2161|± |0.0234|
|
44 |
+
| | |acc_norm|0.2161|± |0.0234|
|
45 |
+
|hendrycksTest-high_school_chemistry | 1|acc |0.1970|± |0.0280|
|
46 |
+
| | |acc_norm|0.1970|± |0.0280|
|
47 |
+
|hendrycksTest-high_school_computer_science | 1|acc |0.3600|± |0.0482|
|
48 |
+
| | |acc_norm|0.3600|± |0.0482|
|
49 |
+
|hendrycksTest-high_school_european_history | 1|acc |0.2182|± |0.0323|
|
50 |
+
| | |acc_norm|0.2182|± |0.0323|
|
51 |
+
|hendrycksTest-high_school_geography | 1|acc |0.2222|± |0.0296|
|
52 |
+
| | |acc_norm|0.2222|± |0.0296|
|
53 |
+
|hendrycksTest-high_school_government_and_politics | 1|acc |0.1969|± |0.0287|
|
54 |
+
| | |acc_norm|0.1969|± |0.0287|
|
55 |
+
|hendrycksTest-high_school_macroeconomics | 1|acc |0.2282|± |0.0213|
|
56 |
+
| | |acc_norm|0.2282|± |0.0213|
|
57 |
+
|hendrycksTest-high_school_mathematics | 1|acc |0.2556|± |0.0266|
|
58 |
+
| | |acc_norm|0.2556|± |0.0266|
|
59 |
+
|hendrycksTest-high_school_microeconomics | 1|acc |0.2227|± |0.0270|
|
60 |
+
| | |acc_norm|0.2227|± |0.0270|
|
61 |
+
|hendrycksTest-high_school_physics | 1|acc |0.2914|± |0.0371|
|
62 |
+
| | |acc_norm|0.2914|± |0.0371|
|
63 |
+
|hendrycksTest-high_school_psychology | 1|acc |0.2275|± |0.0180|
|
64 |
+
| | |acc_norm|0.2275|± |0.0180|
|
65 |
+
|hendrycksTest-high_school_statistics | 1|acc |0.1759|± |0.0260|
|
66 |
+
| | |acc_norm|0.1759|± |0.0260|
|
67 |
+
|hendrycksTest-high_school_us_history | 1|acc |0.2598|± |0.0308|
|
68 |
+
| | |acc_norm|0.2598|± |0.0308|
|
69 |
+
|hendrycksTest-high_school_world_history | 1|acc |0.2827|± |0.0293|
|
70 |
+
| | |acc_norm|0.2827|± |0.0293|
|
71 |
+
|hendrycksTest-human_aging | 1|acc |0.3049|± |0.0309|
|
72 |
+
| | |acc_norm|0.3049|± |0.0309|
|
73 |
+
|hendrycksTest-human_sexuality | 1|acc |0.2824|± |0.0395|
|
74 |
+
| | |acc_norm|0.2824|± |0.0395|
|
75 |
+
|hendrycksTest-international_law | 1|acc |0.2562|± |0.0398|
|
76 |
+
| | |acc_norm|0.2562|± |0.0398|
|
77 |
+
|hendrycksTest-jurisprudence | 1|acc |0.3611|± |0.0464|
|
78 |
+
| | |acc_norm|0.3611|± |0.0464|
|
79 |
+
|hendrycksTest-logical_fallacies | 1|acc |0.2515|± |0.0341|
|
80 |
+
| | |acc_norm|0.2515|± |0.0341|
|
81 |
+
|hendrycksTest-machine_learning | 1|acc |0.1964|± |0.0377|
|
82 |
+
| | |acc_norm|0.1964|± |0.0377|
|
83 |
+
|hendrycksTest-management | 1|acc |0.1553|± |0.0359|
|
84 |
+
| | |acc_norm|0.1553|± |0.0359|
|
85 |
+
|hendrycksTest-marketing | 1|acc |0.3248|± |0.0307|
|
86 |
+
| | |acc_norm|0.3248|± |0.0307|
|
87 |
+
|hendrycksTest-medical_genetics | 1|acc |0.3400|± |0.0476|
|
88 |
+
| | |acc_norm|0.3400|± |0.0476|
|
89 |
+
|hendrycksTest-miscellaneous | 1|acc |0.2669|± |0.0158|
|
90 |
+
| | |acc_norm|0.2669|± |0.0158|
|
91 |
+
|hendrycksTest-moral_disputes | 1|acc |0.2919|± |0.0245|
|
92 |
+
| | |acc_norm|0.2919|± |0.0245|
|
93 |
+
|hendrycksTest-moral_scenarios | 1|acc |0.2447|± |0.0144|
|
94 |
+
| | |acc_norm|0.2447|± |0.0144|
|
95 |
+
|hendrycksTest-nutrition | 1|acc |0.2549|± |0.0250|
|
96 |
+
| | |acc_norm|0.2549|± |0.0250|
|
97 |
+
|hendrycksTest-philosophy | 1|acc |0.2122|± |0.0232|
|
98 |
+
| | |acc_norm|0.2122|± |0.0232|
|
99 |
+
|hendrycksTest-prehistory | 1|acc |0.2685|± |0.0247|
|
100 |
+
| | |acc_norm|0.2685|± |0.0247|
|
101 |
+
|hendrycksTest-professional_accounting | 1|acc |0.2021|± |0.0240|
|
102 |
+
| | |acc_norm|0.2021|± |0.0240|
|
103 |
+
|hendrycksTest-professional_law | 1|acc |0.2432|± |0.0110|
|
104 |
+
| | |acc_norm|0.2432|± |0.0110|
|
105 |
+
|hendrycksTest-professional_medicine | 1|acc |0.1654|± |0.0226|
|
106 |
+
| | |acc_norm|0.1654|± |0.0226|
|
107 |
+
|hendrycksTest-professional_psychology | 1|acc |0.2582|± |0.0177|
|
108 |
+
| | |acc_norm|0.2582|± |0.0177|
|
109 |
+
|hendrycksTest-public_relations | 1|acc |0.2909|± |0.0435|
|
110 |
+
| | |acc_norm|0.2909|± |0.0435|
|
111 |
+
|hendrycksTest-security_studies | 1|acc |0.2041|± |0.0258|
|
112 |
+
| | |acc_norm|0.2041|± |0.0258|
|
113 |
+
|hendrycksTest-sociology | 1|acc |0.2637|± |0.0312|
|
114 |
+
| | |acc_norm|0.2637|± |0.0312|
|
115 |
+
|hendrycksTest-us_foreign_policy | 1|acc |0.2900|± |0.0456|
|
116 |
+
| | |acc_norm|0.2900|± |0.0456|
|
117 |
+
|hendrycksTest-virology | 1|acc |0.2651|± |0.0344|
|
118 |
+
| | |acc_norm|0.2651|± |0.0344|
|
119 |
+
|hendrycksTest-world_religions | 1|acc |0.3450|± |0.0365|
|
120 |
+
| | |acc_norm|0.3450|± |0.0365|
|
121 |
+
|hendrycksTestRu-abstract_algebra | 1|acc |0.2300|± |0.0423|
|
122 |
+
| | |acc_norm|0.2300|± |0.0423|
|
123 |
+
|hendrycksTestRu-anatomy | 1|acc |0.1778|± |0.0330|
|
124 |
+
| | |acc_norm|0.1778|± |0.0330|
|
125 |
+
|hendrycksTestRu-astronomy | 1|acc |0.1776|± |0.0311|
|
126 |
+
| | |acc_norm|0.1776|± |0.0311|
|
127 |
+
|hendrycksTestRu-business_ethics | 1|acc |0.2200|± |0.0416|
|
128 |
+
| | |acc_norm|0.2200|± |0.0416|
|
129 |
+
|hendrycksTestRu-clinical_knowledge | 1|acc |0.2151|± |0.0253|
|
130 |
+
| | |acc_norm|0.2151|± |0.0253|
|
131 |
+
|hendrycksTestRu-college_biology | 1|acc |0.2569|± |0.0365|
|
132 |
+
| | |acc_norm|0.2569|± |0.0365|
|
133 |
+
|hendrycksTestRu-college_chemistry | 1|acc |0.1800|± |0.0386|
|
134 |
+
| | |acc_norm|0.1800|± |0.0386|
|
135 |
+
|hendrycksTestRu-college_computer_science | 1|acc |0.2700|± |0.0446|
|
136 |
+
| | |acc_norm|0.2700|± |0.0446|
|
137 |
+
|hendrycksTestRu-college_mathematics | 1|acc |0.2200|± |0.0416|
|
138 |
+
| | |acc_norm|0.2200|± |0.0416|
|
139 |
+
|hendrycksTestRu-college_medicine | 1|acc |0.1908|± |0.0300|
|
140 |
+
| | |acc_norm|0.1908|± |0.0300|
|
141 |
+
|hendrycksTestRu-college_physics | 1|acc |0.2059|± |0.0402|
|
142 |
+
| | |acc_norm|0.2059|± |0.0402|
|
143 |
+
|hendrycksTestRu-computer_security | 1|acc |0.3000|± |0.0461|
|
144 |
+
| | |acc_norm|0.3000|± |0.0461|
|
145 |
+
|hendrycksTestRu-conceptual_physics | 1|acc |0.2681|± |0.0290|
|
146 |
+
| | |acc_norm|0.2681|± |0.0290|
|
147 |
+
|hendrycksTestRu-econometrics | 1|acc |0.2368|± |0.0400|
|
148 |
+
| | |acc_norm|0.2368|± |0.0400|
|
149 |
+
|hendrycksTestRu-electrical_engineering | 1|acc |0.2483|± |0.0360|
|
150 |
+
| | |acc_norm|0.2483|± |0.0360|
|
151 |
+
|hendrycksTestRu-elementary_mathematics | 1|acc |0.2063|± |0.0208|
|
152 |
+
| | |acc_norm|0.2063|± |0.0208|
|
153 |
+
|hendrycksTestRu-formal_logic | 1|acc |0.2937|± |0.0407|
|
154 |
+
| | |acc_norm|0.2937|± |0.0407|
|
155 |
+
|hendrycksTestRu-global_facts | 1|acc |0.2000|± |0.0402|
|
156 |
+
| | |acc_norm|0.2000|± |0.0402|
|
157 |
+
|hendrycksTestRu-high_school_biology | 1|acc |0.1871|± |0.0222|
|
158 |
+
| | |acc_norm|0.1871|± |0.0222|
|
159 |
+
|hendrycksTestRu-high_school_chemistry | 1|acc |0.1724|± |0.0266|
|
160 |
+
| | |acc_norm|0.1724|± |0.0266|
|
161 |
+
|hendrycksTestRu-high_school_computer_science | 1|acc |0.2900|± |0.0456|
|
162 |
+
| | |acc_norm|0.2900|± |0.0456|
|
163 |
+
|hendrycksTestRu-high_school_european_history | 1|acc |0.2242|± |0.0326|
|
164 |
+
| | |acc_norm|0.2242|± |0.0326|
|
165 |
+
|hendrycksTestRu-high_school_geography | 1|acc |0.1869|± |0.0278|
|
166 |
+
| | |acc_norm|0.1869|± |0.0278|
|
167 |
+
|hendrycksTestRu-high_school_government_and_politics| 1|acc |0.2124|± |0.0295|
|
168 |
+
| | |acc_norm|0.2124|± |0.0295|
|
169 |
+
|hendrycksTestRu-high_school_macroeconomics | 1|acc |0.2128|± |0.0208|
|
170 |
+
| | |acc_norm|0.2128|± |0.0208|
|
171 |
+
|hendrycksTestRu-high_school_mathematics | 1|acc |0.2074|± |0.0247|
|
172 |
+
| | |acc_norm|0.2074|± |0.0247|
|
173 |
+
|hendrycksTestRu-high_school_microeconomics | 1|acc |0.2227|± |0.0270|
|
174 |
+
| | |acc_norm|0.2227|± |0.0270|
|
175 |
+
|hendrycksTestRu-high_school_physics | 1|acc |0.1987|± |0.0326|
|
176 |
+
| | |acc_norm|0.1987|± |0.0326|
|
177 |
+
|hendrycksTestRu-high_school_psychology | 1|acc |0.2000|± |0.0171|
|
178 |
+
| | |acc_norm|0.2000|± |0.0171|
|
179 |
+
|hendrycksTestRu-high_school_statistics | 1|acc |0.1713|± |0.0257|
|
180 |
+
| | |acc_norm|0.1713|± |0.0257|
|
181 |
+
|hendrycksTestRu-high_school_us_history | 1|acc |0.2647|± |0.0310|
|
182 |
+
| | |acc_norm|0.2647|± |0.0310|
|
183 |
+
|hendrycksTestRu-high_school_world_history | 1|acc |0.2658|± |0.0288|
|
184 |
+
| | |acc_norm|0.2658|± |0.0288|
|
185 |
+
|hendrycksTestRu-human_aging | 1|acc |0.2780|± |0.0301|
|
186 |
+
| | |acc_norm|0.2780|± |0.0301|
|
187 |
+
|hendrycksTestRu-human_sexuality | 1|acc |0.2443|± |0.0377|
|
188 |
+
| | |acc_norm|0.2443|± |0.0377|
|
189 |
+
|hendrycksTestRu-international_law | 1|acc |0.2314|± |0.0385|
|
190 |
+
| | |acc_norm|0.2314|± |0.0385|
|
191 |
+
|hendrycksTestRu-jurisprudence | 1|acc |0.2593|± |0.0424|
|
192 |
+
| | |acc_norm|0.2593|± |0.0424|
|
193 |
+
|hendrycksTestRu-logical_fallacies | 1|acc |0.2270|± |0.0329|
|
194 |
+
| | |acc_norm|0.2270|± |0.0329|
|
195 |
+
|hendrycksTestRu-machine_learning | 1|acc |0.2857|± |0.0429|
|
196 |
+
| | |acc_norm|0.2857|± |0.0429|
|
197 |
+
|hendrycksTestRu-management | 1|acc |0.2136|± |0.0406|
|
198 |
+
| | |acc_norm|0.2136|± |0.0406|
|
199 |
+
|hendrycksTestRu-marketing | 1|acc |0.2991|± |0.0300|
|
200 |
+
| | |acc_norm|0.2991|± |0.0300|
|
201 |
+
|hendrycksTestRu-medical_genetics | 1|acc |0.2700|± |0.0446|
|
202 |
+
| | |acc_norm|0.2700|± |0.0446|
|
203 |
+
|hendrycksTestRu-miscellaneous | 1|acc |0.2490|± |0.0155|
|
204 |
+
| | |acc_norm|0.2490|± |0.0155|
|
205 |
+
|hendrycksTestRu-moral_disputes | 1|acc |0.2601|± |0.0236|
|
206 |
+
| | |acc_norm|0.2601|± |0.0236|
|
207 |
+
|hendrycksTestRu-moral_scenarios | 1|acc |0.2369|± |0.0142|
|
208 |
+
| | |acc_norm|0.2369|± |0.0142|
|
209 |
+
|hendrycksTestRu-nutrition | 1|acc |0.2190|± |0.0237|
|
210 |
+
| | |acc_norm|0.2190|± |0.0237|
|
211 |
+
|hendrycksTestRu-philosophy | 1|acc |0.1897|± |0.0223|
|
212 |
+
| | |acc_norm|0.1897|± |0.0223|
|
213 |
+
|hendrycksTestRu-prehistory | 1|acc |0.2191|± |0.0230|
|
214 |
+
| | |acc_norm|0.2191|± |0.0230|
|
215 |
+
|hendrycksTestRu-professional_accounting | 1|acc |0.2092|± |0.0243|
|
216 |
+
| | |acc_norm|0.2092|± |0.0243|
|
217 |
+
|hendrycksTestRu-professional_law | 1|acc |0.2627|± |0.0112|
|
218 |
+
| | |acc_norm|0.2627|± |0.0112|
|
219 |
+
|hendrycksTestRu-professional_medicine | 1|acc |0.1801|± |0.0233|
|
220 |
+
| | |acc_norm|0.1801|± |0.0233|
|
221 |
+
|hendrycksTestRu-professional_psychology | 1|acc |0.2533|± |0.0176|
|
222 |
+
| | |acc_norm|0.2533|± |0.0176|
|
223 |
+
|hendrycksTestRu-public_relations | 1|acc |0.2273|± |0.0401|
|
224 |
+
| | |acc_norm|0.2273|± |0.0401|
|
225 |
+
|hendrycksTestRu-security_studies | 1|acc |0.1959|± |0.0254|
|
226 |
+
| | |acc_norm|0.1959|± |0.0254|
|
227 |
+
|hendrycksTestRu-sociology | 1|acc |0.2239|± |0.0295|
|
228 |
+
| | |acc_norm|0.2239|± |0.0295|
|
229 |
+
|hendrycksTestRu-us_foreign_policy | 1|acc |0.2300|± |0.0423|
|
230 |
+
| | |acc_norm|0.2300|± |0.0423|
|
231 |
+
|hendrycksTestRu-virology | 1|acc |0.2831|± |0.0351|
|
232 |
+
| | |acc_norm|0.2831|± |0.0351|
|
233 |
+
|hendrycksTestRu-world_religions | 1|acc |0.3158|± |0.0357|
|
234 |
+
| | |acc_norm|0.3158|± |0.0357|
|
235 |
+
|muserc | 1|acc |0.0000|± |0.0000|
|
236 |
+
|parus | 0|acc |0.6500|± |0.0479|
|
237 |
+
|rcb | 1|acc |0.5273|± |0.0337|
|
238 |
+
| | |f1 |0.2302| | |
|
239 |
+
|rucos | 0|f1 |0.5248|± |0.0057|
|
240 |
+
| | |em |0.5108|± |0.0057|
|
241 |
+
|russe | 0|acc |0.3691|± |0.0052|
|
242 |
+
|ruterra | 1|acc |0.4984|± |0.0286|
|
243 |
+
| | |f1 |0.2217| | |
|
244 |
+
|rwsd | 0|acc |0.5539|± |0.0349|
|
245 |
+
|xwinograd_ru | 0|acc |0.5587|± |0.0280|
|
246 |
+
|xnli_ru | 0|acc |0.3940|± |0.0069|
|