Niraya666 commited on
Commit
aee3631
1 Parent(s): b688fb4

End of training

Browse files
Files changed (6) hide show
  1. README.md +203 -203
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. pytorch_model.bin +1 -1
  5. train_results.json +4 -4
  6. trainer_state.json +1035 -1035
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8142857142857143
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.6875
36
- - Accuracy: 0.8143
37
 
38
  ## Model description
39
 
@@ -67,206 +67,206 @@ The following hyperparameters were used during training:
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | No log | 1.0 | 2 | 1.0694 | 0.4143 |
71
- | No log | 2.0 | 4 | 1.0689 | 0.4143 |
72
- | No log | 3.0 | 6 | 1.0682 | 0.4143 |
73
- | No log | 4.0 | 8 | 1.0671 | 0.4143 |
74
- | 1.096 | 5.0 | 10 | 1.0657 | 0.4286 |
75
- | 1.096 | 6.0 | 12 | 1.0640 | 0.4286 |
76
- | 1.096 | 7.0 | 14 | 1.0621 | 0.4143 |
77
- | 1.096 | 8.0 | 16 | 1.0598 | 0.4 |
78
- | 1.096 | 9.0 | 18 | 1.0572 | 0.4 |
79
- | 1.0906 | 10.0 | 20 | 1.0545 | 0.4 |
80
- | 1.0906 | 11.0 | 22 | 1.0517 | 0.4143 |
81
- | 1.0906 | 12.0 | 24 | 1.0486 | 0.4143 |
82
- | 1.0906 | 13.0 | 26 | 1.0453 | 0.4143 |
83
- | 1.0906 | 14.0 | 28 | 1.0418 | 0.4143 |
84
- | 1.0647 | 15.0 | 30 | 1.0380 | 0.4143 |
85
- | 1.0647 | 16.0 | 32 | 1.0343 | 0.4143 |
86
- | 1.0647 | 17.0 | 34 | 1.0307 | 0.4143 |
87
- | 1.0647 | 18.0 | 36 | 1.0268 | 0.4286 |
88
- | 1.0647 | 19.0 | 38 | 1.0229 | 0.4286 |
89
- | 1.0451 | 20.0 | 40 | 1.0191 | 0.4429 |
90
- | 1.0451 | 21.0 | 42 | 1.0153 | 0.4571 |
91
- | 1.0451 | 22.0 | 44 | 1.0116 | 0.4714 |
92
- | 1.0451 | 23.0 | 46 | 1.0082 | 0.4714 |
93
- | 1.0451 | 24.0 | 48 | 1.0049 | 0.4714 |
94
- | 1.037 | 25.0 | 50 | 1.0016 | 0.4714 |
95
- | 1.037 | 26.0 | 52 | 0.9979 | 0.4714 |
96
- | 1.037 | 27.0 | 54 | 0.9944 | 0.4714 |
97
- | 1.037 | 28.0 | 56 | 0.9913 | 0.4714 |
98
- | 1.037 | 29.0 | 58 | 0.9883 | 0.4714 |
99
- | 1.0214 | 30.0 | 60 | 0.9847 | 0.4714 |
100
- | 1.0214 | 31.0 | 62 | 0.9809 | 0.4571 |
101
- | 1.0214 | 32.0 | 64 | 0.9768 | 0.4714 |
102
- | 1.0214 | 33.0 | 66 | 0.9723 | 0.4714 |
103
- | 1.0214 | 34.0 | 68 | 0.9671 | 0.4714 |
104
- | 1.0181 | 35.0 | 70 | 0.9616 | 0.4714 |
105
- | 1.0181 | 36.0 | 72 | 0.9561 | 0.4857 |
106
- | 1.0181 | 37.0 | 74 | 0.9505 | 0.5 |
107
- | 1.0181 | 38.0 | 76 | 0.9446 | 0.5286 |
108
- | 1.0181 | 39.0 | 78 | 0.9388 | 0.5286 |
109
- | 0.9646 | 40.0 | 80 | 0.9331 | 0.5286 |
110
- | 0.9646 | 41.0 | 82 | 0.9276 | 0.5143 |
111
- | 0.9646 | 42.0 | 84 | 0.9224 | 0.5286 |
112
- | 0.9646 | 43.0 | 86 | 0.9172 | 0.5286 |
113
- | 0.9646 | 44.0 | 88 | 0.9120 | 0.5286 |
114
- | 0.946 | 45.0 | 90 | 0.9070 | 0.5143 |
115
- | 0.946 | 46.0 | 92 | 0.9021 | 0.5286 |
116
- | 0.946 | 47.0 | 94 | 0.8976 | 0.5429 |
117
- | 0.946 | 48.0 | 96 | 0.8933 | 0.5429 |
118
- | 0.946 | 49.0 | 98 | 0.8891 | 0.5714 |
119
- | 0.9244 | 50.0 | 100 | 0.8846 | 0.5714 |
120
- | 0.9244 | 51.0 | 102 | 0.8803 | 0.5714 |
121
- | 0.9244 | 52.0 | 104 | 0.8759 | 0.5714 |
122
- | 0.9244 | 53.0 | 106 | 0.8716 | 0.5714 |
123
- | 0.9244 | 54.0 | 108 | 0.8674 | 0.5714 |
124
- | 0.9228 | 55.0 | 110 | 0.8634 | 0.5857 |
125
- | 0.9228 | 56.0 | 112 | 0.8598 | 0.6 |
126
- | 0.9228 | 57.0 | 114 | 0.8562 | 0.5857 |
127
- | 0.9228 | 58.0 | 116 | 0.8527 | 0.6 |
128
- | 0.9228 | 59.0 | 118 | 0.8492 | 0.6 |
129
- | 0.8956 | 60.0 | 120 | 0.8456 | 0.6143 |
130
- | 0.8956 | 61.0 | 122 | 0.8421 | 0.6 |
131
- | 0.8956 | 62.0 | 124 | 0.8385 | 0.6 |
132
- | 0.8956 | 63.0 | 126 | 0.8351 | 0.6 |
133
- | 0.8956 | 64.0 | 128 | 0.8318 | 0.6143 |
134
- | 0.8943 | 65.0 | 130 | 0.8286 | 0.6143 |
135
- | 0.8943 | 66.0 | 132 | 0.8255 | 0.6 |
136
- | 0.8943 | 67.0 | 134 | 0.8223 | 0.6286 |
137
- | 0.8943 | 68.0 | 136 | 0.8191 | 0.6429 |
138
- | 0.8943 | 69.0 | 138 | 0.8159 | 0.6286 |
139
- | 0.854 | 70.0 | 140 | 0.8129 | 0.6429 |
140
- | 0.854 | 71.0 | 142 | 0.8100 | 0.6714 |
141
- | 0.854 | 72.0 | 144 | 0.8073 | 0.6714 |
142
- | 0.854 | 73.0 | 146 | 0.8048 | 0.6571 |
143
- | 0.854 | 74.0 | 148 | 0.8025 | 0.6714 |
144
- | 0.8615 | 75.0 | 150 | 0.8001 | 0.6571 |
145
- | 0.8615 | 76.0 | 152 | 0.7976 | 0.6571 |
146
- | 0.8615 | 77.0 | 154 | 0.7952 | 0.6571 |
147
- | 0.8615 | 78.0 | 156 | 0.7928 | 0.6571 |
148
- | 0.8615 | 79.0 | 158 | 0.7904 | 0.6571 |
149
- | 0.8507 | 80.0 | 160 | 0.7882 | 0.6714 |
150
- | 0.8507 | 81.0 | 162 | 0.7858 | 0.6714 |
151
- | 0.8507 | 82.0 | 164 | 0.7835 | 0.6857 |
152
- | 0.8507 | 83.0 | 166 | 0.7811 | 0.6857 |
153
- | 0.8507 | 84.0 | 168 | 0.7788 | 0.6857 |
154
- | 0.838 | 85.0 | 170 | 0.7765 | 0.6857 |
155
- | 0.838 | 86.0 | 172 | 0.7743 | 0.6857 |
156
- | 0.838 | 87.0 | 174 | 0.7723 | 0.6857 |
157
- | 0.838 | 88.0 | 176 | 0.7703 | 0.6857 |
158
- | 0.838 | 89.0 | 178 | 0.7684 | 0.6857 |
159
- | 0.8245 | 90.0 | 180 | 0.7664 | 0.6857 |
160
- | 0.8245 | 91.0 | 182 | 0.7644 | 0.6857 |
161
- | 0.8245 | 92.0 | 184 | 0.7625 | 0.6857 |
162
- | 0.8245 | 93.0 | 186 | 0.7606 | 0.7143 |
163
- | 0.8245 | 94.0 | 188 | 0.7587 | 0.7143 |
164
- | 0.8124 | 95.0 | 190 | 0.7569 | 0.7143 |
165
- | 0.8124 | 96.0 | 192 | 0.7551 | 0.7286 |
166
- | 0.8124 | 97.0 | 194 | 0.7533 | 0.7286 |
167
- | 0.8124 | 98.0 | 196 | 0.7517 | 0.7286 |
168
- | 0.8124 | 99.0 | 198 | 0.7500 | 0.7429 |
169
- | 0.8102 | 100.0 | 200 | 0.7483 | 0.7429 |
170
- | 0.8102 | 101.0 | 202 | 0.7465 | 0.7429 |
171
- | 0.8102 | 102.0 | 204 | 0.7450 | 0.7429 |
172
- | 0.8102 | 103.0 | 206 | 0.7434 | 0.7429 |
173
- | 0.8102 | 104.0 | 208 | 0.7419 | 0.7429 |
174
- | 0.821 | 105.0 | 210 | 0.7404 | 0.7571 |
175
- | 0.821 | 106.0 | 212 | 0.7389 | 0.7571 |
176
- | 0.821 | 107.0 | 214 | 0.7374 | 0.7571 |
177
- | 0.821 | 108.0 | 216 | 0.7359 | 0.7571 |
178
- | 0.821 | 109.0 | 218 | 0.7345 | 0.7571 |
179
- | 0.7918 | 110.0 | 220 | 0.7330 | 0.7571 |
180
- | 0.7918 | 111.0 | 222 | 0.7316 | 0.7571 |
181
- | 0.7918 | 112.0 | 224 | 0.7302 | 0.7571 |
182
- | 0.7918 | 113.0 | 226 | 0.7289 | 0.7571 |
183
- | 0.7918 | 114.0 | 228 | 0.7275 | 0.7571 |
184
- | 0.8063 | 115.0 | 230 | 0.7262 | 0.7714 |
185
- | 0.8063 | 116.0 | 232 | 0.7247 | 0.7714 |
186
- | 0.8063 | 117.0 | 234 | 0.7232 | 0.7571 |
187
- | 0.8063 | 118.0 | 236 | 0.7218 | 0.7571 |
188
- | 0.8063 | 119.0 | 238 | 0.7204 | 0.7571 |
189
- | 0.7897 | 120.0 | 240 | 0.7192 | 0.7571 |
190
- | 0.7897 | 121.0 | 242 | 0.7180 | 0.7571 |
191
- | 0.7897 | 122.0 | 244 | 0.7168 | 0.7571 |
192
- | 0.7897 | 123.0 | 246 | 0.7158 | 0.7571 |
193
- | 0.7897 | 124.0 | 248 | 0.7149 | 0.7714 |
194
- | 0.7845 | 125.0 | 250 | 0.7140 | 0.7571 |
195
- | 0.7845 | 126.0 | 252 | 0.7131 | 0.7571 |
196
- | 0.7845 | 127.0 | 254 | 0.7121 | 0.7571 |
197
- | 0.7845 | 128.0 | 256 | 0.7110 | 0.7571 |
198
- | 0.7845 | 129.0 | 258 | 0.7099 | 0.7571 |
199
- | 0.7781 | 130.0 | 260 | 0.7088 | 0.7571 |
200
- | 0.7781 | 131.0 | 262 | 0.7076 | 0.7571 |
201
- | 0.7781 | 132.0 | 264 | 0.7066 | 0.7571 |
202
- | 0.7781 | 133.0 | 266 | 0.7055 | 0.7571 |
203
- | 0.7781 | 134.0 | 268 | 0.7045 | 0.7714 |
204
- | 0.7708 | 135.0 | 270 | 0.7034 | 0.7714 |
205
- | 0.7708 | 136.0 | 272 | 0.7025 | 0.7571 |
206
- | 0.7708 | 137.0 | 274 | 0.7016 | 0.7571 |
207
- | 0.7708 | 138.0 | 276 | 0.7008 | 0.7571 |
208
- | 0.7708 | 139.0 | 278 | 0.6999 | 0.7571 |
209
- | 0.797 | 140.0 | 280 | 0.6990 | 0.7571 |
210
- | 0.797 | 141.0 | 282 | 0.6981 | 0.7714 |
211
- | 0.797 | 142.0 | 284 | 0.6973 | 0.7714 |
212
- | 0.797 | 143.0 | 286 | 0.6966 | 0.7714 |
213
- | 0.797 | 144.0 | 288 | 0.6959 | 0.7714 |
214
- | 0.7768 | 145.0 | 290 | 0.6952 | 0.7714 |
215
- | 0.7768 | 146.0 | 292 | 0.6944 | 0.7714 |
216
- | 0.7768 | 147.0 | 294 | 0.6936 | 0.7714 |
217
- | 0.7768 | 148.0 | 296 | 0.6928 | 0.7857 |
218
- | 0.7768 | 149.0 | 298 | 0.6920 | 0.7857 |
219
- | 0.7569 | 150.0 | 300 | 0.6912 | 0.7857 |
220
- | 0.7569 | 151.0 | 302 | 0.6904 | 0.8 |
221
- | 0.7569 | 152.0 | 304 | 0.6897 | 0.8 |
222
- | 0.7569 | 153.0 | 306 | 0.6890 | 0.8 |
223
- | 0.7569 | 154.0 | 308 | 0.6882 | 0.8 |
224
- | 0.7807 | 155.0 | 310 | 0.6875 | 0.8143 |
225
- | 0.7807 | 156.0 | 312 | 0.6868 | 0.8143 |
226
- | 0.7807 | 157.0 | 314 | 0.6861 | 0.8143 |
227
- | 0.7807 | 158.0 | 316 | 0.6854 | 0.8143 |
228
- | 0.7807 | 159.0 | 318 | 0.6848 | 0.8143 |
229
- | 0.7472 | 160.0 | 320 | 0.6842 | 0.8143 |
230
- | 0.7472 | 161.0 | 322 | 0.6836 | 0.8143 |
231
- | 0.7472 | 162.0 | 324 | 0.6831 | 0.8143 |
232
- | 0.7472 | 163.0 | 326 | 0.6826 | 0.8143 |
233
- | 0.7472 | 164.0 | 328 | 0.6822 | 0.8143 |
234
- | 0.7665 | 165.0 | 330 | 0.6818 | 0.8 |
235
- | 0.7665 | 166.0 | 332 | 0.6814 | 0.8 |
236
- | 0.7665 | 167.0 | 334 | 0.6810 | 0.8 |
237
- | 0.7665 | 168.0 | 336 | 0.6807 | 0.7857 |
238
- | 0.7665 | 169.0 | 338 | 0.6803 | 0.7857 |
239
- | 0.7684 | 170.0 | 340 | 0.6800 | 0.7857 |
240
- | 0.7684 | 171.0 | 342 | 0.6797 | 0.7857 |
241
- | 0.7684 | 172.0 | 344 | 0.6794 | 0.7857 |
242
- | 0.7684 | 173.0 | 346 | 0.6790 | 0.7857 |
243
- | 0.7684 | 174.0 | 348 | 0.6787 | 0.7857 |
244
- | 0.7459 | 175.0 | 350 | 0.6784 | 0.7857 |
245
- | 0.7459 | 176.0 | 352 | 0.6781 | 0.7857 |
246
- | 0.7459 | 177.0 | 354 | 0.6778 | 0.7857 |
247
- | 0.7459 | 178.0 | 356 | 0.6775 | 0.7857 |
248
- | 0.7459 | 179.0 | 358 | 0.6772 | 0.7857 |
249
- | 0.742 | 180.0 | 360 | 0.6769 | 0.7857 |
250
- | 0.742 | 181.0 | 362 | 0.6766 | 0.7857 |
251
- | 0.742 | 182.0 | 364 | 0.6764 | 0.7857 |
252
- | 0.742 | 183.0 | 366 | 0.6762 | 0.7857 |
253
- | 0.742 | 184.0 | 368 | 0.6760 | 0.7857 |
254
- | 0.7642 | 185.0 | 370 | 0.6758 | 0.7857 |
255
- | 0.7642 | 186.0 | 372 | 0.6756 | 0.7857 |
256
- | 0.7642 | 187.0 | 374 | 0.6754 | 0.7857 |
257
- | 0.7642 | 188.0 | 376 | 0.6752 | 0.7857 |
258
- | 0.7642 | 189.0 | 378 | 0.6750 | 0.7857 |
259
- | 0.7277 | 190.0 | 380 | 0.6749 | 0.7857 |
260
- | 0.7277 | 191.0 | 382 | 0.6748 | 0.7857 |
261
- | 0.7277 | 192.0 | 384 | 0.6746 | 0.7857 |
262
- | 0.7277 | 193.0 | 386 | 0.6745 | 0.7857 |
263
- | 0.7277 | 194.0 | 388 | 0.6745 | 0.7857 |
264
- | 0.764 | 195.0 | 390 | 0.6744 | 0.7857 |
265
- | 0.764 | 196.0 | 392 | 0.6743 | 0.7857 |
266
- | 0.764 | 197.0 | 394 | 0.6742 | 0.7857 |
267
- | 0.764 | 198.0 | 396 | 0.6742 | 0.8 |
268
- | 0.764 | 199.0 | 398 | 0.6742 | 0.8 |
269
- | 0.7444 | 200.0 | 400 | 0.6742 | 0.8 |
270
 
271
 
272
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8285714285714286
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.6771
36
+ - Accuracy: 0.8286
37
 
38
  ## Model description
39
 
 
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | No log | 1.0 | 2 | 0.6875 | 0.8143 |
71
+ | No log | 2.0 | 4 | 0.6874 | 0.8143 |
72
+ | No log | 3.0 | 6 | 0.6873 | 0.8143 |
73
+ | No log | 4.0 | 8 | 0.6871 | 0.8143 |
74
+ | 0.7555 | 5.0 | 10 | 0.6869 | 0.8143 |
75
+ | 0.7555 | 6.0 | 12 | 0.6866 | 0.8143 |
76
+ | 0.7555 | 7.0 | 14 | 0.6862 | 0.8143 |
77
+ | 0.7555 | 8.0 | 16 | 0.6858 | 0.8143 |
78
+ | 0.7555 | 9.0 | 18 | 0.6853 | 0.8143 |
79
+ | 0.7576 | 10.0 | 20 | 0.6848 | 0.8143 |
80
+ | 0.7576 | 11.0 | 22 | 0.6842 | 0.8143 |
81
+ | 0.7576 | 12.0 | 24 | 0.6836 | 0.8143 |
82
+ | 0.7576 | 13.0 | 26 | 0.6830 | 0.8143 |
83
+ | 0.7576 | 14.0 | 28 | 0.6823 | 0.8143 |
84
+ | 0.769 | 15.0 | 30 | 0.6816 | 0.8 |
85
+ | 0.769 | 16.0 | 32 | 0.6808 | 0.8 |
86
+ | 0.769 | 17.0 | 34 | 0.6800 | 0.8143 |
87
+ | 0.769 | 18.0 | 36 | 0.6791 | 0.8143 |
88
+ | 0.769 | 19.0 | 38 | 0.6781 | 0.8143 |
89
+ | 0.7564 | 20.0 | 40 | 0.6771 | 0.8286 |
90
+ | 0.7564 | 21.0 | 42 | 0.6760 | 0.8143 |
91
+ | 0.7564 | 22.0 | 44 | 0.6748 | 0.8143 |
92
+ | 0.7564 | 23.0 | 46 | 0.6737 | 0.8 |
93
+ | 0.7564 | 24.0 | 48 | 0.6725 | 0.8 |
94
+ | 0.7508 | 25.0 | 50 | 0.6713 | 0.8143 |
95
+ | 0.7508 | 26.0 | 52 | 0.6701 | 0.8143 |
96
+ | 0.7508 | 27.0 | 54 | 0.6689 | 0.8143 |
97
+ | 0.7508 | 28.0 | 56 | 0.6674 | 0.8143 |
98
+ | 0.7508 | 29.0 | 58 | 0.6660 | 0.8143 |
99
+ | 0.747 | 30.0 | 60 | 0.6646 | 0.8143 |
100
+ | 0.747 | 31.0 | 62 | 0.6631 | 0.8143 |
101
+ | 0.747 | 32.0 | 64 | 0.6616 | 0.8143 |
102
+ | 0.747 | 33.0 | 66 | 0.6601 | 0.8143 |
103
+ | 0.747 | 34.0 | 68 | 0.6586 | 0.8143 |
104
+ | 0.7343 | 35.0 | 70 | 0.6570 | 0.8143 |
105
+ | 0.7343 | 36.0 | 72 | 0.6553 | 0.8143 |
106
+ | 0.7343 | 37.0 | 74 | 0.6536 | 0.8143 |
107
+ | 0.7343 | 38.0 | 76 | 0.6517 | 0.8143 |
108
+ | 0.7343 | 39.0 | 78 | 0.6499 | 0.8143 |
109
+ | 0.7532 | 40.0 | 80 | 0.6480 | 0.8143 |
110
+ | 0.7532 | 41.0 | 82 | 0.6461 | 0.8143 |
111
+ | 0.7532 | 42.0 | 84 | 0.6442 | 0.8143 |
112
+ | 0.7532 | 43.0 | 86 | 0.6423 | 0.8143 |
113
+ | 0.7532 | 44.0 | 88 | 0.6405 | 0.8143 |
114
+ | 0.7239 | 45.0 | 90 | 0.6387 | 0.8143 |
115
+ | 0.7239 | 46.0 | 92 | 0.6368 | 0.8143 |
116
+ | 0.7239 | 47.0 | 94 | 0.6352 | 0.8143 |
117
+ | 0.7239 | 48.0 | 96 | 0.6337 | 0.8143 |
118
+ | 0.7239 | 49.0 | 98 | 0.6321 | 0.8286 |
119
+ | 0.7085 | 50.0 | 100 | 0.6307 | 0.8286 |
120
+ | 0.7085 | 51.0 | 102 | 0.6294 | 0.8286 |
121
+ | 0.7085 | 52.0 | 104 | 0.6278 | 0.8286 |
122
+ | 0.7085 | 53.0 | 106 | 0.6263 | 0.8286 |
123
+ | 0.7085 | 54.0 | 108 | 0.6248 | 0.8143 |
124
+ | 0.7203 | 55.0 | 110 | 0.6233 | 0.8143 |
125
+ | 0.7203 | 56.0 | 112 | 0.6219 | 0.8143 |
126
+ | 0.7203 | 57.0 | 114 | 0.6205 | 0.8143 |
127
+ | 0.7203 | 58.0 | 116 | 0.6191 | 0.8143 |
128
+ | 0.7203 | 59.0 | 118 | 0.6179 | 0.8143 |
129
+ | 0.7136 | 60.0 | 120 | 0.6167 | 0.8143 |
130
+ | 0.7136 | 61.0 | 122 | 0.6157 | 0.8143 |
131
+ | 0.7136 | 62.0 | 124 | 0.6148 | 0.8 |
132
+ | 0.7136 | 63.0 | 126 | 0.6138 | 0.8 |
133
+ | 0.7136 | 64.0 | 128 | 0.6125 | 0.8 |
134
+ | 0.7123 | 65.0 | 130 | 0.6111 | 0.8 |
135
+ | 0.7123 | 66.0 | 132 | 0.6096 | 0.8143 |
136
+ | 0.7123 | 67.0 | 134 | 0.6083 | 0.8143 |
137
+ | 0.7123 | 68.0 | 136 | 0.6070 | 0.8143 |
138
+ | 0.7123 | 69.0 | 138 | 0.6057 | 0.8143 |
139
+ | 0.7076 | 70.0 | 140 | 0.6046 | 0.8143 |
140
+ | 0.7076 | 71.0 | 142 | 0.6035 | 0.8143 |
141
+ | 0.7076 | 72.0 | 144 | 0.6023 | 0.8143 |
142
+ | 0.7076 | 73.0 | 146 | 0.6011 | 0.8143 |
143
+ | 0.7076 | 74.0 | 148 | 0.5999 | 0.8143 |
144
+ | 0.6878 | 75.0 | 150 | 0.5988 | 0.8143 |
145
+ | 0.6878 | 76.0 | 152 | 0.5975 | 0.8143 |
146
+ | 0.6878 | 77.0 | 154 | 0.5964 | 0.8143 |
147
+ | 0.6878 | 78.0 | 156 | 0.5953 | 0.8143 |
148
+ | 0.6878 | 79.0 | 158 | 0.5942 | 0.8143 |
149
+ | 0.6657 | 80.0 | 160 | 0.5932 | 0.8143 |
150
+ | 0.6657 | 81.0 | 162 | 0.5923 | 0.8143 |
151
+ | 0.6657 | 82.0 | 164 | 0.5914 | 0.8143 |
152
+ | 0.6657 | 83.0 | 166 | 0.5906 | 0.8143 |
153
+ | 0.6657 | 84.0 | 168 | 0.5897 | 0.8143 |
154
+ | 0.6434 | 85.0 | 170 | 0.5888 | 0.8143 |
155
+ | 0.6434 | 86.0 | 172 | 0.5878 | 0.8143 |
156
+ | 0.6434 | 87.0 | 174 | 0.5868 | 0.8143 |
157
+ | 0.6434 | 88.0 | 176 | 0.5859 | 0.8143 |
158
+ | 0.6434 | 89.0 | 178 | 0.5851 | 0.8143 |
159
+ | 0.6825 | 90.0 | 180 | 0.5843 | 0.8143 |
160
+ | 0.6825 | 91.0 | 182 | 0.5836 | 0.8143 |
161
+ | 0.6825 | 92.0 | 184 | 0.5828 | 0.8143 |
162
+ | 0.6825 | 93.0 | 186 | 0.5823 | 0.8143 |
163
+ | 0.6825 | 94.0 | 188 | 0.5817 | 0.8286 |
164
+ | 0.6695 | 95.0 | 190 | 0.5809 | 0.8143 |
165
+ | 0.6695 | 96.0 | 192 | 0.5801 | 0.8143 |
166
+ | 0.6695 | 97.0 | 194 | 0.5793 | 0.8143 |
167
+ | 0.6695 | 98.0 | 196 | 0.5787 | 0.8143 |
168
+ | 0.6695 | 99.0 | 198 | 0.5780 | 0.8143 |
169
+ | 0.6672 | 100.0 | 200 | 0.5772 | 0.8143 |
170
+ | 0.6672 | 101.0 | 202 | 0.5762 | 0.8143 |
171
+ | 0.6672 | 102.0 | 204 | 0.5754 | 0.8143 |
172
+ | 0.6672 | 103.0 | 206 | 0.5746 | 0.8143 |
173
+ | 0.6672 | 104.0 | 208 | 0.5738 | 0.8143 |
174
+ | 0.6569 | 105.0 | 210 | 0.5731 | 0.8143 |
175
+ | 0.6569 | 106.0 | 212 | 0.5724 | 0.8143 |
176
+ | 0.6569 | 107.0 | 214 | 0.5716 | 0.8143 |
177
+ | 0.6569 | 108.0 | 216 | 0.5708 | 0.8143 |
178
+ | 0.6569 | 109.0 | 218 | 0.5701 | 0.8143 |
179
+ | 0.6748 | 110.0 | 220 | 0.5694 | 0.8143 |
180
+ | 0.6748 | 111.0 | 222 | 0.5687 | 0.8143 |
181
+ | 0.6748 | 112.0 | 224 | 0.5680 | 0.8143 |
182
+ | 0.6748 | 113.0 | 226 | 0.5674 | 0.8143 |
183
+ | 0.6748 | 114.0 | 228 | 0.5668 | 0.8143 |
184
+ | 0.6388 | 115.0 | 230 | 0.5662 | 0.8143 |
185
+ | 0.6388 | 116.0 | 232 | 0.5657 | 0.8143 |
186
+ | 0.6388 | 117.0 | 234 | 0.5652 | 0.8143 |
187
+ | 0.6388 | 118.0 | 236 | 0.5648 | 0.8286 |
188
+ | 0.6388 | 119.0 | 238 | 0.5645 | 0.8286 |
189
+ | 0.6551 | 120.0 | 240 | 0.5641 | 0.8286 |
190
+ | 0.6551 | 121.0 | 242 | 0.5636 | 0.8143 |
191
+ | 0.6551 | 122.0 | 244 | 0.5631 | 0.8143 |
192
+ | 0.6551 | 123.0 | 246 | 0.5627 | 0.8143 |
193
+ | 0.6551 | 124.0 | 248 | 0.5624 | 0.8143 |
194
+ | 0.6452 | 125.0 | 250 | 0.5622 | 0.8143 |
195
+ | 0.6452 | 126.0 | 252 | 0.5620 | 0.8143 |
196
+ | 0.6452 | 127.0 | 254 | 0.5618 | 0.8143 |
197
+ | 0.6452 | 128.0 | 256 | 0.5615 | 0.8143 |
198
+ | 0.6452 | 129.0 | 258 | 0.5613 | 0.8143 |
199
+ | 0.645 | 130.0 | 260 | 0.5611 | 0.8143 |
200
+ | 0.645 | 131.0 | 262 | 0.5608 | 0.8143 |
201
+ | 0.645 | 132.0 | 264 | 0.5606 | 0.8143 |
202
+ | 0.645 | 133.0 | 266 | 0.5602 | 0.8143 |
203
+ | 0.645 | 134.0 | 268 | 0.5596 | 0.8143 |
204
+ | 0.629 | 135.0 | 270 | 0.5590 | 0.8143 |
205
+ | 0.629 | 136.0 | 272 | 0.5582 | 0.8143 |
206
+ | 0.629 | 137.0 | 274 | 0.5576 | 0.8143 |
207
+ | 0.629 | 138.0 | 276 | 0.5571 | 0.8143 |
208
+ | 0.629 | 139.0 | 278 | 0.5568 | 0.8143 |
209
+ | 0.7126 | 140.0 | 280 | 0.5565 | 0.8143 |
210
+ | 0.7126 | 141.0 | 282 | 0.5563 | 0.8143 |
211
+ | 0.7126 | 142.0 | 284 | 0.5561 | 0.8143 |
212
+ | 0.7126 | 143.0 | 286 | 0.5559 | 0.8143 |
213
+ | 0.7126 | 144.0 | 288 | 0.5555 | 0.8143 |
214
+ | 0.669 | 145.0 | 290 | 0.5552 | 0.8143 |
215
+ | 0.669 | 146.0 | 292 | 0.5547 | 0.8143 |
216
+ | 0.669 | 147.0 | 294 | 0.5542 | 0.8143 |
217
+ | 0.669 | 148.0 | 296 | 0.5538 | 0.8143 |
218
+ | 0.669 | 149.0 | 298 | 0.5534 | 0.8143 |
219
+ | 0.6481 | 150.0 | 300 | 0.5530 | 0.8143 |
220
+ | 0.6481 | 151.0 | 302 | 0.5526 | 0.8143 |
221
+ | 0.6481 | 152.0 | 304 | 0.5522 | 0.8143 |
222
+ | 0.6481 | 153.0 | 306 | 0.5519 | 0.8143 |
223
+ | 0.6481 | 154.0 | 308 | 0.5515 | 0.8143 |
224
+ | 0.6211 | 155.0 | 310 | 0.5510 | 0.8143 |
225
+ | 0.6211 | 156.0 | 312 | 0.5506 | 0.8143 |
226
+ | 0.6211 | 157.0 | 314 | 0.5502 | 0.8143 |
227
+ | 0.6211 | 158.0 | 316 | 0.5499 | 0.8143 |
228
+ | 0.6211 | 159.0 | 318 | 0.5496 | 0.8143 |
229
+ | 0.6458 | 160.0 | 320 | 0.5492 | 0.8286 |
230
+ | 0.6458 | 161.0 | 322 | 0.5490 | 0.8143 |
231
+ | 0.6458 | 162.0 | 324 | 0.5488 | 0.8143 |
232
+ | 0.6458 | 163.0 | 326 | 0.5486 | 0.8143 |
233
+ | 0.6458 | 164.0 | 328 | 0.5484 | 0.8143 |
234
+ | 0.6317 | 165.0 | 330 | 0.5481 | 0.8143 |
235
+ | 0.6317 | 166.0 | 332 | 0.5479 | 0.8286 |
236
+ | 0.6317 | 167.0 | 334 | 0.5476 | 0.8286 |
237
+ | 0.6317 | 168.0 | 336 | 0.5473 | 0.8286 |
238
+ | 0.6317 | 169.0 | 338 | 0.5471 | 0.8286 |
239
+ | 0.6154 | 170.0 | 340 | 0.5470 | 0.8286 |
240
+ | 0.6154 | 171.0 | 342 | 0.5468 | 0.8286 |
241
+ | 0.6154 | 172.0 | 344 | 0.5466 | 0.8286 |
242
+ | 0.6154 | 173.0 | 346 | 0.5464 | 0.8286 |
243
+ | 0.6154 | 174.0 | 348 | 0.5462 | 0.8286 |
244
+ | 0.6323 | 175.0 | 350 | 0.5460 | 0.8286 |
245
+ | 0.6323 | 176.0 | 352 | 0.5459 | 0.8286 |
246
+ | 0.6323 | 177.0 | 354 | 0.5457 | 0.8286 |
247
+ | 0.6323 | 178.0 | 356 | 0.5456 | 0.8286 |
248
+ | 0.6323 | 179.0 | 358 | 0.5455 | 0.8286 |
249
+ | 0.6331 | 180.0 | 360 | 0.5453 | 0.8286 |
250
+ | 0.6331 | 181.0 | 362 | 0.5452 | 0.8286 |
251
+ | 0.6331 | 182.0 | 364 | 0.5451 | 0.8286 |
252
+ | 0.6331 | 183.0 | 366 | 0.5449 | 0.8286 |
253
+ | 0.6331 | 184.0 | 368 | 0.5448 | 0.8286 |
254
+ | 0.6333 | 185.0 | 370 | 0.5447 | 0.8286 |
255
+ | 0.6333 | 186.0 | 372 | 0.5447 | 0.8286 |
256
+ | 0.6333 | 187.0 | 374 | 0.5446 | 0.8286 |
257
+ | 0.6333 | 188.0 | 376 | 0.5445 | 0.8286 |
258
+ | 0.6333 | 189.0 | 378 | 0.5445 | 0.8286 |
259
+ | 0.608 | 190.0 | 380 | 0.5444 | 0.8286 |
260
+ | 0.608 | 191.0 | 382 | 0.5444 | 0.8286 |
261
+ | 0.608 | 192.0 | 384 | 0.5443 | 0.8286 |
262
+ | 0.608 | 193.0 | 386 | 0.5443 | 0.8286 |
263
+ | 0.608 | 194.0 | 388 | 0.5442 | 0.8286 |
264
+ | 0.6155 | 195.0 | 390 | 0.5442 | 0.8286 |
265
+ | 0.6155 | 196.0 | 392 | 0.5442 | 0.8286 |
266
+ | 0.6155 | 197.0 | 394 | 0.5442 | 0.8286 |
267
+ | 0.6155 | 198.0 | 396 | 0.5441 | 0.8286 |
268
+ | 0.6155 | 199.0 | 398 | 0.5441 | 0.8286 |
269
+ | 0.6272 | 200.0 | 400 | 0.5441 | 0.8286 |
270
 
271
 
272
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 200.0,
3
- "eval_accuracy": 0.8142857142857143,
4
- "eval_loss": 0.6875176429748535,
5
- "eval_runtime": 0.839,
6
- "eval_samples_per_second": 83.432,
7
- "eval_steps_per_second": 2.384,
8
  "total_flos": 2.23710151698432e+18,
9
- "train_loss": 0.8548950719833374,
10
- "train_runtime": 1030.1946,
11
- "train_samples_per_second": 87.362,
12
- "train_steps_per_second": 0.388
13
  }
 
1
  {
2
  "epoch": 200.0,
3
+ "eval_accuracy": 0.8285714285714286,
4
+ "eval_loss": 0.6770716309547424,
5
+ "eval_runtime": 0.6932,
6
+ "eval_samples_per_second": 100.985,
7
+ "eval_steps_per_second": 2.885,
8
  "total_flos": 2.23710151698432e+18,
9
+ "train_loss": 0.6791047298908234,
10
+ "train_runtime": 1022.1437,
11
+ "train_samples_per_second": 88.05,
12
+ "train_steps_per_second": 0.391
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 200.0,
3
- "eval_accuracy": 0.8142857142857143,
4
- "eval_loss": 0.6875176429748535,
5
- "eval_runtime": 0.839,
6
- "eval_samples_per_second": 83.432,
7
- "eval_steps_per_second": 2.384
8
  }
 
1
  {
2
  "epoch": 200.0,
3
+ "eval_accuracy": 0.8285714285714286,
4
+ "eval_loss": 0.6770716309547424,
5
+ "eval_runtime": 0.6932,
6
+ "eval_samples_per_second": 100.985,
7
+ "eval_steps_per_second": 2.885
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b62d9ef2983c93a25662930965516fed575d4b4bf9ef9e40f118fd77873bd11f
3
  size 110397937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3948bfd3caacca713d9cd1f9c0bcc4bab7d1122e553b8f0d46454329a59a0401
3
  size 110397937
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 200.0,
3
  "total_flos": 2.23710151698432e+18,
4
- "train_loss": 0.8548950719833374,
5
- "train_runtime": 1030.1946,
6
- "train_samples_per_second": 87.362,
7
- "train_steps_per_second": 0.388
8
  }
 
1
  {
2
  "epoch": 200.0,
3
  "total_flos": 2.23710151698432e+18,
4
+ "train_loss": 0.6791047298908234,
5
+ "train_runtime": 1022.1437,
6
+ "train_samples_per_second": 88.05,
7
+ "train_steps_per_second": 0.391
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8142857142857143,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-3cls-0922/checkpoint-310",
4
  "epoch": 200.0,
5
  "eval_steps": 500,
6
  "global_step": 400,
@@ -10,2052 +10,2052 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.4142857142857143,
14
- "eval_loss": 1.0693532228469849,
15
- "eval_runtime": 1.0286,
16
- "eval_samples_per_second": 68.056,
17
- "eval_steps_per_second": 1.944,
18
  "step": 2
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.4142857142857143,
23
- "eval_loss": 1.0689432621002197,
24
- "eval_runtime": 1.4012,
25
- "eval_samples_per_second": 49.956,
26
- "eval_steps_per_second": 1.427,
27
  "step": 4
28
  },
29
  {
30
  "epoch": 3.0,
31
- "eval_accuracy": 0.4142857142857143,
32
- "eval_loss": 1.0682058334350586,
33
- "eval_runtime": 0.8857,
34
- "eval_samples_per_second": 79.031,
35
- "eval_steps_per_second": 2.258,
36
  "step": 6
37
  },
38
  {
39
  "epoch": 4.0,
40
- "eval_accuracy": 0.4142857142857143,
41
- "eval_loss": 1.0670955181121826,
42
- "eval_runtime": 1.6997,
43
- "eval_samples_per_second": 41.183,
44
- "eval_steps_per_second": 1.177,
45
  "step": 8
46
  },
47
  {
48
  "epoch": 5.0,
49
  "learning_rate": 1.25e-05,
50
- "loss": 1.096,
51
  "step": 10
52
  },
53
  {
54
  "epoch": 5.0,
55
- "eval_accuracy": 0.42857142857142855,
56
- "eval_loss": 1.065665602684021,
57
- "eval_runtime": 0.6117,
58
- "eval_samples_per_second": 114.441,
59
- "eval_steps_per_second": 3.27,
60
  "step": 10
61
  },
62
  {
63
  "epoch": 6.0,
64
- "eval_accuracy": 0.42857142857142855,
65
- "eval_loss": 1.0639806985855103,
66
- "eval_runtime": 0.6447,
67
- "eval_samples_per_second": 108.583,
68
- "eval_steps_per_second": 3.102,
69
  "step": 12
70
  },
71
  {
72
  "epoch": 7.0,
73
- "eval_accuracy": 0.4142857142857143,
74
- "eval_loss": 1.0620665550231934,
75
- "eval_runtime": 0.806,
76
- "eval_samples_per_second": 86.844,
77
- "eval_steps_per_second": 2.481,
78
  "step": 14
79
  },
80
  {
81
  "epoch": 8.0,
82
- "eval_accuracy": 0.4,
83
- "eval_loss": 1.0598403215408325,
84
- "eval_runtime": 0.6374,
85
- "eval_samples_per_second": 109.817,
86
- "eval_steps_per_second": 3.138,
87
  "step": 16
88
  },
89
  {
90
  "epoch": 9.0,
91
- "eval_accuracy": 0.4,
92
- "eval_loss": 1.0572247505187988,
93
- "eval_runtime": 0.6352,
94
- "eval_samples_per_second": 110.2,
95
- "eval_steps_per_second": 3.149,
96
  "step": 18
97
  },
98
  {
99
  "epoch": 10.0,
100
  "learning_rate": 2.5e-05,
101
- "loss": 1.0906,
102
  "step": 20
103
  },
104
  {
105
  "epoch": 10.0,
106
- "eval_accuracy": 0.4,
107
- "eval_loss": 1.0545086860656738,
108
- "eval_runtime": 0.8412,
109
- "eval_samples_per_second": 83.211,
110
- "eval_steps_per_second": 2.377,
111
  "step": 20
112
  },
113
  {
114
  "epoch": 11.0,
115
- "eval_accuracy": 0.4142857142857143,
116
- "eval_loss": 1.0516999959945679,
117
- "eval_runtime": 0.6382,
118
- "eval_samples_per_second": 109.685,
119
- "eval_steps_per_second": 3.134,
120
  "step": 22
121
  },
122
  {
123
  "epoch": 12.0,
124
- "eval_accuracy": 0.4142857142857143,
125
- "eval_loss": 1.0486340522766113,
126
- "eval_runtime": 0.6383,
127
- "eval_samples_per_second": 109.67,
128
- "eval_steps_per_second": 3.133,
129
  "step": 24
130
  },
131
  {
132
  "epoch": 13.0,
133
- "eval_accuracy": 0.4142857142857143,
134
- "eval_loss": 1.045298457145691,
135
- "eval_runtime": 0.8655,
136
- "eval_samples_per_second": 80.883,
137
- "eval_steps_per_second": 2.311,
138
  "step": 26
139
  },
140
  {
141
  "epoch": 14.0,
142
- "eval_accuracy": 0.4142857142857143,
143
- "eval_loss": 1.0417920351028442,
144
- "eval_runtime": 0.64,
145
- "eval_samples_per_second": 109.372,
146
- "eval_steps_per_second": 3.125,
147
  "step": 28
148
  },
149
  {
150
  "epoch": 15.0,
151
  "learning_rate": 3.7500000000000003e-05,
152
- "loss": 1.0647,
153
  "step": 30
154
  },
155
  {
156
  "epoch": 15.0,
157
- "eval_accuracy": 0.4142857142857143,
158
- "eval_loss": 1.0380207300186157,
159
- "eval_runtime": 0.6461,
160
- "eval_samples_per_second": 108.335,
161
- "eval_steps_per_second": 3.095,
162
  "step": 30
163
  },
164
  {
165
  "epoch": 16.0,
166
- "eval_accuracy": 0.4142857142857143,
167
- "eval_loss": 1.0343334674835205,
168
- "eval_runtime": 0.8283,
169
- "eval_samples_per_second": 84.515,
170
- "eval_steps_per_second": 2.415,
171
  "step": 32
172
  },
173
  {
174
  "epoch": 17.0,
175
- "eval_accuracy": 0.4142857142857143,
176
- "eval_loss": 1.030653953552246,
177
- "eval_runtime": 0.6353,
178
- "eval_samples_per_second": 110.181,
179
- "eval_steps_per_second": 3.148,
180
  "step": 34
181
  },
182
  {
183
  "epoch": 18.0,
184
- "eval_accuracy": 0.42857142857142855,
185
- "eval_loss": 1.0267900228500366,
186
- "eval_runtime": 0.6316,
187
- "eval_samples_per_second": 110.829,
188
- "eval_steps_per_second": 3.167,
189
  "step": 36
190
  },
191
  {
192
  "epoch": 19.0,
193
- "eval_accuracy": 0.42857142857142855,
194
- "eval_loss": 1.0229403972625732,
195
- "eval_runtime": 1.4107,
196
- "eval_samples_per_second": 49.619,
197
- "eval_steps_per_second": 1.418,
198
  "step": 38
199
  },
200
  {
201
  "epoch": 20.0,
202
  "learning_rate": 5e-05,
203
- "loss": 1.0451,
204
  "step": 40
205
  },
206
  {
207
  "epoch": 20.0,
208
- "eval_accuracy": 0.44285714285714284,
209
- "eval_loss": 1.0190969705581665,
210
- "eval_runtime": 0.6384,
211
- "eval_samples_per_second": 109.648,
212
- "eval_steps_per_second": 3.133,
213
  "step": 40
214
  },
215
  {
216
  "epoch": 21.0,
217
- "eval_accuracy": 0.45714285714285713,
218
- "eval_loss": 1.0152583122253418,
219
- "eval_runtime": 0.6302,
220
- "eval_samples_per_second": 111.074,
221
- "eval_steps_per_second": 3.174,
222
  "step": 42
223
  },
224
  {
225
  "epoch": 22.0,
226
- "eval_accuracy": 0.4714285714285714,
227
- "eval_loss": 1.011588454246521,
228
- "eval_runtime": 0.8016,
229
- "eval_samples_per_second": 87.325,
230
- "eval_steps_per_second": 2.495,
231
  "step": 44
232
  },
233
  {
234
  "epoch": 23.0,
235
- "eval_accuracy": 0.4714285714285714,
236
- "eval_loss": 1.0081889629364014,
237
- "eval_runtime": 0.6258,
238
- "eval_samples_per_second": 111.852,
239
- "eval_steps_per_second": 3.196,
240
  "step": 46
241
  },
242
  {
243
  "epoch": 24.0,
244
- "eval_accuracy": 0.4714285714285714,
245
- "eval_loss": 1.0049412250518799,
246
- "eval_runtime": 0.6247,
247
- "eval_samples_per_second": 112.06,
248
- "eval_steps_per_second": 3.202,
249
  "step": 48
250
  },
251
  {
252
  "epoch": 25.0,
253
  "learning_rate": 6.25e-05,
254
- "loss": 1.037,
255
  "step": 50
256
  },
257
  {
258
  "epoch": 25.0,
259
- "eval_accuracy": 0.4714285714285714,
260
- "eval_loss": 1.0015885829925537,
261
- "eval_runtime": 0.8268,
262
- "eval_samples_per_second": 84.66,
263
- "eval_steps_per_second": 2.419,
264
  "step": 50
265
  },
266
  {
267
  "epoch": 26.0,
268
- "eval_accuracy": 0.4714285714285714,
269
- "eval_loss": 0.9978756308555603,
270
- "eval_runtime": 0.635,
271
- "eval_samples_per_second": 110.241,
272
- "eval_steps_per_second": 3.15,
273
  "step": 52
274
  },
275
  {
276
  "epoch": 27.0,
277
- "eval_accuracy": 0.4714285714285714,
278
- "eval_loss": 0.9943951368331909,
279
- "eval_runtime": 0.6443,
280
- "eval_samples_per_second": 108.649,
281
- "eval_steps_per_second": 3.104,
282
  "step": 54
283
  },
284
  {
285
  "epoch": 28.0,
286
- "eval_accuracy": 0.4714285714285714,
287
- "eval_loss": 0.9912726283073425,
288
- "eval_runtime": 0.8063,
289
- "eval_samples_per_second": 86.821,
290
- "eval_steps_per_second": 2.481,
291
  "step": 56
292
  },
293
  {
294
  "epoch": 29.0,
295
- "eval_accuracy": 0.4714285714285714,
296
- "eval_loss": 0.9882513880729675,
297
- "eval_runtime": 0.635,
298
- "eval_samples_per_second": 110.236,
299
- "eval_steps_per_second": 3.15,
300
  "step": 58
301
  },
302
  {
303
  "epoch": 30.0,
304
  "learning_rate": 7.500000000000001e-05,
305
- "loss": 1.0214,
306
  "step": 60
307
  },
308
  {
309
  "epoch": 30.0,
310
- "eval_accuracy": 0.4714285714285714,
311
- "eval_loss": 0.9846696257591248,
312
- "eval_runtime": 0.6341,
313
- "eval_samples_per_second": 110.395,
314
- "eval_steps_per_second": 3.154,
315
  "step": 60
316
  },
317
  {
318
  "epoch": 31.0,
319
- "eval_accuracy": 0.45714285714285713,
320
- "eval_loss": 0.9809077382087708,
321
- "eval_runtime": 0.8112,
322
- "eval_samples_per_second": 86.297,
323
- "eval_steps_per_second": 2.466,
324
  "step": 62
325
  },
326
  {
327
  "epoch": 32.0,
328
- "eval_accuracy": 0.4714285714285714,
329
- "eval_loss": 0.9768250584602356,
330
- "eval_runtime": 0.6357,
331
- "eval_samples_per_second": 110.106,
332
- "eval_steps_per_second": 3.146,
333
  "step": 64
334
  },
335
  {
336
  "epoch": 33.0,
337
- "eval_accuracy": 0.4714285714285714,
338
- "eval_loss": 0.9722528457641602,
339
- "eval_runtime": 0.6387,
340
- "eval_samples_per_second": 109.603,
341
- "eval_steps_per_second": 3.132,
342
  "step": 66
343
  },
344
  {
345
  "epoch": 34.0,
346
- "eval_accuracy": 0.4714285714285714,
347
- "eval_loss": 0.9670786261558533,
348
- "eval_runtime": 0.83,
349
- "eval_samples_per_second": 84.335,
350
- "eval_steps_per_second": 2.41,
351
  "step": 68
352
  },
353
  {
354
  "epoch": 35.0,
355
  "learning_rate": 8.75e-05,
356
- "loss": 1.0181,
357
  "step": 70
358
  },
359
  {
360
  "epoch": 35.0,
361
- "eval_accuracy": 0.4714285714285714,
362
- "eval_loss": 0.9616244435310364,
363
- "eval_runtime": 0.6343,
364
- "eval_samples_per_second": 110.364,
365
- "eval_steps_per_second": 3.153,
366
  "step": 70
367
  },
368
  {
369
  "epoch": 36.0,
370
- "eval_accuracy": 0.4857142857142857,
371
- "eval_loss": 0.9561253190040588,
372
- "eval_runtime": 0.6551,
373
- "eval_samples_per_second": 106.853,
374
- "eval_steps_per_second": 3.053,
375
  "step": 72
376
  },
377
  {
378
  "epoch": 37.0,
379
- "eval_accuracy": 0.5,
380
- "eval_loss": 0.950469434261322,
381
- "eval_runtime": 0.8176,
382
- "eval_samples_per_second": 85.619,
383
- "eval_steps_per_second": 2.446,
384
  "step": 74
385
  },
386
  {
387
  "epoch": 38.0,
388
- "eval_accuracy": 0.5285714285714286,
389
- "eval_loss": 0.9445714950561523,
390
- "eval_runtime": 0.6352,
391
- "eval_samples_per_second": 110.194,
392
- "eval_steps_per_second": 3.148,
393
  "step": 76
394
  },
395
  {
396
  "epoch": 39.0,
397
- "eval_accuracy": 0.5285714285714286,
398
- "eval_loss": 0.9387871026992798,
399
- "eval_runtime": 0.629,
400
- "eval_samples_per_second": 111.283,
401
- "eval_steps_per_second": 3.18,
402
  "step": 78
403
  },
404
  {
405
  "epoch": 40.0,
406
  "learning_rate": 0.0001,
407
- "loss": 0.9646,
408
  "step": 80
409
  },
410
  {
411
  "epoch": 40.0,
412
- "eval_accuracy": 0.5285714285714286,
413
- "eval_loss": 0.9331315159797668,
414
- "eval_runtime": 0.8121,
415
- "eval_samples_per_second": 86.194,
416
- "eval_steps_per_second": 2.463,
417
  "step": 80
418
  },
419
  {
420
  "epoch": 41.0,
421
- "eval_accuracy": 0.5142857142857142,
422
- "eval_loss": 0.9276390075683594,
423
- "eval_runtime": 0.6356,
424
- "eval_samples_per_second": 110.129,
425
- "eval_steps_per_second": 3.147,
426
  "step": 82
427
  },
428
  {
429
  "epoch": 42.0,
430
- "eval_accuracy": 0.5285714285714286,
431
- "eval_loss": 0.9224144220352173,
432
- "eval_runtime": 0.6341,
433
- "eval_samples_per_second": 110.396,
434
- "eval_steps_per_second": 3.154,
435
  "step": 84
436
  },
437
  {
438
  "epoch": 43.0,
439
- "eval_accuracy": 0.5285714285714286,
440
- "eval_loss": 0.917235791683197,
441
- "eval_runtime": 0.8135,
442
- "eval_samples_per_second": 86.048,
443
- "eval_steps_per_second": 2.459,
444
  "step": 86
445
  },
446
  {
447
  "epoch": 44.0,
448
- "eval_accuracy": 0.5285714285714286,
449
- "eval_loss": 0.9120003581047058,
450
- "eval_runtime": 0.6333,
451
- "eval_samples_per_second": 110.541,
452
- "eval_steps_per_second": 3.158,
453
  "step": 88
454
  },
455
  {
456
  "epoch": 45.0,
457
  "learning_rate": 9.687500000000001e-05,
458
- "loss": 0.946,
459
  "step": 90
460
  },
461
  {
462
  "epoch": 45.0,
463
- "eval_accuracy": 0.5142857142857142,
464
- "eval_loss": 0.9070144891738892,
465
- "eval_runtime": 0.6382,
466
- "eval_samples_per_second": 109.685,
467
- "eval_steps_per_second": 3.134,
468
  "step": 90
469
  },
470
  {
471
  "epoch": 46.0,
472
- "eval_accuracy": 0.5285714285714286,
473
- "eval_loss": 0.9021272659301758,
474
- "eval_runtime": 0.8166,
475
- "eval_samples_per_second": 85.719,
476
- "eval_steps_per_second": 2.449,
477
  "step": 92
478
  },
479
  {
480
  "epoch": 47.0,
481
- "eval_accuracy": 0.5428571428571428,
482
- "eval_loss": 0.897597074508667,
483
- "eval_runtime": 0.634,
484
- "eval_samples_per_second": 110.411,
485
- "eval_steps_per_second": 3.155,
486
  "step": 94
487
  },
488
  {
489
  "epoch": 48.0,
490
- "eval_accuracy": 0.5428571428571428,
491
- "eval_loss": 0.8932848572731018,
492
- "eval_runtime": 0.6408,
493
- "eval_samples_per_second": 109.231,
494
- "eval_steps_per_second": 3.121,
495
  "step": 96
496
  },
497
  {
498
  "epoch": 49.0,
499
- "eval_accuracy": 0.5714285714285714,
500
- "eval_loss": 0.8890884518623352,
501
- "eval_runtime": 0.8162,
502
- "eval_samples_per_second": 85.76,
503
- "eval_steps_per_second": 2.45,
504
  "step": 98
505
  },
506
  {
507
  "epoch": 50.0,
508
  "learning_rate": 9.375e-05,
509
- "loss": 0.9244,
510
  "step": 100
511
  },
512
  {
513
  "epoch": 50.0,
514
- "eval_accuracy": 0.5714285714285714,
515
- "eval_loss": 0.8845995664596558,
516
- "eval_runtime": 0.6368,
517
- "eval_samples_per_second": 109.929,
518
- "eval_steps_per_second": 3.141,
519
  "step": 100
520
  },
521
  {
522
  "epoch": 51.0,
523
- "eval_accuracy": 0.5714285714285714,
524
- "eval_loss": 0.8802858591079712,
525
- "eval_runtime": 0.6401,
526
- "eval_samples_per_second": 109.353,
527
- "eval_steps_per_second": 3.124,
528
  "step": 102
529
  },
530
  {
531
  "epoch": 52.0,
532
- "eval_accuracy": 0.5714285714285714,
533
- "eval_loss": 0.8758621215820312,
534
- "eval_runtime": 0.8118,
535
- "eval_samples_per_second": 86.227,
536
- "eval_steps_per_second": 2.464,
537
  "step": 104
538
  },
539
  {
540
  "epoch": 53.0,
541
- "eval_accuracy": 0.5714285714285714,
542
- "eval_loss": 0.8715727925300598,
543
- "eval_runtime": 0.6434,
544
- "eval_samples_per_second": 108.8,
545
- "eval_steps_per_second": 3.109,
546
  "step": 106
547
  },
548
  {
549
  "epoch": 54.0,
550
- "eval_accuracy": 0.5714285714285714,
551
- "eval_loss": 0.8674018383026123,
552
- "eval_runtime": 0.63,
553
- "eval_samples_per_second": 111.108,
554
- "eval_steps_per_second": 3.175,
555
  "step": 108
556
  },
557
  {
558
  "epoch": 55.0,
559
  "learning_rate": 9.062500000000001e-05,
560
- "loss": 0.9228,
561
  "step": 110
562
  },
563
  {
564
  "epoch": 55.0,
565
- "eval_accuracy": 0.5857142857142857,
566
- "eval_loss": 0.86343914270401,
567
- "eval_runtime": 0.7324,
568
- "eval_samples_per_second": 95.574,
569
- "eval_steps_per_second": 2.731,
570
  "step": 110
571
  },
572
  {
573
  "epoch": 56.0,
574
- "eval_accuracy": 0.6,
575
- "eval_loss": 0.8597754240036011,
576
- "eval_runtime": 0.6359,
577
- "eval_samples_per_second": 110.082,
578
- "eval_steps_per_second": 3.145,
579
  "step": 112
580
  },
581
  {
582
  "epoch": 57.0,
583
- "eval_accuracy": 0.5857142857142857,
584
- "eval_loss": 0.8562148213386536,
585
- "eval_runtime": 0.6308,
586
- "eval_samples_per_second": 110.969,
587
- "eval_steps_per_second": 3.171,
588
  "step": 114
589
  },
590
  {
591
  "epoch": 58.0,
592
- "eval_accuracy": 0.6,
593
- "eval_loss": 0.852704644203186,
594
- "eval_runtime": 0.6483,
595
- "eval_samples_per_second": 107.968,
596
- "eval_steps_per_second": 3.085,
597
  "step": 116
598
  },
599
  {
600
  "epoch": 59.0,
601
- "eval_accuracy": 0.6,
602
- "eval_loss": 0.8491949439048767,
603
- "eval_runtime": 0.7037,
604
- "eval_samples_per_second": 99.48,
605
- "eval_steps_per_second": 2.842,
606
  "step": 118
607
  },
608
  {
609
  "epoch": 60.0,
610
  "learning_rate": 8.75e-05,
611
- "loss": 0.8956,
612
  "step": 120
613
  },
614
  {
615
  "epoch": 60.0,
616
- "eval_accuracy": 0.6142857142857143,
617
- "eval_loss": 0.8456201553344727,
618
- "eval_runtime": 0.6463,
619
- "eval_samples_per_second": 108.309,
620
- "eval_steps_per_second": 3.095,
621
  "step": 120
622
  },
623
  {
624
  "epoch": 61.0,
625
- "eval_accuracy": 0.6,
626
- "eval_loss": 0.8420506715774536,
627
- "eval_runtime": 0.63,
628
- "eval_samples_per_second": 111.119,
629
- "eval_steps_per_second": 3.175,
630
  "step": 122
631
  },
632
  {
633
  "epoch": 62.0,
634
- "eval_accuracy": 0.6,
635
- "eval_loss": 0.8385196924209595,
636
- "eval_runtime": 0.7958,
637
- "eval_samples_per_second": 87.963,
638
- "eval_steps_per_second": 2.513,
639
  "step": 124
640
  },
641
  {
642
  "epoch": 63.0,
643
- "eval_accuracy": 0.6,
644
- "eval_loss": 0.8351073861122131,
645
- "eval_runtime": 0.6308,
646
- "eval_samples_per_second": 110.971,
647
- "eval_steps_per_second": 3.171,
648
  "step": 126
649
  },
650
  {
651
  "epoch": 64.0,
652
- "eval_accuracy": 0.6142857142857143,
653
- "eval_loss": 0.8317676186561584,
654
- "eval_runtime": 0.6457,
655
- "eval_samples_per_second": 108.412,
656
- "eval_steps_per_second": 3.097,
657
  "step": 128
658
  },
659
  {
660
  "epoch": 65.0,
661
  "learning_rate": 8.4375e-05,
662
- "loss": 0.8943,
663
  "step": 130
664
  },
665
  {
666
  "epoch": 65.0,
667
- "eval_accuracy": 0.6142857142857143,
668
- "eval_loss": 0.8285678029060364,
669
- "eval_runtime": 0.8132,
670
- "eval_samples_per_second": 86.08,
671
- "eval_steps_per_second": 2.459,
672
  "step": 130
673
  },
674
  {
675
  "epoch": 66.0,
676
- "eval_accuracy": 0.6,
677
- "eval_loss": 0.825462281703949,
678
- "eval_runtime": 0.6417,
679
- "eval_samples_per_second": 109.078,
680
- "eval_steps_per_second": 3.117,
681
  "step": 132
682
  },
683
  {
684
  "epoch": 67.0,
685
- "eval_accuracy": 0.6285714285714286,
686
- "eval_loss": 0.8222988247871399,
687
- "eval_runtime": 0.6393,
688
- "eval_samples_per_second": 109.486,
689
- "eval_steps_per_second": 3.128,
690
  "step": 134
691
  },
692
  {
693
  "epoch": 68.0,
694
- "eval_accuracy": 0.6428571428571429,
695
- "eval_loss": 0.8190925121307373,
696
- "eval_runtime": 0.8213,
697
- "eval_samples_per_second": 85.231,
698
- "eval_steps_per_second": 2.435,
699
  "step": 136
700
  },
701
  {
702
  "epoch": 69.0,
703
- "eval_accuracy": 0.6285714285714286,
704
- "eval_loss": 0.8158699870109558,
705
- "eval_runtime": 0.6409,
706
- "eval_samples_per_second": 109.217,
707
- "eval_steps_per_second": 3.12,
708
  "step": 138
709
  },
710
  {
711
  "epoch": 70.0,
712
  "learning_rate": 8.125000000000001e-05,
713
- "loss": 0.854,
714
  "step": 140
715
  },
716
  {
717
  "epoch": 70.0,
718
- "eval_accuracy": 0.6428571428571429,
719
- "eval_loss": 0.8128588199615479,
720
- "eval_runtime": 0.6403,
721
- "eval_samples_per_second": 109.332,
722
- "eval_steps_per_second": 3.124,
723
  "step": 140
724
  },
725
  {
726
  "epoch": 71.0,
727
- "eval_accuracy": 0.6714285714285714,
728
- "eval_loss": 0.8099709749221802,
729
- "eval_runtime": 0.8076,
730
- "eval_samples_per_second": 86.675,
731
- "eval_steps_per_second": 2.476,
732
  "step": 142
733
  },
734
  {
735
  "epoch": 72.0,
736
- "eval_accuracy": 0.6714285714285714,
737
- "eval_loss": 0.8072643280029297,
738
- "eval_runtime": 0.6395,
739
- "eval_samples_per_second": 109.469,
740
- "eval_steps_per_second": 3.128,
741
  "step": 144
742
  },
743
  {
744
  "epoch": 73.0,
745
- "eval_accuracy": 0.6571428571428571,
746
- "eval_loss": 0.804807722568512,
747
- "eval_runtime": 0.6346,
748
- "eval_samples_per_second": 110.311,
749
- "eval_steps_per_second": 3.152,
750
  "step": 146
751
  },
752
  {
753
  "epoch": 74.0,
754
- "eval_accuracy": 0.6714285714285714,
755
- "eval_loss": 0.8025286793708801,
756
- "eval_runtime": 0.7964,
757
- "eval_samples_per_second": 87.9,
758
- "eval_steps_per_second": 2.511,
759
  "step": 148
760
  },
761
  {
762
  "epoch": 75.0,
763
  "learning_rate": 7.8125e-05,
764
- "loss": 0.8615,
765
  "step": 150
766
  },
767
  {
768
  "epoch": 75.0,
769
- "eval_accuracy": 0.6571428571428571,
770
- "eval_loss": 0.8000553250312805,
771
- "eval_runtime": 0.6302,
772
- "eval_samples_per_second": 111.077,
773
- "eval_steps_per_second": 3.174,
774
  "step": 150
775
  },
776
  {
777
  "epoch": 76.0,
778
- "eval_accuracy": 0.6571428571428571,
779
- "eval_loss": 0.797595739364624,
780
- "eval_runtime": 0.6315,
781
- "eval_samples_per_second": 110.841,
782
- "eval_steps_per_second": 3.167,
783
  "step": 152
784
  },
785
  {
786
  "epoch": 77.0,
787
- "eval_accuracy": 0.6571428571428571,
788
- "eval_loss": 0.795224130153656,
789
- "eval_runtime": 0.7988,
790
- "eval_samples_per_second": 87.627,
791
- "eval_steps_per_second": 2.504,
792
  "step": 154
793
  },
794
  {
795
  "epoch": 78.0,
796
- "eval_accuracy": 0.6571428571428571,
797
- "eval_loss": 0.7928095459938049,
798
- "eval_runtime": 0.6405,
799
- "eval_samples_per_second": 109.291,
800
- "eval_steps_per_second": 3.123,
801
  "step": 156
802
  },
803
  {
804
  "epoch": 79.0,
805
- "eval_accuracy": 0.6571428571428571,
806
- "eval_loss": 0.7904472351074219,
807
- "eval_runtime": 0.6375,
808
- "eval_samples_per_second": 109.802,
809
- "eval_steps_per_second": 3.137,
810
  "step": 158
811
  },
812
  {
813
  "epoch": 80.0,
814
  "learning_rate": 7.500000000000001e-05,
815
- "loss": 0.8507,
816
  "step": 160
817
  },
818
  {
819
  "epoch": 80.0,
820
- "eval_accuracy": 0.6714285714285714,
821
- "eval_loss": 0.7881516218185425,
822
- "eval_runtime": 0.8024,
823
- "eval_samples_per_second": 87.237,
824
- "eval_steps_per_second": 2.492,
825
  "step": 160
826
  },
827
  {
828
  "epoch": 81.0,
829
- "eval_accuracy": 0.6714285714285714,
830
- "eval_loss": 0.7857871055603027,
831
- "eval_runtime": 0.6344,
832
- "eval_samples_per_second": 110.338,
833
- "eval_steps_per_second": 3.153,
834
  "step": 162
835
  },
836
  {
837
  "epoch": 82.0,
838
- "eval_accuracy": 0.6857142857142857,
839
- "eval_loss": 0.7834708094596863,
840
- "eval_runtime": 0.6376,
841
- "eval_samples_per_second": 109.781,
842
- "eval_steps_per_second": 3.137,
843
  "step": 164
844
  },
845
  {
846
  "epoch": 83.0,
847
- "eval_accuracy": 0.6857142857142857,
848
- "eval_loss": 0.7811411023139954,
849
- "eval_runtime": 0.8171,
850
- "eval_samples_per_second": 85.665,
851
- "eval_steps_per_second": 2.448,
852
  "step": 166
853
  },
854
  {
855
  "epoch": 84.0,
856
- "eval_accuracy": 0.6857142857142857,
857
- "eval_loss": 0.7788016200065613,
858
- "eval_runtime": 0.6448,
859
- "eval_samples_per_second": 108.552,
860
- "eval_steps_per_second": 3.101,
861
  "step": 168
862
  },
863
  {
864
  "epoch": 85.0,
865
  "learning_rate": 7.1875e-05,
866
- "loss": 0.838,
867
  "step": 170
868
  },
869
  {
870
  "epoch": 85.0,
871
- "eval_accuracy": 0.6857142857142857,
872
- "eval_loss": 0.7765094041824341,
873
- "eval_runtime": 0.6436,
874
- "eval_samples_per_second": 108.77,
875
- "eval_steps_per_second": 3.108,
876
  "step": 170
877
  },
878
  {
879
  "epoch": 86.0,
880
- "eval_accuracy": 0.6857142857142857,
881
- "eval_loss": 0.7743098139762878,
882
- "eval_runtime": 0.8392,
883
- "eval_samples_per_second": 83.412,
884
- "eval_steps_per_second": 2.383,
885
  "step": 172
886
  },
887
  {
888
  "epoch": 87.0,
889
- "eval_accuracy": 0.6857142857142857,
890
- "eval_loss": 0.7722915410995483,
891
- "eval_runtime": 0.6421,
892
- "eval_samples_per_second": 109.013,
893
- "eval_steps_per_second": 3.115,
894
  "step": 174
895
  },
896
  {
897
  "epoch": 88.0,
898
- "eval_accuracy": 0.6857142857142857,
899
- "eval_loss": 0.7703205943107605,
900
- "eval_runtime": 0.6583,
901
- "eval_samples_per_second": 106.337,
902
- "eval_steps_per_second": 3.038,
903
  "step": 176
904
  },
905
  {
906
  "epoch": 89.0,
907
- "eval_accuracy": 0.6857142857142857,
908
- "eval_loss": 0.768402636051178,
909
- "eval_runtime": 0.8092,
910
- "eval_samples_per_second": 86.507,
911
- "eval_steps_per_second": 2.472,
912
  "step": 178
913
  },
914
  {
915
  "epoch": 90.0,
916
  "learning_rate": 6.875e-05,
917
- "loss": 0.8245,
918
  "step": 180
919
  },
920
  {
921
  "epoch": 90.0,
922
- "eval_accuracy": 0.6857142857142857,
923
- "eval_loss": 0.7664420008659363,
924
- "eval_runtime": 0.6321,
925
- "eval_samples_per_second": 110.749,
926
- "eval_steps_per_second": 3.164,
927
  "step": 180
928
  },
929
  {
930
  "epoch": 91.0,
931
- "eval_accuracy": 0.6857142857142857,
932
- "eval_loss": 0.7643933296203613,
933
- "eval_runtime": 0.6416,
934
- "eval_samples_per_second": 109.102,
935
- "eval_steps_per_second": 3.117,
936
  "step": 182
937
  },
938
  {
939
  "epoch": 92.0,
940
- "eval_accuracy": 0.6857142857142857,
941
- "eval_loss": 0.7624573111534119,
942
- "eval_runtime": 0.8206,
943
- "eval_samples_per_second": 85.305,
944
- "eval_steps_per_second": 2.437,
945
  "step": 184
946
  },
947
  {
948
  "epoch": 93.0,
949
- "eval_accuracy": 0.7142857142857143,
950
- "eval_loss": 0.7605774998664856,
951
- "eval_runtime": 0.6303,
952
- "eval_samples_per_second": 111.066,
953
- "eval_steps_per_second": 3.173,
954
  "step": 186
955
  },
956
  {
957
  "epoch": 94.0,
958
- "eval_accuracy": 0.7142857142857143,
959
- "eval_loss": 0.7587181329727173,
960
- "eval_runtime": 0.6487,
961
- "eval_samples_per_second": 107.907,
962
- "eval_steps_per_second": 3.083,
963
  "step": 188
964
  },
965
  {
966
  "epoch": 95.0,
967
  "learning_rate": 6.562500000000001e-05,
968
- "loss": 0.8124,
969
  "step": 190
970
  },
971
  {
972
  "epoch": 95.0,
973
- "eval_accuracy": 0.7142857142857143,
974
- "eval_loss": 0.75688236951828,
975
- "eval_runtime": 0.83,
976
- "eval_samples_per_second": 84.337,
977
- "eval_steps_per_second": 2.41,
978
  "step": 190
979
  },
980
  {
981
  "epoch": 96.0,
982
- "eval_accuracy": 0.7285714285714285,
983
- "eval_loss": 0.755053699016571,
984
- "eval_runtime": 0.6434,
985
- "eval_samples_per_second": 108.8,
986
- "eval_steps_per_second": 3.109,
987
  "step": 192
988
  },
989
  {
990
  "epoch": 97.0,
991
- "eval_accuracy": 0.7285714285714285,
992
- "eval_loss": 0.7533228993415833,
993
- "eval_runtime": 0.6338,
994
- "eval_samples_per_second": 110.439,
995
- "eval_steps_per_second": 3.155,
996
  "step": 194
997
  },
998
  {
999
  "epoch": 98.0,
1000
- "eval_accuracy": 0.7285714285714285,
1001
- "eval_loss": 0.7516511082649231,
1002
- "eval_runtime": 0.8125,
1003
- "eval_samples_per_second": 86.157,
1004
- "eval_steps_per_second": 2.462,
1005
  "step": 196
1006
  },
1007
  {
1008
  "epoch": 99.0,
1009
- "eval_accuracy": 0.7428571428571429,
1010
- "eval_loss": 0.7499614953994751,
1011
- "eval_runtime": 0.6565,
1012
- "eval_samples_per_second": 106.632,
1013
- "eval_steps_per_second": 3.047,
1014
  "step": 198
1015
  },
1016
  {
1017
  "epoch": 100.0,
1018
  "learning_rate": 6.25e-05,
1019
- "loss": 0.8102,
1020
  "step": 200
1021
  },
1022
  {
1023
  "epoch": 100.0,
1024
- "eval_accuracy": 0.7428571428571429,
1025
- "eval_loss": 0.7482544183731079,
1026
- "eval_runtime": 0.6344,
1027
- "eval_samples_per_second": 110.346,
1028
- "eval_steps_per_second": 3.153,
1029
  "step": 200
1030
  },
1031
  {
1032
  "epoch": 101.0,
1033
- "eval_accuracy": 0.7428571428571429,
1034
- "eval_loss": 0.7465088963508606,
1035
- "eval_runtime": 0.8175,
1036
- "eval_samples_per_second": 85.623,
1037
- "eval_steps_per_second": 2.446,
1038
  "step": 202
1039
  },
1040
  {
1041
  "epoch": 102.0,
1042
- "eval_accuracy": 0.7428571428571429,
1043
- "eval_loss": 0.7449584007263184,
1044
- "eval_runtime": 0.6372,
1045
- "eval_samples_per_second": 109.854,
1046
- "eval_steps_per_second": 3.139,
1047
  "step": 204
1048
  },
1049
  {
1050
  "epoch": 103.0,
1051
- "eval_accuracy": 0.7428571428571429,
1052
- "eval_loss": 0.7434430718421936,
1053
- "eval_runtime": 0.6379,
1054
- "eval_samples_per_second": 109.741,
1055
- "eval_steps_per_second": 3.135,
1056
  "step": 206
1057
  },
1058
  {
1059
  "epoch": 104.0,
1060
- "eval_accuracy": 0.7428571428571429,
1061
- "eval_loss": 0.7419188618659973,
1062
- "eval_runtime": 0.8331,
1063
- "eval_samples_per_second": 84.027,
1064
- "eval_steps_per_second": 2.401,
1065
  "step": 208
1066
  },
1067
  {
1068
  "epoch": 105.0,
1069
  "learning_rate": 5.9375e-05,
1070
- "loss": 0.821,
1071
  "step": 210
1072
  },
1073
  {
1074
  "epoch": 105.0,
1075
- "eval_accuracy": 0.7571428571428571,
1076
- "eval_loss": 0.7403832674026489,
1077
- "eval_runtime": 0.6317,
1078
- "eval_samples_per_second": 110.817,
1079
- "eval_steps_per_second": 3.166,
1080
  "step": 210
1081
  },
1082
  {
1083
  "epoch": 106.0,
1084
- "eval_accuracy": 0.7571428571428571,
1085
- "eval_loss": 0.7388736605644226,
1086
- "eval_runtime": 0.638,
1087
- "eval_samples_per_second": 109.722,
1088
- "eval_steps_per_second": 3.135,
1089
  "step": 212
1090
  },
1091
  {
1092
  "epoch": 107.0,
1093
- "eval_accuracy": 0.7571428571428571,
1094
- "eval_loss": 0.7374056577682495,
1095
- "eval_runtime": 0.8271,
1096
- "eval_samples_per_second": 84.63,
1097
- "eval_steps_per_second": 2.418,
1098
  "step": 214
1099
  },
1100
  {
1101
  "epoch": 108.0,
1102
- "eval_accuracy": 0.7571428571428571,
1103
- "eval_loss": 0.7359411120414734,
1104
- "eval_runtime": 0.6382,
1105
- "eval_samples_per_second": 109.678,
1106
- "eval_steps_per_second": 3.134,
1107
  "step": 216
1108
  },
1109
  {
1110
  "epoch": 109.0,
1111
- "eval_accuracy": 0.7571428571428571,
1112
- "eval_loss": 0.734478235244751,
1113
- "eval_runtime": 0.6424,
1114
- "eval_samples_per_second": 108.966,
1115
- "eval_steps_per_second": 3.113,
1116
  "step": 218
1117
  },
1118
  {
1119
  "epoch": 110.0,
1120
  "learning_rate": 5.6250000000000005e-05,
1121
- "loss": 0.7918,
1122
  "step": 220
1123
  },
1124
  {
1125
  "epoch": 110.0,
1126
- "eval_accuracy": 0.7571428571428571,
1127
- "eval_loss": 0.7330225110054016,
1128
- "eval_runtime": 0.8294,
1129
- "eval_samples_per_second": 84.398,
1130
- "eval_steps_per_second": 2.411,
1131
  "step": 220
1132
  },
1133
  {
1134
  "epoch": 111.0,
1135
- "eval_accuracy": 0.7571428571428571,
1136
- "eval_loss": 0.7315928339958191,
1137
- "eval_runtime": 0.6456,
1138
- "eval_samples_per_second": 108.432,
1139
- "eval_steps_per_second": 3.098,
1140
  "step": 222
1141
  },
1142
  {
1143
  "epoch": 112.0,
1144
- "eval_accuracy": 0.7571428571428571,
1145
- "eval_loss": 0.7302229404449463,
1146
- "eval_runtime": 0.6368,
1147
- "eval_samples_per_second": 109.931,
1148
- "eval_steps_per_second": 3.141,
1149
  "step": 224
1150
  },
1151
  {
1152
  "epoch": 113.0,
1153
- "eval_accuracy": 0.7571428571428571,
1154
- "eval_loss": 0.7288532257080078,
1155
- "eval_runtime": 0.8298,
1156
- "eval_samples_per_second": 84.362,
1157
- "eval_steps_per_second": 2.41,
1158
  "step": 226
1159
  },
1160
  {
1161
  "epoch": 114.0,
1162
- "eval_accuracy": 0.7571428571428571,
1163
- "eval_loss": 0.7275059223175049,
1164
- "eval_runtime": 0.6447,
1165
- "eval_samples_per_second": 108.584,
1166
- "eval_steps_per_second": 3.102,
1167
  "step": 228
1168
  },
1169
  {
1170
  "epoch": 115.0,
1171
  "learning_rate": 5.3125000000000004e-05,
1172
- "loss": 0.8063,
1173
  "step": 230
1174
  },
1175
  {
1176
  "epoch": 115.0,
1177
- "eval_accuracy": 0.7714285714285715,
1178
- "eval_loss": 0.7261765599250793,
1179
- "eval_runtime": 0.6376,
1180
- "eval_samples_per_second": 109.779,
1181
- "eval_steps_per_second": 3.137,
1182
  "step": 230
1183
  },
1184
  {
1185
  "epoch": 116.0,
1186
- "eval_accuracy": 0.7714285714285715,
1187
- "eval_loss": 0.7246890068054199,
1188
- "eval_runtime": 0.8093,
1189
- "eval_samples_per_second": 86.498,
1190
- "eval_steps_per_second": 2.471,
1191
  "step": 232
1192
  },
1193
  {
1194
  "epoch": 117.0,
1195
- "eval_accuracy": 0.7571428571428571,
1196
- "eval_loss": 0.7232338190078735,
1197
- "eval_runtime": 0.6473,
1198
- "eval_samples_per_second": 108.146,
1199
- "eval_steps_per_second": 3.09,
1200
  "step": 234
1201
  },
1202
  {
1203
  "epoch": 118.0,
1204
- "eval_accuracy": 0.7571428571428571,
1205
- "eval_loss": 0.7218143939971924,
1206
- "eval_runtime": 0.639,
1207
- "eval_samples_per_second": 109.553,
1208
- "eval_steps_per_second": 3.13,
1209
  "step": 236
1210
  },
1211
  {
1212
  "epoch": 119.0,
1213
- "eval_accuracy": 0.7571428571428571,
1214
- "eval_loss": 0.7204232811927795,
1215
- "eval_runtime": 0.8245,
1216
- "eval_samples_per_second": 84.902,
1217
- "eval_steps_per_second": 2.426,
1218
  "step": 238
1219
  },
1220
  {
1221
  "epoch": 120.0,
1222
  "learning_rate": 5e-05,
1223
- "loss": 0.7897,
1224
  "step": 240
1225
  },
1226
  {
1227
  "epoch": 120.0,
1228
- "eval_accuracy": 0.7571428571428571,
1229
- "eval_loss": 0.7191569209098816,
1230
- "eval_runtime": 0.639,
1231
- "eval_samples_per_second": 109.548,
1232
- "eval_steps_per_second": 3.13,
1233
  "step": 240
1234
  },
1235
  {
1236
  "epoch": 121.0,
1237
- "eval_accuracy": 0.7571428571428571,
1238
- "eval_loss": 0.7179904580116272,
1239
- "eval_runtime": 0.6313,
1240
- "eval_samples_per_second": 110.887,
1241
- "eval_steps_per_second": 3.168,
1242
  "step": 242
1243
  },
1244
  {
1245
  "epoch": 122.0,
1246
- "eval_accuracy": 0.7571428571428571,
1247
- "eval_loss": 0.7168429493904114,
1248
- "eval_runtime": 0.8342,
1249
- "eval_samples_per_second": 83.908,
1250
- "eval_steps_per_second": 2.397,
1251
  "step": 244
1252
  },
1253
  {
1254
  "epoch": 123.0,
1255
- "eval_accuracy": 0.7571428571428571,
1256
- "eval_loss": 0.7157979011535645,
1257
- "eval_runtime": 0.6333,
1258
- "eval_samples_per_second": 110.541,
1259
- "eval_steps_per_second": 3.158,
1260
  "step": 246
1261
  },
1262
  {
1263
  "epoch": 124.0,
1264
- "eval_accuracy": 0.7714285714285715,
1265
- "eval_loss": 0.714880645275116,
1266
- "eval_runtime": 0.6368,
1267
- "eval_samples_per_second": 109.924,
1268
- "eval_steps_per_second": 3.141,
1269
  "step": 248
1270
  },
1271
  {
1272
  "epoch": 125.0,
1273
  "learning_rate": 4.6875e-05,
1274
- "loss": 0.7845,
1275
  "step": 250
1276
  },
1277
  {
1278
  "epoch": 125.0,
1279
- "eval_accuracy": 0.7571428571428571,
1280
- "eval_loss": 0.7140344381332397,
1281
- "eval_runtime": 0.8287,
1282
- "eval_samples_per_second": 84.466,
1283
- "eval_steps_per_second": 2.413,
1284
  "step": 250
1285
  },
1286
  {
1287
  "epoch": 126.0,
1288
- "eval_accuracy": 0.7571428571428571,
1289
- "eval_loss": 0.7130730152130127,
1290
- "eval_runtime": 0.6617,
1291
- "eval_samples_per_second": 105.79,
1292
- "eval_steps_per_second": 3.023,
1293
  "step": 252
1294
  },
1295
  {
1296
  "epoch": 127.0,
1297
- "eval_accuracy": 0.7571428571428571,
1298
- "eval_loss": 0.7120916247367859,
1299
- "eval_runtime": 0.6392,
1300
- "eval_samples_per_second": 109.509,
1301
- "eval_steps_per_second": 3.129,
1302
  "step": 254
1303
  },
1304
  {
1305
  "epoch": 128.0,
1306
- "eval_accuracy": 0.7571428571428571,
1307
- "eval_loss": 0.7110173106193542,
1308
- "eval_runtime": 0.8272,
1309
- "eval_samples_per_second": 84.623,
1310
- "eval_steps_per_second": 2.418,
1311
  "step": 256
1312
  },
1313
  {
1314
  "epoch": 129.0,
1315
- "eval_accuracy": 0.7571428571428571,
1316
- "eval_loss": 0.7099365592002869,
1317
- "eval_runtime": 0.8513,
1318
- "eval_samples_per_second": 82.229,
1319
- "eval_steps_per_second": 2.349,
1320
  "step": 258
1321
  },
1322
  {
1323
  "epoch": 130.0,
1324
  "learning_rate": 4.375e-05,
1325
- "loss": 0.7781,
1326
  "step": 260
1327
  },
1328
  {
1329
  "epoch": 130.0,
1330
- "eval_accuracy": 0.7571428571428571,
1331
- "eval_loss": 0.7087655663490295,
1332
- "eval_runtime": 0.6529,
1333
- "eval_samples_per_second": 107.222,
1334
- "eval_steps_per_second": 3.063,
1335
  "step": 260
1336
  },
1337
  {
1338
  "epoch": 131.0,
1339
- "eval_accuracy": 0.7571428571428571,
1340
- "eval_loss": 0.7076297402381897,
1341
- "eval_runtime": 0.633,
1342
- "eval_samples_per_second": 110.582,
1343
- "eval_steps_per_second": 3.159,
1344
  "step": 262
1345
  },
1346
  {
1347
  "epoch": 132.0,
1348
- "eval_accuracy": 0.7571428571428571,
1349
- "eval_loss": 0.706558883190155,
1350
- "eval_runtime": 0.8161,
1351
- "eval_samples_per_second": 85.772,
1352
- "eval_steps_per_second": 2.451,
1353
  "step": 264
1354
  },
1355
  {
1356
  "epoch": 133.0,
1357
- "eval_accuracy": 0.7571428571428571,
1358
- "eval_loss": 0.7055317759513855,
1359
- "eval_runtime": 0.6432,
1360
- "eval_samples_per_second": 108.834,
1361
- "eval_steps_per_second": 3.11,
1362
  "step": 266
1363
  },
1364
  {
1365
  "epoch": 134.0,
1366
- "eval_accuracy": 0.7714285714285715,
1367
- "eval_loss": 0.7044604420661926,
1368
- "eval_runtime": 0.6426,
1369
- "eval_samples_per_second": 108.936,
1370
- "eval_steps_per_second": 3.112,
1371
  "step": 268
1372
  },
1373
  {
1374
  "epoch": 135.0,
1375
  "learning_rate": 4.0625000000000005e-05,
1376
- "loss": 0.7708,
1377
  "step": 270
1378
  },
1379
  {
1380
  "epoch": 135.0,
1381
- "eval_accuracy": 0.7714285714285715,
1382
- "eval_loss": 0.7034193873405457,
1383
- "eval_runtime": 0.8178,
1384
- "eval_samples_per_second": 85.591,
1385
- "eval_steps_per_second": 2.445,
1386
  "step": 270
1387
  },
1388
  {
1389
  "epoch": 136.0,
1390
- "eval_accuracy": 0.7571428571428571,
1391
- "eval_loss": 0.7024958729743958,
1392
- "eval_runtime": 0.6353,
1393
- "eval_samples_per_second": 110.186,
1394
- "eval_steps_per_second": 3.148,
1395
  "step": 272
1396
  },
1397
  {
1398
  "epoch": 137.0,
1399
- "eval_accuracy": 0.7571428571428571,
1400
- "eval_loss": 0.7016207575798035,
1401
- "eval_runtime": 0.632,
1402
- "eval_samples_per_second": 110.758,
1403
- "eval_steps_per_second": 3.165,
1404
  "step": 274
1405
  },
1406
  {
1407
  "epoch": 138.0,
1408
- "eval_accuracy": 0.7571428571428571,
1409
- "eval_loss": 0.7007526755332947,
1410
- "eval_runtime": 0.8281,
1411
- "eval_samples_per_second": 84.534,
1412
- "eval_steps_per_second": 2.415,
1413
  "step": 276
1414
  },
1415
  {
1416
  "epoch": 139.0,
1417
- "eval_accuracy": 0.7571428571428571,
1418
- "eval_loss": 0.6998603343963623,
1419
- "eval_runtime": 0.6379,
1420
- "eval_samples_per_second": 109.736,
1421
- "eval_steps_per_second": 3.135,
1422
  "step": 278
1423
  },
1424
  {
1425
  "epoch": 140.0,
1426
  "learning_rate": 3.7500000000000003e-05,
1427
- "loss": 0.797,
1428
  "step": 280
1429
  },
1430
  {
1431
  "epoch": 140.0,
1432
- "eval_accuracy": 0.7571428571428571,
1433
- "eval_loss": 0.6989655494689941,
1434
- "eval_runtime": 0.6404,
1435
- "eval_samples_per_second": 109.303,
1436
- "eval_steps_per_second": 3.123,
1437
  "step": 280
1438
  },
1439
  {
1440
  "epoch": 141.0,
1441
- "eval_accuracy": 0.7714285714285715,
1442
- "eval_loss": 0.6981316804885864,
1443
- "eval_runtime": 0.8143,
1444
- "eval_samples_per_second": 85.963,
1445
- "eval_steps_per_second": 2.456,
1446
  "step": 282
1447
  },
1448
  {
1449
  "epoch": 142.0,
1450
- "eval_accuracy": 0.7714285714285715,
1451
- "eval_loss": 0.6973427534103394,
1452
  "eval_runtime": 0.6382,
1453
- "eval_samples_per_second": 109.681,
1454
  "eval_steps_per_second": 3.134,
1455
  "step": 284
1456
  },
1457
  {
1458
  "epoch": 143.0,
1459
- "eval_accuracy": 0.7714285714285715,
1460
- "eval_loss": 0.6966080069541931,
1461
- "eval_runtime": 0.6394,
1462
- "eval_samples_per_second": 109.471,
1463
- "eval_steps_per_second": 3.128,
1464
  "step": 286
1465
  },
1466
  {
1467
  "epoch": 144.0,
1468
- "eval_accuracy": 0.7714285714285715,
1469
- "eval_loss": 0.6958935856819153,
1470
- "eval_runtime": 0.8183,
1471
- "eval_samples_per_second": 85.546,
1472
- "eval_steps_per_second": 2.444,
1473
  "step": 288
1474
  },
1475
  {
1476
  "epoch": 145.0,
1477
  "learning_rate": 3.4375e-05,
1478
- "loss": 0.7768,
1479
  "step": 290
1480
  },
1481
  {
1482
  "epoch": 145.0,
1483
- "eval_accuracy": 0.7714285714285715,
1484
- "eval_loss": 0.695166289806366,
1485
- "eval_runtime": 0.6458,
1486
- "eval_samples_per_second": 108.386,
1487
- "eval_steps_per_second": 3.097,
1488
  "step": 290
1489
  },
1490
  {
1491
  "epoch": 146.0,
1492
- "eval_accuracy": 0.7714285714285715,
1493
- "eval_loss": 0.694381833076477,
1494
- "eval_runtime": 0.6351,
1495
- "eval_samples_per_second": 110.21,
1496
- "eval_steps_per_second": 3.149,
1497
  "step": 292
1498
  },
1499
  {
1500
  "epoch": 147.0,
1501
- "eval_accuracy": 0.7714285714285715,
1502
- "eval_loss": 0.6935797333717346,
1503
- "eval_runtime": 0.8207,
1504
- "eval_samples_per_second": 85.29,
1505
- "eval_steps_per_second": 2.437,
1506
  "step": 294
1507
  },
1508
  {
1509
  "epoch": 148.0,
1510
- "eval_accuracy": 0.7857142857142857,
1511
- "eval_loss": 0.6927558779716492,
1512
- "eval_runtime": 0.6395,
1513
- "eval_samples_per_second": 109.463,
1514
- "eval_steps_per_second": 3.128,
1515
  "step": 296
1516
  },
1517
  {
1518
  "epoch": 149.0,
1519
- "eval_accuracy": 0.7857142857142857,
1520
- "eval_loss": 0.6919543147087097,
1521
- "eval_runtime": 0.6322,
1522
- "eval_samples_per_second": 110.727,
1523
- "eval_steps_per_second": 3.164,
1524
  "step": 298
1525
  },
1526
  {
1527
  "epoch": 150.0,
1528
  "learning_rate": 3.125e-05,
1529
- "loss": 0.7569,
1530
  "step": 300
1531
  },
1532
  {
1533
  "epoch": 150.0,
1534
- "eval_accuracy": 0.7857142857142857,
1535
- "eval_loss": 0.6911686658859253,
1536
- "eval_runtime": 0.8112,
1537
- "eval_samples_per_second": 86.294,
1538
- "eval_steps_per_second": 2.466,
1539
  "step": 300
1540
  },
1541
  {
1542
  "epoch": 151.0,
1543
- "eval_accuracy": 0.8,
1544
- "eval_loss": 0.6904271841049194,
1545
- "eval_runtime": 0.6485,
1546
- "eval_samples_per_second": 107.942,
1547
- "eval_steps_per_second": 3.084,
1548
  "step": 302
1549
  },
1550
  {
1551
  "epoch": 152.0,
1552
- "eval_accuracy": 0.8,
1553
- "eval_loss": 0.6896898150444031,
1554
- "eval_runtime": 0.6429,
1555
- "eval_samples_per_second": 108.878,
1556
- "eval_steps_per_second": 3.111,
1557
  "step": 304
1558
  },
1559
  {
1560
  "epoch": 153.0,
1561
- "eval_accuracy": 0.8,
1562
- "eval_loss": 0.688970148563385,
1563
- "eval_runtime": 0.8248,
1564
- "eval_samples_per_second": 84.872,
1565
- "eval_steps_per_second": 2.425,
1566
  "step": 306
1567
  },
1568
  {
1569
  "epoch": 154.0,
1570
- "eval_accuracy": 0.8,
1571
- "eval_loss": 0.6882473826408386,
1572
- "eval_runtime": 0.6473,
1573
- "eval_samples_per_second": 108.141,
1574
- "eval_steps_per_second": 3.09,
1575
  "step": 308
1576
  },
1577
  {
1578
  "epoch": 155.0,
1579
  "learning_rate": 2.8125000000000003e-05,
1580
- "loss": 0.7807,
1581
  "step": 310
1582
  },
1583
  {
1584
  "epoch": 155.0,
1585
  "eval_accuracy": 0.8142857142857143,
1586
- "eval_loss": 0.6875176429748535,
1587
- "eval_runtime": 0.6211,
1588
- "eval_samples_per_second": 112.707,
1589
- "eval_steps_per_second": 3.22,
1590
  "step": 310
1591
  },
1592
  {
1593
  "epoch": 156.0,
1594
  "eval_accuracy": 0.8142857142857143,
1595
- "eval_loss": 0.6867862939834595,
1596
- "eval_runtime": 0.8104,
1597
- "eval_samples_per_second": 86.38,
1598
- "eval_steps_per_second": 2.468,
1599
  "step": 312
1600
  },
1601
  {
1602
  "epoch": 157.0,
1603
  "eval_accuracy": 0.8142857142857143,
1604
- "eval_loss": 0.6860566139221191,
1605
- "eval_runtime": 0.6376,
1606
- "eval_samples_per_second": 109.79,
1607
- "eval_steps_per_second": 3.137,
1608
  "step": 314
1609
  },
1610
  {
1611
  "epoch": 158.0,
1612
  "eval_accuracy": 0.8142857142857143,
1613
- "eval_loss": 0.6853832006454468,
1614
- "eval_runtime": 0.6414,
1615
- "eval_samples_per_second": 109.144,
1616
- "eval_steps_per_second": 3.118,
1617
  "step": 316
1618
  },
1619
  {
1620
  "epoch": 159.0,
1621
  "eval_accuracy": 0.8142857142857143,
1622
- "eval_loss": 0.684758186340332,
1623
- "eval_runtime": 0.8168,
1624
- "eval_samples_per_second": 85.701,
1625
- "eval_steps_per_second": 2.449,
1626
  "step": 318
1627
  },
1628
  {
1629
  "epoch": 160.0,
1630
  "learning_rate": 2.5e-05,
1631
- "loss": 0.7472,
1632
  "step": 320
1633
  },
1634
  {
1635
  "epoch": 160.0,
1636
- "eval_accuracy": 0.8142857142857143,
1637
- "eval_loss": 0.6841580867767334,
1638
- "eval_runtime": 0.6398,
1639
- "eval_samples_per_second": 109.413,
1640
- "eval_steps_per_second": 3.126,
1641
  "step": 320
1642
  },
1643
  {
1644
  "epoch": 161.0,
1645
  "eval_accuracy": 0.8142857142857143,
1646
- "eval_loss": 0.6836223006248474,
1647
- "eval_runtime": 0.6529,
1648
- "eval_samples_per_second": 107.211,
1649
- "eval_steps_per_second": 3.063,
1650
  "step": 322
1651
  },
1652
  {
1653
  "epoch": 162.0,
1654
  "eval_accuracy": 0.8142857142857143,
1655
- "eval_loss": 0.683104932308197,
1656
- "eval_runtime": 0.8258,
1657
- "eval_samples_per_second": 84.764,
1658
- "eval_steps_per_second": 2.422,
1659
  "step": 324
1660
  },
1661
  {
1662
  "epoch": 163.0,
1663
  "eval_accuracy": 0.8142857142857143,
1664
- "eval_loss": 0.6826251745223999,
1665
- "eval_runtime": 0.6361,
1666
- "eval_samples_per_second": 110.054,
1667
- "eval_steps_per_second": 3.144,
1668
  "step": 326
1669
  },
1670
  {
1671
  "epoch": 164.0,
1672
  "eval_accuracy": 0.8142857142857143,
1673
- "eval_loss": 0.6821797490119934,
1674
- "eval_runtime": 0.6381,
1675
- "eval_samples_per_second": 109.703,
1676
- "eval_steps_per_second": 3.134,
1677
  "step": 328
1678
  },
1679
  {
1680
  "epoch": 165.0,
1681
  "learning_rate": 2.1875e-05,
1682
- "loss": 0.7665,
1683
  "step": 330
1684
  },
1685
  {
1686
  "epoch": 165.0,
1687
- "eval_accuracy": 0.8,
1688
- "eval_loss": 0.6817546486854553,
1689
- "eval_runtime": 0.8246,
1690
- "eval_samples_per_second": 84.889,
1691
- "eval_steps_per_second": 2.425,
1692
  "step": 330
1693
  },
1694
  {
1695
  "epoch": 166.0,
1696
- "eval_accuracy": 0.8,
1697
- "eval_loss": 0.6813837885856628,
1698
- "eval_runtime": 0.6625,
1699
- "eval_samples_per_second": 105.654,
1700
- "eval_steps_per_second": 3.019,
1701
  "step": 332
1702
  },
1703
  {
1704
  "epoch": 167.0,
1705
- "eval_accuracy": 0.8,
1706
- "eval_loss": 0.681039035320282,
1707
- "eval_runtime": 0.6385,
1708
- "eval_samples_per_second": 109.638,
1709
- "eval_steps_per_second": 3.133,
1710
  "step": 334
1711
  },
1712
  {
1713
  "epoch": 168.0,
1714
- "eval_accuracy": 0.7857142857142857,
1715
- "eval_loss": 0.6806796193122864,
1716
- "eval_runtime": 0.8294,
1717
- "eval_samples_per_second": 84.403,
1718
- "eval_steps_per_second": 2.412,
1719
  "step": 336
1720
  },
1721
  {
1722
  "epoch": 169.0,
1723
- "eval_accuracy": 0.7857142857142857,
1724
- "eval_loss": 0.680313766002655,
1725
- "eval_runtime": 0.6312,
1726
- "eval_samples_per_second": 110.896,
1727
- "eval_steps_per_second": 3.168,
1728
  "step": 338
1729
  },
1730
  {
1731
  "epoch": 170.0,
1732
  "learning_rate": 1.8750000000000002e-05,
1733
- "loss": 0.7684,
1734
  "step": 340
1735
  },
1736
  {
1737
  "epoch": 170.0,
1738
- "eval_accuracy": 0.7857142857142857,
1739
- "eval_loss": 0.6799898743629456,
1740
- "eval_runtime": 0.6322,
1741
- "eval_samples_per_second": 110.721,
1742
- "eval_steps_per_second": 3.163,
1743
  "step": 340
1744
  },
1745
  {
1746
  "epoch": 171.0,
1747
- "eval_accuracy": 0.7857142857142857,
1748
- "eval_loss": 0.6796825528144836,
1749
- "eval_runtime": 0.8381,
1750
- "eval_samples_per_second": 83.525,
1751
- "eval_steps_per_second": 2.386,
1752
  "step": 342
1753
  },
1754
  {
1755
  "epoch": 172.0,
1756
- "eval_accuracy": 0.7857142857142857,
1757
- "eval_loss": 0.6793543696403503,
1758
- "eval_runtime": 0.6493,
1759
- "eval_samples_per_second": 107.81,
1760
- "eval_steps_per_second": 3.08,
1761
  "step": 344
1762
  },
1763
  {
1764
  "epoch": 173.0,
1765
- "eval_accuracy": 0.7857142857142857,
1766
- "eval_loss": 0.6790363192558289,
1767
- "eval_runtime": 0.6451,
1768
- "eval_samples_per_second": 108.504,
1769
- "eval_steps_per_second": 3.1,
1770
  "step": 346
1771
  },
1772
  {
1773
  "epoch": 174.0,
1774
- "eval_accuracy": 0.7857142857142857,
1775
- "eval_loss": 0.6787369847297668,
1776
- "eval_runtime": 0.8298,
1777
- "eval_samples_per_second": 84.361,
1778
- "eval_steps_per_second": 2.41,
1779
  "step": 348
1780
  },
1781
  {
1782
  "epoch": 175.0,
1783
  "learning_rate": 1.5625e-05,
1784
- "loss": 0.7459,
1785
  "step": 350
1786
  },
1787
  {
1788
  "epoch": 175.0,
1789
- "eval_accuracy": 0.7857142857142857,
1790
- "eval_loss": 0.6784414052963257,
1791
- "eval_runtime": 0.6607,
1792
- "eval_samples_per_second": 105.953,
1793
- "eval_steps_per_second": 3.027,
1794
  "step": 350
1795
  },
1796
  {
1797
  "epoch": 176.0,
1798
- "eval_accuracy": 0.7857142857142857,
1799
- "eval_loss": 0.6781107783317566,
1800
- "eval_runtime": 0.681,
1801
- "eval_samples_per_second": 102.789,
1802
- "eval_steps_per_second": 2.937,
1803
  "step": 352
1804
  },
1805
  {
1806
  "epoch": 177.0,
1807
- "eval_accuracy": 0.7857142857142857,
1808
- "eval_loss": 0.677795946598053,
1809
- "eval_runtime": 0.8379,
1810
- "eval_samples_per_second": 83.54,
1811
- "eval_steps_per_second": 2.387,
1812
  "step": 354
1813
  },
1814
  {
1815
  "epoch": 178.0,
1816
- "eval_accuracy": 0.7857142857142857,
1817
- "eval_loss": 0.6774783730506897,
1818
- "eval_runtime": 0.6408,
1819
- "eval_samples_per_second": 109.247,
1820
- "eval_steps_per_second": 3.121,
1821
  "step": 356
1822
  },
1823
  {
1824
  "epoch": 179.0,
1825
- "eval_accuracy": 0.7857142857142857,
1826
- "eval_loss": 0.6771765351295471,
1827
- "eval_runtime": 0.6403,
1828
- "eval_samples_per_second": 109.315,
1829
- "eval_steps_per_second": 3.123,
1830
  "step": 358
1831
  },
1832
  {
1833
  "epoch": 180.0,
1834
  "learning_rate": 1.25e-05,
1835
- "loss": 0.742,
1836
  "step": 360
1837
  },
1838
  {
1839
  "epoch": 180.0,
1840
- "eval_accuracy": 0.7857142857142857,
1841
- "eval_loss": 0.6768958568572998,
1842
- "eval_runtime": 0.8321,
1843
- "eval_samples_per_second": 84.126,
1844
- "eval_steps_per_second": 2.404,
1845
  "step": 360
1846
  },
1847
  {
1848
  "epoch": 181.0,
1849
- "eval_accuracy": 0.7857142857142857,
1850
- "eval_loss": 0.6766448616981506,
1851
- "eval_runtime": 0.6364,
1852
- "eval_samples_per_second": 110.0,
1853
- "eval_steps_per_second": 3.143,
1854
  "step": 362
1855
  },
1856
  {
1857
  "epoch": 182.0,
1858
- "eval_accuracy": 0.7857142857142857,
1859
- "eval_loss": 0.6764284372329712,
1860
- "eval_runtime": 0.647,
1861
- "eval_samples_per_second": 108.196,
1862
- "eval_steps_per_second": 3.091,
1863
  "step": 364
1864
  },
1865
  {
1866
  "epoch": 183.0,
1867
- "eval_accuracy": 0.7857142857142857,
1868
- "eval_loss": 0.6762242913246155,
1869
- "eval_runtime": 0.8597,
1870
- "eval_samples_per_second": 81.422,
1871
- "eval_steps_per_second": 2.326,
1872
  "step": 366
1873
  },
1874
  {
1875
  "epoch": 184.0,
1876
- "eval_accuracy": 0.7857142857142857,
1877
- "eval_loss": 0.6760057806968689,
1878
- "eval_runtime": 0.651,
1879
- "eval_samples_per_second": 107.527,
1880
- "eval_steps_per_second": 3.072,
1881
  "step": 368
1882
  },
1883
  {
1884
  "epoch": 185.0,
1885
  "learning_rate": 9.375000000000001e-06,
1886
- "loss": 0.7642,
1887
  "step": 370
1888
  },
1889
  {
1890
  "epoch": 185.0,
1891
- "eval_accuracy": 0.7857142857142857,
1892
- "eval_loss": 0.6757904291152954,
1893
- "eval_runtime": 0.6445,
1894
- "eval_samples_per_second": 108.618,
1895
- "eval_steps_per_second": 3.103,
1896
  "step": 370
1897
  },
1898
  {
1899
  "epoch": 186.0,
1900
- "eval_accuracy": 0.7857142857142857,
1901
- "eval_loss": 0.6755796670913696,
1902
- "eval_runtime": 0.8335,
1903
- "eval_samples_per_second": 83.986,
1904
- "eval_steps_per_second": 2.4,
1905
  "step": 372
1906
  },
1907
  {
1908
  "epoch": 187.0,
1909
- "eval_accuracy": 0.7857142857142857,
1910
- "eval_loss": 0.6753801107406616,
1911
- "eval_runtime": 0.6394,
1912
- "eval_samples_per_second": 109.479,
1913
- "eval_steps_per_second": 3.128,
1914
  "step": 374
1915
  },
1916
  {
1917
  "epoch": 188.0,
1918
- "eval_accuracy": 0.7857142857142857,
1919
- "eval_loss": 0.6752031445503235,
1920
- "eval_runtime": 0.658,
1921
- "eval_samples_per_second": 106.386,
1922
- "eval_steps_per_second": 3.04,
1923
  "step": 376
1924
  },
1925
  {
1926
  "epoch": 189.0,
1927
- "eval_accuracy": 0.7857142857142857,
1928
- "eval_loss": 0.6750344634056091,
1929
- "eval_runtime": 0.8422,
1930
- "eval_samples_per_second": 83.112,
1931
- "eval_steps_per_second": 2.375,
1932
  "step": 378
1933
  },
1934
  {
1935
  "epoch": 190.0,
1936
  "learning_rate": 6.25e-06,
1937
- "loss": 0.7277,
1938
  "step": 380
1939
  },
1940
  {
1941
  "epoch": 190.0,
1942
- "eval_accuracy": 0.7857142857142857,
1943
- "eval_loss": 0.674885094165802,
1944
- "eval_runtime": 0.6493,
1945
- "eval_samples_per_second": 107.808,
1946
- "eval_steps_per_second": 3.08,
1947
  "step": 380
1948
  },
1949
  {
1950
  "epoch": 191.0,
1951
- "eval_accuracy": 0.7857142857142857,
1952
- "eval_loss": 0.6747546195983887,
1953
- "eval_runtime": 0.6429,
1954
- "eval_samples_per_second": 108.875,
1955
- "eval_steps_per_second": 3.111,
1956
  "step": 382
1957
  },
1958
  {
1959
  "epoch": 192.0,
1960
- "eval_accuracy": 0.7857142857142857,
1961
- "eval_loss": 0.6746455430984497,
1962
- "eval_runtime": 0.8305,
1963
- "eval_samples_per_second": 84.289,
1964
- "eval_steps_per_second": 2.408,
1965
  "step": 384
1966
  },
1967
  {
1968
  "epoch": 193.0,
1969
- "eval_accuracy": 0.7857142857142857,
1970
- "eval_loss": 0.6745493412017822,
1971
- "eval_runtime": 0.6551,
1972
- "eval_samples_per_second": 106.855,
1973
- "eval_steps_per_second": 3.053,
1974
  "step": 386
1975
  },
1976
  {
1977
  "epoch": 194.0,
1978
- "eval_accuracy": 0.7857142857142857,
1979
- "eval_loss": 0.6744527816772461,
1980
- "eval_runtime": 0.6428,
1981
- "eval_samples_per_second": 108.896,
1982
- "eval_steps_per_second": 3.111,
1983
  "step": 388
1984
  },
1985
  {
1986
  "epoch": 195.0,
1987
  "learning_rate": 3.125e-06,
1988
- "loss": 0.764,
1989
  "step": 390
1990
  },
1991
  {
1992
  "epoch": 195.0,
1993
- "eval_accuracy": 0.7857142857142857,
1994
- "eval_loss": 0.6743654608726501,
1995
- "eval_runtime": 0.8334,
1996
- "eval_samples_per_second": 83.991,
1997
- "eval_steps_per_second": 2.4,
1998
  "step": 390
1999
  },
2000
  {
2001
  "epoch": 196.0,
2002
- "eval_accuracy": 0.7857142857142857,
2003
- "eval_loss": 0.674295961856842,
2004
- "eval_runtime": 0.652,
2005
- "eval_samples_per_second": 107.363,
2006
- "eval_steps_per_second": 3.068,
2007
  "step": 392
2008
  },
2009
  {
2010
  "epoch": 197.0,
2011
- "eval_accuracy": 0.7857142857142857,
2012
- "eval_loss": 0.6742398142814636,
2013
- "eval_runtime": 0.6659,
2014
- "eval_samples_per_second": 105.127,
2015
- "eval_steps_per_second": 3.004,
2016
  "step": 394
2017
  },
2018
  {
2019
  "epoch": 198.0,
2020
- "eval_accuracy": 0.8,
2021
- "eval_loss": 0.6741960644721985,
2022
- "eval_runtime": 0.8894,
2023
- "eval_samples_per_second": 78.703,
2024
- "eval_steps_per_second": 2.249,
2025
  "step": 396
2026
  },
2027
  {
2028
  "epoch": 199.0,
2029
- "eval_accuracy": 0.8,
2030
- "eval_loss": 0.674168586730957,
2031
- "eval_runtime": 0.6565,
2032
- "eval_samples_per_second": 106.623,
2033
- "eval_steps_per_second": 3.046,
2034
  "step": 398
2035
  },
2036
  {
2037
  "epoch": 200.0,
2038
  "learning_rate": 0.0,
2039
- "loss": 0.7444,
2040
  "step": 400
2041
  },
2042
  {
2043
  "epoch": 200.0,
2044
- "eval_accuracy": 0.8,
2045
- "eval_loss": 0.6741567850112915,
2046
- "eval_runtime": 0.6417,
2047
- "eval_samples_per_second": 109.09,
2048
- "eval_steps_per_second": 3.117,
2049
  "step": 400
2050
  },
2051
  {
2052
  "epoch": 200.0,
2053
  "step": 400,
2054
  "total_flos": 2.23710151698432e+18,
2055
- "train_loss": 0.8548950719833374,
2056
- "train_runtime": 1030.1946,
2057
- "train_samples_per_second": 87.362,
2058
- "train_steps_per_second": 0.388
2059
  }
2060
  ],
2061
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.8285714285714286,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-3cls-0922/checkpoint-40",
4
  "epoch": 200.0,
5
  "eval_steps": 500,
6
  "global_step": 400,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.8142857142857143,
14
+ "eval_loss": 0.6875032186508179,
15
+ "eval_runtime": 0.8381,
16
+ "eval_samples_per_second": 83.526,
17
+ "eval_steps_per_second": 2.386,
18
  "step": 2
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.8142857142857143,
23
+ "eval_loss": 0.6874324083328247,
24
+ "eval_runtime": 0.6285,
25
+ "eval_samples_per_second": 111.384,
26
+ "eval_steps_per_second": 3.182,
27
  "step": 4
28
  },
29
  {
30
  "epoch": 3.0,
31
+ "eval_accuracy": 0.8142857142857143,
32
+ "eval_loss": 0.6873045563697815,
33
+ "eval_runtime": 0.6533,
34
+ "eval_samples_per_second": 107.153,
35
+ "eval_steps_per_second": 3.062,
36
  "step": 6
37
  },
38
  {
39
  "epoch": 4.0,
40
+ "eval_accuracy": 0.8142857142857143,
41
+ "eval_loss": 0.687107264995575,
42
+ "eval_runtime": 0.8041,
43
+ "eval_samples_per_second": 87.057,
44
+ "eval_steps_per_second": 2.487,
45
  "step": 8
46
  },
47
  {
48
  "epoch": 5.0,
49
  "learning_rate": 1.25e-05,
50
+ "loss": 0.7555,
51
  "step": 10
52
  },
53
  {
54
  "epoch": 5.0,
55
+ "eval_accuracy": 0.8142857142857143,
56
+ "eval_loss": 0.6868652701377869,
57
+ "eval_runtime": 0.6585,
58
+ "eval_samples_per_second": 106.309,
59
+ "eval_steps_per_second": 3.037,
60
  "step": 10
61
  },
62
  {
63
  "epoch": 6.0,
64
+ "eval_accuracy": 0.8142857142857143,
65
+ "eval_loss": 0.686565101146698,
66
+ "eval_runtime": 0.6372,
67
+ "eval_samples_per_second": 109.86,
68
+ "eval_steps_per_second": 3.139,
69
  "step": 12
70
  },
71
  {
72
  "epoch": 7.0,
73
+ "eval_accuracy": 0.8142857142857143,
74
+ "eval_loss": 0.6861968636512756,
75
+ "eval_runtime": 0.8401,
76
+ "eval_samples_per_second": 83.326,
77
+ "eval_steps_per_second": 2.381,
78
  "step": 14
79
  },
80
  {
81
  "epoch": 8.0,
82
+ "eval_accuracy": 0.8142857142857143,
83
+ "eval_loss": 0.685771644115448,
84
+ "eval_runtime": 0.6402,
85
+ "eval_samples_per_second": 109.344,
86
+ "eval_steps_per_second": 3.124,
87
  "step": 16
88
  },
89
  {
90
  "epoch": 9.0,
91
+ "eval_accuracy": 0.8142857142857143,
92
+ "eval_loss": 0.6853042244911194,
93
+ "eval_runtime": 0.638,
94
+ "eval_samples_per_second": 109.711,
95
+ "eval_steps_per_second": 3.135,
96
  "step": 18
97
  },
98
  {
99
  "epoch": 10.0,
100
  "learning_rate": 2.5e-05,
101
+ "loss": 0.7576,
102
  "step": 20
103
  },
104
  {
105
  "epoch": 10.0,
106
+ "eval_accuracy": 0.8142857142857143,
107
+ "eval_loss": 0.6847913861274719,
108
+ "eval_runtime": 0.8284,
109
+ "eval_samples_per_second": 84.496,
110
+ "eval_steps_per_second": 2.414,
111
  "step": 20
112
  },
113
  {
114
  "epoch": 11.0,
115
+ "eval_accuracy": 0.8142857142857143,
116
+ "eval_loss": 0.6842377185821533,
117
+ "eval_runtime": 0.6408,
118
+ "eval_samples_per_second": 109.237,
119
+ "eval_steps_per_second": 3.121,
120
  "step": 22
121
  },
122
  {
123
  "epoch": 12.0,
124
+ "eval_accuracy": 0.8142857142857143,
125
+ "eval_loss": 0.6836268901824951,
126
+ "eval_runtime": 0.6496,
127
+ "eval_samples_per_second": 107.755,
128
+ "eval_steps_per_second": 3.079,
129
  "step": 24
130
  },
131
  {
132
  "epoch": 13.0,
133
+ "eval_accuracy": 0.8142857142857143,
134
+ "eval_loss": 0.6829591393470764,
135
+ "eval_runtime": 0.8145,
136
+ "eval_samples_per_second": 85.938,
137
+ "eval_steps_per_second": 2.455,
138
  "step": 26
139
  },
140
  {
141
  "epoch": 14.0,
142
+ "eval_accuracy": 0.8142857142857143,
143
+ "eval_loss": 0.6822755336761475,
144
+ "eval_runtime": 0.6641,
145
+ "eval_samples_per_second": 105.412,
146
+ "eval_steps_per_second": 3.012,
147
  "step": 28
148
  },
149
  {
150
  "epoch": 15.0,
151
  "learning_rate": 3.7500000000000003e-05,
152
+ "loss": 0.769,
153
  "step": 30
154
  },
155
  {
156
  "epoch": 15.0,
157
+ "eval_accuracy": 0.8,
158
+ "eval_loss": 0.6815804839134216,
159
+ "eval_runtime": 0.6278,
160
+ "eval_samples_per_second": 111.502,
161
+ "eval_steps_per_second": 3.186,
162
  "step": 30
163
  },
164
  {
165
  "epoch": 16.0,
166
+ "eval_accuracy": 0.8,
167
+ "eval_loss": 0.6808401346206665,
168
+ "eval_runtime": 0.8247,
169
+ "eval_samples_per_second": 84.88,
170
+ "eval_steps_per_second": 2.425,
171
  "step": 32
172
  },
173
  {
174
  "epoch": 17.0,
175
+ "eval_accuracy": 0.8142857142857143,
176
+ "eval_loss": 0.6800239086151123,
177
+ "eval_runtime": 0.6376,
178
+ "eval_samples_per_second": 109.794,
179
+ "eval_steps_per_second": 3.137,
180
  "step": 34
181
  },
182
  {
183
  "epoch": 18.0,
184
+ "eval_accuracy": 0.8142857142857143,
185
+ "eval_loss": 0.679133951663971,
186
+ "eval_runtime": 0.6356,
187
+ "eval_samples_per_second": 110.128,
188
+ "eval_steps_per_second": 3.147,
189
  "step": 36
190
  },
191
  {
192
  "epoch": 19.0,
193
+ "eval_accuracy": 0.8142857142857143,
194
+ "eval_loss": 0.6781331896781921,
195
+ "eval_runtime": 0.8136,
196
+ "eval_samples_per_second": 86.039,
197
+ "eval_steps_per_second": 2.458,
198
  "step": 38
199
  },
200
  {
201
  "epoch": 20.0,
202
  "learning_rate": 5e-05,
203
+ "loss": 0.7564,
204
  "step": 40
205
  },
206
  {
207
  "epoch": 20.0,
208
+ "eval_accuracy": 0.8285714285714286,
209
+ "eval_loss": 0.6770716309547424,
210
+ "eval_runtime": 0.627,
211
+ "eval_samples_per_second": 111.643,
212
+ "eval_steps_per_second": 3.19,
213
  "step": 40
214
  },
215
  {
216
  "epoch": 21.0,
217
+ "eval_accuracy": 0.8142857142857143,
218
+ "eval_loss": 0.6759592294692993,
219
+ "eval_runtime": 0.6244,
220
+ "eval_samples_per_second": 112.113,
221
+ "eval_steps_per_second": 3.203,
222
  "step": 42
223
  },
224
  {
225
  "epoch": 22.0,
226
+ "eval_accuracy": 0.8142857142857143,
227
+ "eval_loss": 0.674824059009552,
228
+ "eval_runtime": 0.72,
229
+ "eval_samples_per_second": 97.226,
230
+ "eval_steps_per_second": 2.778,
231
  "step": 44
232
  },
233
  {
234
  "epoch": 23.0,
235
+ "eval_accuracy": 0.8,
236
+ "eval_loss": 0.6736522912979126,
237
+ "eval_runtime": 0.6356,
238
+ "eval_samples_per_second": 110.125,
239
+ "eval_steps_per_second": 3.146,
240
  "step": 46
241
  },
242
  {
243
  "epoch": 24.0,
244
+ "eval_accuracy": 0.8,
245
+ "eval_loss": 0.6724562644958496,
246
+ "eval_runtime": 0.6465,
247
+ "eval_samples_per_second": 108.268,
248
+ "eval_steps_per_second": 3.093,
249
  "step": 48
250
  },
251
  {
252
  "epoch": 25.0,
253
  "learning_rate": 6.25e-05,
254
+ "loss": 0.7508,
255
  "step": 50
256
  },
257
  {
258
  "epoch": 25.0,
259
+ "eval_accuracy": 0.8142857142857143,
260
+ "eval_loss": 0.6713314056396484,
261
+ "eval_runtime": 0.6458,
262
+ "eval_samples_per_second": 108.385,
263
+ "eval_steps_per_second": 3.097,
264
  "step": 50
265
  },
266
  {
267
  "epoch": 26.0,
268
+ "eval_accuracy": 0.8142857142857143,
269
+ "eval_loss": 0.6701393723487854,
270
+ "eval_runtime": 0.7532,
271
+ "eval_samples_per_second": 92.934,
272
+ "eval_steps_per_second": 2.655,
273
  "step": 52
274
  },
275
  {
276
  "epoch": 27.0,
277
+ "eval_accuracy": 0.8142857142857143,
278
+ "eval_loss": 0.6688514947891235,
279
+ "eval_runtime": 0.6275,
280
+ "eval_samples_per_second": 111.546,
281
+ "eval_steps_per_second": 3.187,
282
  "step": 54
283
  },
284
  {
285
  "epoch": 28.0,
286
+ "eval_accuracy": 0.8142857142857143,
287
+ "eval_loss": 0.6674489378929138,
288
+ "eval_runtime": 0.6455,
289
+ "eval_samples_per_second": 108.446,
290
+ "eval_steps_per_second": 3.098,
291
  "step": 56
292
  },
293
  {
294
  "epoch": 29.0,
295
+ "eval_accuracy": 0.8142857142857143,
296
+ "eval_loss": 0.6660061478614807,
297
+ "eval_runtime": 0.7926,
298
+ "eval_samples_per_second": 88.312,
299
+ "eval_steps_per_second": 2.523,
300
  "step": 58
301
  },
302
  {
303
  "epoch": 30.0,
304
  "learning_rate": 7.500000000000001e-05,
305
+ "loss": 0.747,
306
  "step": 60
307
  },
308
  {
309
  "epoch": 30.0,
310
+ "eval_accuracy": 0.8142857142857143,
311
+ "eval_loss": 0.6645620465278625,
312
+ "eval_runtime": 0.6238,
313
+ "eval_samples_per_second": 112.214,
314
+ "eval_steps_per_second": 3.206,
315
  "step": 60
316
  },
317
  {
318
  "epoch": 31.0,
319
+ "eval_accuracy": 0.8142857142857143,
320
+ "eval_loss": 0.6631242632865906,
321
+ "eval_runtime": 0.651,
322
+ "eval_samples_per_second": 107.52,
323
+ "eval_steps_per_second": 3.072,
324
  "step": 62
325
  },
326
  {
327
  "epoch": 32.0,
328
+ "eval_accuracy": 0.8142857142857143,
329
+ "eval_loss": 0.6616196036338806,
330
+ "eval_runtime": 0.8036,
331
+ "eval_samples_per_second": 87.111,
332
+ "eval_steps_per_second": 2.489,
333
  "step": 64
334
  },
335
  {
336
  "epoch": 33.0,
337
+ "eval_accuracy": 0.8142857142857143,
338
+ "eval_loss": 0.6600926518440247,
339
+ "eval_runtime": 0.638,
340
+ "eval_samples_per_second": 109.722,
341
+ "eval_steps_per_second": 3.135,
342
  "step": 66
343
  },
344
  {
345
  "epoch": 34.0,
346
+ "eval_accuracy": 0.8142857142857143,
347
+ "eval_loss": 0.6585766673088074,
348
+ "eval_runtime": 0.6365,
349
+ "eval_samples_per_second": 109.973,
350
+ "eval_steps_per_second": 3.142,
351
  "step": 68
352
  },
353
  {
354
  "epoch": 35.0,
355
  "learning_rate": 8.75e-05,
356
+ "loss": 0.7343,
357
  "step": 70
358
  },
359
  {
360
  "epoch": 35.0,
361
+ "eval_accuracy": 0.8142857142857143,
362
+ "eval_loss": 0.6569960117340088,
363
+ "eval_runtime": 0.7913,
364
+ "eval_samples_per_second": 88.467,
365
+ "eval_steps_per_second": 2.528,
366
  "step": 70
367
  },
368
  {
369
  "epoch": 36.0,
370
+ "eval_accuracy": 0.8142857142857143,
371
+ "eval_loss": 0.6553293466567993,
372
+ "eval_runtime": 0.6317,
373
+ "eval_samples_per_second": 110.815,
374
+ "eval_steps_per_second": 3.166,
375
  "step": 72
376
  },
377
  {
378
  "epoch": 37.0,
379
+ "eval_accuracy": 0.8142857142857143,
380
+ "eval_loss": 0.6535871028900146,
381
+ "eval_runtime": 0.6261,
382
+ "eval_samples_per_second": 111.81,
383
+ "eval_steps_per_second": 3.195,
384
  "step": 74
385
  },
386
  {
387
  "epoch": 38.0,
388
+ "eval_accuracy": 0.8142857142857143,
389
+ "eval_loss": 0.6517333388328552,
390
+ "eval_runtime": 0.7801,
391
+ "eval_samples_per_second": 89.727,
392
+ "eval_steps_per_second": 2.564,
393
  "step": 76
394
  },
395
  {
396
  "epoch": 39.0,
397
+ "eval_accuracy": 0.8142857142857143,
398
+ "eval_loss": 0.6498710513114929,
399
+ "eval_runtime": 0.6688,
400
+ "eval_samples_per_second": 104.667,
401
+ "eval_steps_per_second": 2.99,
402
  "step": 78
403
  },
404
  {
405
  "epoch": 40.0,
406
  "learning_rate": 0.0001,
407
+ "loss": 0.7532,
408
  "step": 80
409
  },
410
  {
411
  "epoch": 40.0,
412
+ "eval_accuracy": 0.8142857142857143,
413
+ "eval_loss": 0.6480462551116943,
414
+ "eval_runtime": 0.6422,
415
+ "eval_samples_per_second": 108.998,
416
+ "eval_steps_per_second": 3.114,
417
  "step": 80
418
  },
419
  {
420
  "epoch": 41.0,
421
+ "eval_accuracy": 0.8142857142857143,
422
+ "eval_loss": 0.6461040377616882,
423
+ "eval_runtime": 0.7878,
424
+ "eval_samples_per_second": 88.86,
425
+ "eval_steps_per_second": 2.539,
426
  "step": 82
427
  },
428
  {
429
  "epoch": 42.0,
430
+ "eval_accuracy": 0.8142857142857143,
431
+ "eval_loss": 0.6441839337348938,
432
+ "eval_runtime": 0.6221,
433
+ "eval_samples_per_second": 112.518,
434
+ "eval_steps_per_second": 3.215,
435
  "step": 84
436
  },
437
  {
438
  "epoch": 43.0,
439
+ "eval_accuracy": 0.8142857142857143,
440
+ "eval_loss": 0.6423068046569824,
441
+ "eval_runtime": 0.6404,
442
+ "eval_samples_per_second": 109.306,
443
+ "eval_steps_per_second": 3.123,
444
  "step": 86
445
  },
446
  {
447
  "epoch": 44.0,
448
+ "eval_accuracy": 0.8142857142857143,
449
+ "eval_loss": 0.6404834985733032,
450
+ "eval_runtime": 0.8194,
451
+ "eval_samples_per_second": 85.431,
452
+ "eval_steps_per_second": 2.441,
453
  "step": 88
454
  },
455
  {
456
  "epoch": 45.0,
457
  "learning_rate": 9.687500000000001e-05,
458
+ "loss": 0.7239,
459
  "step": 90
460
  },
461
  {
462
  "epoch": 45.0,
463
+ "eval_accuracy": 0.8142857142857143,
464
+ "eval_loss": 0.638668417930603,
465
+ "eval_runtime": 0.6293,
466
+ "eval_samples_per_second": 111.227,
467
+ "eval_steps_per_second": 3.178,
468
  "step": 90
469
  },
470
  {
471
  "epoch": 46.0,
472
+ "eval_accuracy": 0.8142857142857143,
473
+ "eval_loss": 0.6368482112884521,
474
+ "eval_runtime": 0.6307,
475
+ "eval_samples_per_second": 110.981,
476
+ "eval_steps_per_second": 3.171,
477
  "step": 92
478
  },
479
  {
480
  "epoch": 47.0,
481
+ "eval_accuracy": 0.8142857142857143,
482
+ "eval_loss": 0.6351889967918396,
483
+ "eval_runtime": 0.8243,
484
+ "eval_samples_per_second": 84.921,
485
+ "eval_steps_per_second": 2.426,
486
  "step": 94
487
  },
488
  {
489
  "epoch": 48.0,
490
+ "eval_accuracy": 0.8142857142857143,
491
+ "eval_loss": 0.6336590051651001,
492
+ "eval_runtime": 0.6325,
493
+ "eval_samples_per_second": 110.664,
494
+ "eval_steps_per_second": 3.162,
495
  "step": 96
496
  },
497
  {
498
  "epoch": 49.0,
499
+ "eval_accuracy": 0.8285714285714286,
500
+ "eval_loss": 0.6321325302124023,
501
+ "eval_runtime": 0.6292,
502
+ "eval_samples_per_second": 111.258,
503
+ "eval_steps_per_second": 3.179,
504
  "step": 98
505
  },
506
  {
507
  "epoch": 50.0,
508
  "learning_rate": 9.375e-05,
509
+ "loss": 0.7085,
510
  "step": 100
511
  },
512
  {
513
  "epoch": 50.0,
514
+ "eval_accuracy": 0.8285714285714286,
515
+ "eval_loss": 0.6307134628295898,
516
+ "eval_runtime": 0.8147,
517
+ "eval_samples_per_second": 85.924,
518
+ "eval_steps_per_second": 2.455,
519
  "step": 100
520
  },
521
  {
522
  "epoch": 51.0,
523
+ "eval_accuracy": 0.8285714285714286,
524
+ "eval_loss": 0.6293519139289856,
525
+ "eval_runtime": 0.6273,
526
+ "eval_samples_per_second": 111.588,
527
+ "eval_steps_per_second": 3.188,
528
  "step": 102
529
  },
530
  {
531
  "epoch": 52.0,
532
+ "eval_accuracy": 0.8285714285714286,
533
+ "eval_loss": 0.6278188228607178,
534
+ "eval_runtime": 0.6366,
535
+ "eval_samples_per_second": 109.96,
536
+ "eval_steps_per_second": 3.142,
537
  "step": 104
538
  },
539
  {
540
  "epoch": 53.0,
541
+ "eval_accuracy": 0.8285714285714286,
542
+ "eval_loss": 0.6263061165809631,
543
+ "eval_runtime": 0.8106,
544
+ "eval_samples_per_second": 86.353,
545
+ "eval_steps_per_second": 2.467,
546
  "step": 106
547
  },
548
  {
549
  "epoch": 54.0,
550
+ "eval_accuracy": 0.8142857142857143,
551
+ "eval_loss": 0.6247809529304504,
552
+ "eval_runtime": 0.637,
553
+ "eval_samples_per_second": 109.885,
554
+ "eval_steps_per_second": 3.14,
555
  "step": 108
556
  },
557
  {
558
  "epoch": 55.0,
559
  "learning_rate": 9.062500000000001e-05,
560
+ "loss": 0.7203,
561
  "step": 110
562
  },
563
  {
564
  "epoch": 55.0,
565
+ "eval_accuracy": 0.8142857142857143,
566
+ "eval_loss": 0.6232935190200806,
567
+ "eval_runtime": 0.6312,
568
+ "eval_samples_per_second": 110.901,
569
+ "eval_steps_per_second": 3.169,
570
  "step": 110
571
  },
572
  {
573
  "epoch": 56.0,
574
+ "eval_accuracy": 0.8142857142857143,
575
+ "eval_loss": 0.6218679547309875,
576
+ "eval_runtime": 0.8253,
577
+ "eval_samples_per_second": 84.819,
578
+ "eval_steps_per_second": 2.423,
579
  "step": 112
580
  },
581
  {
582
  "epoch": 57.0,
583
+ "eval_accuracy": 0.8142857142857143,
584
+ "eval_loss": 0.6204643845558167,
585
+ "eval_runtime": 0.6393,
586
+ "eval_samples_per_second": 109.495,
587
+ "eval_steps_per_second": 3.128,
588
  "step": 114
589
  },
590
  {
591
  "epoch": 58.0,
592
+ "eval_accuracy": 0.8142857142857143,
593
+ "eval_loss": 0.6191075444221497,
594
+ "eval_runtime": 0.6278,
595
+ "eval_samples_per_second": 111.495,
596
+ "eval_steps_per_second": 3.186,
597
  "step": 116
598
  },
599
  {
600
  "epoch": 59.0,
601
+ "eval_accuracy": 0.8142857142857143,
602
+ "eval_loss": 0.6178752779960632,
603
+ "eval_runtime": 0.8233,
604
+ "eval_samples_per_second": 85.027,
605
+ "eval_steps_per_second": 2.429,
606
  "step": 118
607
  },
608
  {
609
  "epoch": 60.0,
610
  "learning_rate": 8.75e-05,
611
+ "loss": 0.7136,
612
  "step": 120
613
  },
614
  {
615
  "epoch": 60.0,
616
+ "eval_accuracy": 0.8142857142857143,
617
+ "eval_loss": 0.6167242527008057,
618
+ "eval_runtime": 0.6489,
619
+ "eval_samples_per_second": 107.881,
620
+ "eval_steps_per_second": 3.082,
621
  "step": 120
622
  },
623
  {
624
  "epoch": 61.0,
625
+ "eval_accuracy": 0.8142857142857143,
626
+ "eval_loss": 0.6156985759735107,
627
+ "eval_runtime": 0.6472,
628
+ "eval_samples_per_second": 108.154,
629
+ "eval_steps_per_second": 3.09,
630
  "step": 122
631
  },
632
  {
633
  "epoch": 62.0,
634
+ "eval_accuracy": 0.8,
635
+ "eval_loss": 0.61481112241745,
636
+ "eval_runtime": 0.8228,
637
+ "eval_samples_per_second": 85.073,
638
+ "eval_steps_per_second": 2.431,
639
  "step": 124
640
  },
641
  {
642
  "epoch": 63.0,
643
+ "eval_accuracy": 0.8,
644
+ "eval_loss": 0.6138356328010559,
645
+ "eval_runtime": 0.6327,
646
+ "eval_samples_per_second": 110.64,
647
+ "eval_steps_per_second": 3.161,
648
  "step": 126
649
  },
650
  {
651
  "epoch": 64.0,
652
+ "eval_accuracy": 0.8,
653
+ "eval_loss": 0.6125301122665405,
654
+ "eval_runtime": 0.6379,
655
+ "eval_samples_per_second": 109.736,
656
+ "eval_steps_per_second": 3.135,
657
  "step": 128
658
  },
659
  {
660
  "epoch": 65.0,
661
  "learning_rate": 8.4375e-05,
662
+ "loss": 0.7123,
663
  "step": 130
664
  },
665
  {
666
  "epoch": 65.0,
667
+ "eval_accuracy": 0.8,
668
+ "eval_loss": 0.6110576391220093,
669
+ "eval_runtime": 0.825,
670
+ "eval_samples_per_second": 84.849,
671
+ "eval_steps_per_second": 2.424,
672
  "step": 130
673
  },
674
  {
675
  "epoch": 66.0,
676
+ "eval_accuracy": 0.8142857142857143,
677
+ "eval_loss": 0.6096405982971191,
678
+ "eval_runtime": 0.6376,
679
+ "eval_samples_per_second": 109.782,
680
+ "eval_steps_per_second": 3.137,
681
  "step": 132
682
  },
683
  {
684
  "epoch": 67.0,
685
+ "eval_accuracy": 0.8142857142857143,
686
+ "eval_loss": 0.6083278656005859,
687
+ "eval_runtime": 0.8232,
688
+ "eval_samples_per_second": 85.037,
689
+ "eval_steps_per_second": 2.43,
690
  "step": 134
691
  },
692
  {
693
  "epoch": 68.0,
694
+ "eval_accuracy": 0.8142857142857143,
695
+ "eval_loss": 0.6069909930229187,
696
+ "eval_runtime": 0.8193,
697
+ "eval_samples_per_second": 85.437,
698
+ "eval_steps_per_second": 2.441,
699
  "step": 136
700
  },
701
  {
702
  "epoch": 69.0,
703
+ "eval_accuracy": 0.8142857142857143,
704
+ "eval_loss": 0.6057179570198059,
705
+ "eval_runtime": 0.6486,
706
+ "eval_samples_per_second": 107.925,
707
+ "eval_steps_per_second": 3.084,
708
  "step": 138
709
  },
710
  {
711
  "epoch": 70.0,
712
  "learning_rate": 8.125000000000001e-05,
713
+ "loss": 0.7076,
714
  "step": 140
715
  },
716
  {
717
  "epoch": 70.0,
718
+ "eval_accuracy": 0.8142857142857143,
719
+ "eval_loss": 0.604619562625885,
720
+ "eval_runtime": 0.6358,
721
+ "eval_samples_per_second": 110.095,
722
+ "eval_steps_per_second": 3.146,
723
  "step": 140
724
  },
725
  {
726
  "epoch": 71.0,
727
+ "eval_accuracy": 0.8142857142857143,
728
+ "eval_loss": 0.6034784913063049,
729
+ "eval_runtime": 0.8201,
730
+ "eval_samples_per_second": 85.352,
731
+ "eval_steps_per_second": 2.439,
732
  "step": 142
733
  },
734
  {
735
  "epoch": 72.0,
736
+ "eval_accuracy": 0.8142857142857143,
737
+ "eval_loss": 0.6022736430168152,
738
+ "eval_runtime": 0.6311,
739
+ "eval_samples_per_second": 110.91,
740
+ "eval_steps_per_second": 3.169,
741
  "step": 144
742
  },
743
  {
744
  "epoch": 73.0,
745
+ "eval_accuracy": 0.8142857142857143,
746
+ "eval_loss": 0.6011058688163757,
747
+ "eval_runtime": 0.6566,
748
+ "eval_samples_per_second": 106.607,
749
+ "eval_steps_per_second": 3.046,
750
  "step": 146
751
  },
752
  {
753
  "epoch": 74.0,
754
+ "eval_accuracy": 0.8142857142857143,
755
+ "eval_loss": 0.5999324917793274,
756
+ "eval_runtime": 0.8174,
757
+ "eval_samples_per_second": 85.64,
758
+ "eval_steps_per_second": 2.447,
759
  "step": 148
760
  },
761
  {
762
  "epoch": 75.0,
763
  "learning_rate": 7.8125e-05,
764
+ "loss": 0.6878,
765
  "step": 150
766
  },
767
  {
768
  "epoch": 75.0,
769
+ "eval_accuracy": 0.8142857142857143,
770
+ "eval_loss": 0.5987647175788879,
771
+ "eval_runtime": 0.6275,
772
+ "eval_samples_per_second": 111.562,
773
+ "eval_steps_per_second": 3.187,
774
  "step": 150
775
  },
776
  {
777
  "epoch": 76.0,
778
+ "eval_accuracy": 0.8142857142857143,
779
+ "eval_loss": 0.5975351333618164,
780
+ "eval_runtime": 0.6296,
781
+ "eval_samples_per_second": 111.178,
782
+ "eval_steps_per_second": 3.177,
783
  "step": 152
784
  },
785
  {
786
  "epoch": 77.0,
787
+ "eval_accuracy": 0.8142857142857143,
788
+ "eval_loss": 0.5963953137397766,
789
+ "eval_runtime": 0.8044,
790
+ "eval_samples_per_second": 87.018,
791
+ "eval_steps_per_second": 2.486,
792
  "step": 154
793
  },
794
  {
795
  "epoch": 78.0,
796
+ "eval_accuracy": 0.8142857142857143,
797
+ "eval_loss": 0.5952684879302979,
798
+ "eval_runtime": 0.6501,
799
+ "eval_samples_per_second": 107.669,
800
+ "eval_steps_per_second": 3.076,
801
  "step": 156
802
  },
803
  {
804
  "epoch": 79.0,
805
+ "eval_accuracy": 0.8142857142857143,
806
+ "eval_loss": 0.5942099094390869,
807
+ "eval_runtime": 0.6469,
808
+ "eval_samples_per_second": 108.203,
809
+ "eval_steps_per_second": 3.092,
810
  "step": 158
811
  },
812
  {
813
  "epoch": 80.0,
814
  "learning_rate": 7.500000000000001e-05,
815
+ "loss": 0.6657,
816
  "step": 160
817
  },
818
  {
819
  "epoch": 80.0,
820
+ "eval_accuracy": 0.8142857142857143,
821
+ "eval_loss": 0.5932222008705139,
822
+ "eval_runtime": 0.8259,
823
+ "eval_samples_per_second": 84.754,
824
+ "eval_steps_per_second": 2.422,
825
  "step": 160
826
  },
827
  {
828
  "epoch": 81.0,
829
+ "eval_accuracy": 0.8142857142857143,
830
+ "eval_loss": 0.5923032760620117,
831
+ "eval_runtime": 0.6393,
832
+ "eval_samples_per_second": 109.49,
833
+ "eval_steps_per_second": 3.128,
834
  "step": 162
835
  },
836
  {
837
  "epoch": 82.0,
838
+ "eval_accuracy": 0.8142857142857143,
839
+ "eval_loss": 0.5914328098297119,
840
+ "eval_runtime": 0.6466,
841
+ "eval_samples_per_second": 108.262,
842
+ "eval_steps_per_second": 3.093,
843
  "step": 164
844
  },
845
  {
846
  "epoch": 83.0,
847
+ "eval_accuracy": 0.8142857142857143,
848
+ "eval_loss": 0.5905909538269043,
849
+ "eval_runtime": 0.8278,
850
+ "eval_samples_per_second": 84.56,
851
+ "eval_steps_per_second": 2.416,
852
  "step": 166
853
  },
854
  {
855
  "epoch": 84.0,
856
+ "eval_accuracy": 0.8142857142857143,
857
+ "eval_loss": 0.5897351503372192,
858
+ "eval_runtime": 0.6485,
859
+ "eval_samples_per_second": 107.935,
860
+ "eval_steps_per_second": 3.084,
861
  "step": 168
862
  },
863
  {
864
  "epoch": 85.0,
865
  "learning_rate": 7.1875e-05,
866
+ "loss": 0.6434,
867
  "step": 170
868
  },
869
  {
870
  "epoch": 85.0,
871
+ "eval_accuracy": 0.8142857142857143,
872
+ "eval_loss": 0.588803768157959,
873
+ "eval_runtime": 0.6407,
874
+ "eval_samples_per_second": 109.255,
875
+ "eval_steps_per_second": 3.122,
876
  "step": 170
877
  },
878
  {
879
  "epoch": 86.0,
880
+ "eval_accuracy": 0.8142857142857143,
881
+ "eval_loss": 0.5878075361251831,
882
+ "eval_runtime": 0.7846,
883
+ "eval_samples_per_second": 89.216,
884
+ "eval_steps_per_second": 2.549,
885
  "step": 172
886
  },
887
  {
888
  "epoch": 87.0,
889
+ "eval_accuracy": 0.8142857142857143,
890
+ "eval_loss": 0.5868256688117981,
891
+ "eval_runtime": 0.6427,
892
+ "eval_samples_per_second": 108.917,
893
+ "eval_steps_per_second": 3.112,
894
  "step": 174
895
  },
896
  {
897
  "epoch": 88.0,
898
+ "eval_accuracy": 0.8142857142857143,
899
+ "eval_loss": 0.5859082341194153,
900
+ "eval_runtime": 0.6384,
901
+ "eval_samples_per_second": 109.65,
902
+ "eval_steps_per_second": 3.133,
903
  "step": 176
904
  },
905
  {
906
  "epoch": 89.0,
907
+ "eval_accuracy": 0.8142857142857143,
908
+ "eval_loss": 0.5850787162780762,
909
+ "eval_runtime": 0.7009,
910
+ "eval_samples_per_second": 99.878,
911
+ "eval_steps_per_second": 2.854,
912
  "step": 178
913
  },
914
  {
915
  "epoch": 90.0,
916
  "learning_rate": 6.875e-05,
917
+ "loss": 0.6825,
918
  "step": 180
919
  },
920
  {
921
  "epoch": 90.0,
922
+ "eval_accuracy": 0.8142857142857143,
923
+ "eval_loss": 0.5843265652656555,
924
+ "eval_runtime": 0.6343,
925
+ "eval_samples_per_second": 110.361,
926
+ "eval_steps_per_second": 3.153,
927
  "step": 180
928
  },
929
  {
930
  "epoch": 91.0,
931
+ "eval_accuracy": 0.8142857142857143,
932
+ "eval_loss": 0.5835766792297363,
933
+ "eval_runtime": 0.645,
934
+ "eval_samples_per_second": 108.529,
935
+ "eval_steps_per_second": 3.101,
936
  "step": 182
937
  },
938
  {
939
  "epoch": 92.0,
940
+ "eval_accuracy": 0.8142857142857143,
941
+ "eval_loss": 0.5828419923782349,
942
+ "eval_runtime": 0.6414,
943
+ "eval_samples_per_second": 109.129,
944
+ "eval_steps_per_second": 3.118,
945
  "step": 184
946
  },
947
  {
948
  "epoch": 93.0,
949
+ "eval_accuracy": 0.8142857142857143,
950
+ "eval_loss": 0.5822591781616211,
951
+ "eval_runtime": 0.6506,
952
+ "eval_samples_per_second": 107.585,
953
+ "eval_steps_per_second": 3.074,
954
  "step": 186
955
  },
956
  {
957
  "epoch": 94.0,
958
+ "eval_accuracy": 0.8285714285714286,
959
+ "eval_loss": 0.5817149877548218,
960
+ "eval_runtime": 0.6481,
961
+ "eval_samples_per_second": 108.003,
962
+ "eval_steps_per_second": 3.086,
963
  "step": 188
964
  },
965
  {
966
  "epoch": 95.0,
967
  "learning_rate": 6.562500000000001e-05,
968
+ "loss": 0.6695,
969
  "step": 190
970
  },
971
  {
972
  "epoch": 95.0,
973
+ "eval_accuracy": 0.8142857142857143,
974
+ "eval_loss": 0.5809342265129089,
975
+ "eval_runtime": 0.6426,
976
+ "eval_samples_per_second": 108.939,
977
+ "eval_steps_per_second": 3.113,
978
  "step": 190
979
  },
980
  {
981
  "epoch": 96.0,
982
+ "eval_accuracy": 0.8142857142857143,
983
+ "eval_loss": 0.5801157355308533,
984
+ "eval_runtime": 0.7408,
985
+ "eval_samples_per_second": 94.487,
986
+ "eval_steps_per_second": 2.7,
987
  "step": 192
988
  },
989
  {
990
  "epoch": 97.0,
991
+ "eval_accuracy": 0.8142857142857143,
992
+ "eval_loss": 0.5793442130088806,
993
+ "eval_runtime": 0.6328,
994
+ "eval_samples_per_second": 110.628,
995
+ "eval_steps_per_second": 3.161,
996
  "step": 194
997
  },
998
  {
999
  "epoch": 98.0,
1000
+ "eval_accuracy": 0.8142857142857143,
1001
+ "eval_loss": 0.5787318348884583,
1002
+ "eval_runtime": 0.6404,
1003
+ "eval_samples_per_second": 109.309,
1004
+ "eval_steps_per_second": 3.123,
1005
  "step": 196
1006
  },
1007
  {
1008
  "epoch": 99.0,
1009
+ "eval_accuracy": 0.8142857142857143,
1010
+ "eval_loss": 0.5780039429664612,
1011
+ "eval_runtime": 0.7894,
1012
+ "eval_samples_per_second": 88.678,
1013
+ "eval_steps_per_second": 2.534,
1014
  "step": 198
1015
  },
1016
  {
1017
  "epoch": 100.0,
1018
  "learning_rate": 6.25e-05,
1019
+ "loss": 0.6672,
1020
  "step": 200
1021
  },
1022
  {
1023
  "epoch": 100.0,
1024
+ "eval_accuracy": 0.8142857142857143,
1025
+ "eval_loss": 0.5772114992141724,
1026
+ "eval_runtime": 0.6485,
1027
+ "eval_samples_per_second": 107.942,
1028
+ "eval_steps_per_second": 3.084,
1029
  "step": 200
1030
  },
1031
  {
1032
  "epoch": 101.0,
1033
+ "eval_accuracy": 0.8142857142857143,
1034
+ "eval_loss": 0.5762485265731812,
1035
+ "eval_runtime": 0.632,
1036
+ "eval_samples_per_second": 110.757,
1037
+ "eval_steps_per_second": 3.164,
1038
  "step": 202
1039
  },
1040
  {
1041
  "epoch": 102.0,
1042
+ "eval_accuracy": 0.8142857142857143,
1043
+ "eval_loss": 0.5753609538078308,
1044
+ "eval_runtime": 0.8156,
1045
+ "eval_samples_per_second": 85.824,
1046
+ "eval_steps_per_second": 2.452,
1047
  "step": 204
1048
  },
1049
  {
1050
  "epoch": 103.0,
1051
+ "eval_accuracy": 0.8142857142857143,
1052
+ "eval_loss": 0.5745884776115417,
1053
+ "eval_runtime": 0.641,
1054
+ "eval_samples_per_second": 109.197,
1055
+ "eval_steps_per_second": 3.12,
1056
  "step": 206
1057
  },
1058
  {
1059
  "epoch": 104.0,
1060
+ "eval_accuracy": 0.8142857142857143,
1061
+ "eval_loss": 0.573843777179718,
1062
+ "eval_runtime": 0.64,
1063
+ "eval_samples_per_second": 109.374,
1064
+ "eval_steps_per_second": 3.125,
1065
  "step": 208
1066
  },
1067
  {
1068
  "epoch": 105.0,
1069
  "learning_rate": 5.9375e-05,
1070
+ "loss": 0.6569,
1071
  "step": 210
1072
  },
1073
  {
1074
  "epoch": 105.0,
1075
+ "eval_accuracy": 0.8142857142857143,
1076
+ "eval_loss": 0.5731338858604431,
1077
+ "eval_runtime": 0.8165,
1078
+ "eval_samples_per_second": 85.735,
1079
+ "eval_steps_per_second": 2.45,
1080
  "step": 210
1081
  },
1082
  {
1083
  "epoch": 106.0,
1084
+ "eval_accuracy": 0.8142857142857143,
1085
+ "eval_loss": 0.5723776817321777,
1086
+ "eval_runtime": 0.6448,
1087
+ "eval_samples_per_second": 108.558,
1088
+ "eval_steps_per_second": 3.102,
1089
  "step": 212
1090
  },
1091
  {
1092
  "epoch": 107.0,
1093
+ "eval_accuracy": 0.8142857142857143,
1094
+ "eval_loss": 0.5715596675872803,
1095
+ "eval_runtime": 0.6552,
1096
+ "eval_samples_per_second": 106.837,
1097
+ "eval_steps_per_second": 3.052,
1098
  "step": 214
1099
  },
1100
  {
1101
  "epoch": 108.0,
1102
+ "eval_accuracy": 0.8142857142857143,
1103
+ "eval_loss": 0.5707866549491882,
1104
+ "eval_runtime": 0.7955,
1105
+ "eval_samples_per_second": 87.991,
1106
+ "eval_steps_per_second": 2.514,
1107
  "step": 216
1108
  },
1109
  {
1110
  "epoch": 109.0,
1111
+ "eval_accuracy": 0.8142857142857143,
1112
+ "eval_loss": 0.570074737071991,
1113
+ "eval_runtime": 0.6364,
1114
+ "eval_samples_per_second": 109.993,
1115
+ "eval_steps_per_second": 3.143,
1116
  "step": 218
1117
  },
1118
  {
1119
  "epoch": 110.0,
1120
  "learning_rate": 5.6250000000000005e-05,
1121
+ "loss": 0.6748,
1122
  "step": 220
1123
  },
1124
  {
1125
  "epoch": 110.0,
1126
+ "eval_accuracy": 0.8142857142857143,
1127
+ "eval_loss": 0.5693923830986023,
1128
+ "eval_runtime": 0.6356,
1129
+ "eval_samples_per_second": 110.138,
1130
+ "eval_steps_per_second": 3.147,
1131
  "step": 220
1132
  },
1133
  {
1134
  "epoch": 111.0,
1135
+ "eval_accuracy": 0.8142857142857143,
1136
+ "eval_loss": 0.5686994791030884,
1137
+ "eval_runtime": 0.8207,
1138
+ "eval_samples_per_second": 85.298,
1139
+ "eval_steps_per_second": 2.437,
1140
  "step": 222
1141
  },
1142
  {
1143
  "epoch": 112.0,
1144
+ "eval_accuracy": 0.8142857142857143,
1145
+ "eval_loss": 0.5680269598960876,
1146
+ "eval_runtime": 0.6498,
1147
+ "eval_samples_per_second": 107.722,
1148
+ "eval_steps_per_second": 3.078,
1149
  "step": 224
1150
  },
1151
  {
1152
  "epoch": 113.0,
1153
+ "eval_accuracy": 0.8142857142857143,
1154
+ "eval_loss": 0.5673888325691223,
1155
+ "eval_runtime": 0.6711,
1156
+ "eval_samples_per_second": 104.299,
1157
+ "eval_steps_per_second": 2.98,
1158
  "step": 226
1159
  },
1160
  {
1161
  "epoch": 114.0,
1162
+ "eval_accuracy": 0.8142857142857143,
1163
+ "eval_loss": 0.5668244957923889,
1164
+ "eval_runtime": 0.8599,
1165
+ "eval_samples_per_second": 81.4,
1166
+ "eval_steps_per_second": 2.326,
1167
  "step": 228
1168
  },
1169
  {
1170
  "epoch": 115.0,
1171
  "learning_rate": 5.3125000000000004e-05,
1172
+ "loss": 0.6388,
1173
  "step": 230
1174
  },
1175
  {
1176
  "epoch": 115.0,
1177
+ "eval_accuracy": 0.8142857142857143,
1178
+ "eval_loss": 0.56624835729599,
1179
+ "eval_runtime": 0.639,
1180
+ "eval_samples_per_second": 109.549,
1181
+ "eval_steps_per_second": 3.13,
1182
  "step": 230
1183
  },
1184
  {
1185
  "epoch": 116.0,
1186
+ "eval_accuracy": 0.8142857142857143,
1187
+ "eval_loss": 0.5657045841217041,
1188
+ "eval_runtime": 0.6452,
1189
+ "eval_samples_per_second": 108.495,
1190
+ "eval_steps_per_second": 3.1,
1191
  "step": 232
1192
  },
1193
  {
1194
  "epoch": 117.0,
1195
+ "eval_accuracy": 0.8142857142857143,
1196
+ "eval_loss": 0.5652384757995605,
1197
+ "eval_runtime": 0.8319,
1198
+ "eval_samples_per_second": 84.146,
1199
+ "eval_steps_per_second": 2.404,
1200
  "step": 234
1201
  },
1202
  {
1203
  "epoch": 118.0,
1204
+ "eval_accuracy": 0.8285714285714286,
1205
+ "eval_loss": 0.5648259520530701,
1206
+ "eval_runtime": 0.6475,
1207
+ "eval_samples_per_second": 108.103,
1208
+ "eval_steps_per_second": 3.089,
1209
  "step": 236
1210
  },
1211
  {
1212
  "epoch": 119.0,
1213
+ "eval_accuracy": 0.8285714285714286,
1214
+ "eval_loss": 0.5644696354866028,
1215
+ "eval_runtime": 0.6531,
1216
+ "eval_samples_per_second": 107.184,
1217
+ "eval_steps_per_second": 3.062,
1218
  "step": 238
1219
  },
1220
  {
1221
  "epoch": 120.0,
1222
  "learning_rate": 5e-05,
1223
+ "loss": 0.6551,
1224
  "step": 240
1225
  },
1226
  {
1227
  "epoch": 120.0,
1228
+ "eval_accuracy": 0.8285714285714286,
1229
+ "eval_loss": 0.5640624165534973,
1230
+ "eval_runtime": 0.8277,
1231
+ "eval_samples_per_second": 84.574,
1232
+ "eval_steps_per_second": 2.416,
1233
  "step": 240
1234
  },
1235
  {
1236
  "epoch": 121.0,
1237
+ "eval_accuracy": 0.8142857142857143,
1238
+ "eval_loss": 0.5636399388313293,
1239
+ "eval_runtime": 0.6478,
1240
+ "eval_samples_per_second": 108.056,
1241
+ "eval_steps_per_second": 3.087,
1242
  "step": 242
1243
  },
1244
  {
1245
  "epoch": 122.0,
1246
+ "eval_accuracy": 0.8142857142857143,
1247
+ "eval_loss": 0.563149094581604,
1248
+ "eval_runtime": 0.6338,
1249
+ "eval_samples_per_second": 110.453,
1250
+ "eval_steps_per_second": 3.156,
1251
  "step": 244
1252
  },
1253
  {
1254
  "epoch": 123.0,
1255
+ "eval_accuracy": 0.8142857142857143,
1256
+ "eval_loss": 0.5627174973487854,
1257
+ "eval_runtime": 0.8111,
1258
+ "eval_samples_per_second": 86.304,
1259
+ "eval_steps_per_second": 2.466,
1260
  "step": 246
1261
  },
1262
  {
1263
  "epoch": 124.0,
1264
+ "eval_accuracy": 0.8142857142857143,
1265
+ "eval_loss": 0.562400221824646,
1266
+ "eval_runtime": 0.649,
1267
+ "eval_samples_per_second": 107.86,
1268
+ "eval_steps_per_second": 3.082,
1269
  "step": 248
1270
  },
1271
  {
1272
  "epoch": 125.0,
1273
  "learning_rate": 4.6875e-05,
1274
+ "loss": 0.6452,
1275
  "step": 250
1276
  },
1277
  {
1278
  "epoch": 125.0,
1279
+ "eval_accuracy": 0.8142857142857143,
1280
+ "eval_loss": 0.5621911287307739,
1281
+ "eval_runtime": 0.6465,
1282
+ "eval_samples_per_second": 108.279,
1283
+ "eval_steps_per_second": 3.094,
1284
  "step": 250
1285
  },
1286
  {
1287
  "epoch": 126.0,
1288
+ "eval_accuracy": 0.8142857142857143,
1289
+ "eval_loss": 0.5620221495628357,
1290
+ "eval_runtime": 0.827,
1291
+ "eval_samples_per_second": 84.639,
1292
+ "eval_steps_per_second": 2.418,
1293
  "step": 252
1294
  },
1295
  {
1296
  "epoch": 127.0,
1297
+ "eval_accuracy": 0.8142857142857143,
1298
+ "eval_loss": 0.5617978572845459,
1299
+ "eval_runtime": 0.6221,
1300
+ "eval_samples_per_second": 112.525,
1301
+ "eval_steps_per_second": 3.215,
1302
  "step": 254
1303
  },
1304
  {
1305
  "epoch": 128.0,
1306
+ "eval_accuracy": 0.8142857142857143,
1307
+ "eval_loss": 0.5614616870880127,
1308
+ "eval_runtime": 0.6384,
1309
+ "eval_samples_per_second": 109.65,
1310
+ "eval_steps_per_second": 3.133,
1311
  "step": 256
1312
  },
1313
  {
1314
  "epoch": 129.0,
1315
+ "eval_accuracy": 0.8142857142857143,
1316
+ "eval_loss": 0.5612771511077881,
1317
+ "eval_runtime": 0.8188,
1318
+ "eval_samples_per_second": 85.487,
1319
+ "eval_steps_per_second": 2.442,
1320
  "step": 258
1321
  },
1322
  {
1323
  "epoch": 130.0,
1324
  "learning_rate": 4.375e-05,
1325
+ "loss": 0.645,
1326
  "step": 260
1327
  },
1328
  {
1329
  "epoch": 130.0,
1330
+ "eval_accuracy": 0.8142857142857143,
1331
+ "eval_loss": 0.5610944032669067,
1332
+ "eval_runtime": 0.6274,
1333
+ "eval_samples_per_second": 111.57,
1334
+ "eval_steps_per_second": 3.188,
1335
  "step": 260
1336
  },
1337
  {
1338
  "epoch": 131.0,
1339
+ "eval_accuracy": 0.8142857142857143,
1340
+ "eval_loss": 0.5608205199241638,
1341
+ "eval_runtime": 0.6351,
1342
+ "eval_samples_per_second": 110.223,
1343
+ "eval_steps_per_second": 3.149,
1344
  "step": 262
1345
  },
1346
  {
1347
  "epoch": 132.0,
1348
+ "eval_accuracy": 0.8142857142857143,
1349
+ "eval_loss": 0.5606086254119873,
1350
+ "eval_runtime": 0.8451,
1351
+ "eval_samples_per_second": 82.832,
1352
+ "eval_steps_per_second": 2.367,
1353
  "step": 264
1354
  },
1355
  {
1356
  "epoch": 133.0,
1357
+ "eval_accuracy": 0.8142857142857143,
1358
+ "eval_loss": 0.5602155923843384,
1359
+ "eval_runtime": 0.6314,
1360
+ "eval_samples_per_second": 110.864,
1361
+ "eval_steps_per_second": 3.168,
1362
  "step": 266
1363
  },
1364
  {
1365
  "epoch": 134.0,
1366
+ "eval_accuracy": 0.8142857142857143,
1367
+ "eval_loss": 0.5596277713775635,
1368
+ "eval_runtime": 0.6347,
1369
+ "eval_samples_per_second": 110.28,
1370
+ "eval_steps_per_second": 3.151,
1371
  "step": 268
1372
  },
1373
  {
1374
  "epoch": 135.0,
1375
  "learning_rate": 4.0625000000000005e-05,
1376
+ "loss": 0.629,
1377
  "step": 270
1378
  },
1379
  {
1380
  "epoch": 135.0,
1381
+ "eval_accuracy": 0.8142857142857143,
1382
+ "eval_loss": 0.558956503868103,
1383
+ "eval_runtime": 0.8112,
1384
+ "eval_samples_per_second": 86.289,
1385
+ "eval_steps_per_second": 2.465,
1386
  "step": 270
1387
  },
1388
  {
1389
  "epoch": 136.0,
1390
+ "eval_accuracy": 0.8142857142857143,
1391
+ "eval_loss": 0.5582412481307983,
1392
+ "eval_runtime": 0.6394,
1393
+ "eval_samples_per_second": 109.485,
1394
+ "eval_steps_per_second": 3.128,
1395
  "step": 272
1396
  },
1397
  {
1398
  "epoch": 137.0,
1399
+ "eval_accuracy": 0.8142857142857143,
1400
+ "eval_loss": 0.5576009750366211,
1401
+ "eval_runtime": 0.6293,
1402
+ "eval_samples_per_second": 111.232,
1403
+ "eval_steps_per_second": 3.178,
1404
  "step": 274
1405
  },
1406
  {
1407
  "epoch": 138.0,
1408
+ "eval_accuracy": 0.8142857142857143,
1409
+ "eval_loss": 0.5571399927139282,
1410
+ "eval_runtime": 0.8108,
1411
+ "eval_samples_per_second": 86.33,
1412
+ "eval_steps_per_second": 2.467,
1413
  "step": 276
1414
  },
1415
  {
1416
  "epoch": 139.0,
1417
+ "eval_accuracy": 0.8142857142857143,
1418
+ "eval_loss": 0.5567926168441772,
1419
+ "eval_runtime": 0.6262,
1420
+ "eval_samples_per_second": 111.788,
1421
+ "eval_steps_per_second": 3.194,
1422
  "step": 278
1423
  },
1424
  {
1425
  "epoch": 140.0,
1426
  "learning_rate": 3.7500000000000003e-05,
1427
+ "loss": 0.7126,
1428
  "step": 280
1429
  },
1430
  {
1431
  "epoch": 140.0,
1432
+ "eval_accuracy": 0.8142857142857143,
1433
+ "eval_loss": 0.556534469127655,
1434
+ "eval_runtime": 0.6392,
1435
+ "eval_samples_per_second": 109.51,
1436
+ "eval_steps_per_second": 3.129,
1437
  "step": 280
1438
  },
1439
  {
1440
  "epoch": 141.0,
1441
+ "eval_accuracy": 0.8142857142857143,
1442
+ "eval_loss": 0.5563255548477173,
1443
+ "eval_runtime": 0.8384,
1444
+ "eval_samples_per_second": 83.488,
1445
+ "eval_steps_per_second": 2.385,
1446
  "step": 282
1447
  },
1448
  {
1449
  "epoch": 142.0,
1450
+ "eval_accuracy": 0.8142857142857143,
1451
+ "eval_loss": 0.5561147928237915,
1452
  "eval_runtime": 0.6382,
1453
+ "eval_samples_per_second": 109.687,
1454
  "eval_steps_per_second": 3.134,
1455
  "step": 284
1456
  },
1457
  {
1458
  "epoch": 143.0,
1459
+ "eval_accuracy": 0.8142857142857143,
1460
+ "eval_loss": 0.5559044480323792,
1461
+ "eval_runtime": 0.647,
1462
+ "eval_samples_per_second": 108.191,
1463
+ "eval_steps_per_second": 3.091,
1464
  "step": 286
1465
  },
1466
  {
1467
  "epoch": 144.0,
1468
+ "eval_accuracy": 0.8142857142857143,
1469
+ "eval_loss": 0.555549681186676,
1470
+ "eval_runtime": 0.8257,
1471
+ "eval_samples_per_second": 84.779,
1472
+ "eval_steps_per_second": 2.422,
1473
  "step": 288
1474
  },
1475
  {
1476
  "epoch": 145.0,
1477
  "learning_rate": 3.4375e-05,
1478
+ "loss": 0.669,
1479
  "step": 290
1480
  },
1481
  {
1482
  "epoch": 145.0,
1483
+ "eval_accuracy": 0.8142857142857143,
1484
+ "eval_loss": 0.5551820397377014,
1485
+ "eval_runtime": 0.6311,
1486
+ "eval_samples_per_second": 110.921,
1487
+ "eval_steps_per_second": 3.169,
1488
  "step": 290
1489
  },
1490
  {
1491
  "epoch": 146.0,
1492
+ "eval_accuracy": 0.8142857142857143,
1493
+ "eval_loss": 0.55474454164505,
1494
+ "eval_runtime": 0.653,
1495
+ "eval_samples_per_second": 107.193,
1496
+ "eval_steps_per_second": 3.063,
1497
  "step": 292
1498
  },
1499
  {
1500
  "epoch": 147.0,
1501
+ "eval_accuracy": 0.8142857142857143,
1502
+ "eval_loss": 0.5542392134666443,
1503
+ "eval_runtime": 0.8093,
1504
+ "eval_samples_per_second": 86.493,
1505
+ "eval_steps_per_second": 2.471,
1506
  "step": 294
1507
  },
1508
  {
1509
  "epoch": 148.0,
1510
+ "eval_accuracy": 0.8142857142857143,
1511
+ "eval_loss": 0.5537976622581482,
1512
+ "eval_runtime": 0.6472,
1513
+ "eval_samples_per_second": 108.161,
1514
+ "eval_steps_per_second": 3.09,
1515
  "step": 296
1516
  },
1517
  {
1518
  "epoch": 149.0,
1519
+ "eval_accuracy": 0.8142857142857143,
1520
+ "eval_loss": 0.5534089803695679,
1521
+ "eval_runtime": 0.6283,
1522
+ "eval_samples_per_second": 111.403,
1523
+ "eval_steps_per_second": 3.183,
1524
  "step": 298
1525
  },
1526
  {
1527
  "epoch": 150.0,
1528
  "learning_rate": 3.125e-05,
1529
+ "loss": 0.6481,
1530
  "step": 300
1531
  },
1532
  {
1533
  "epoch": 150.0,
1534
+ "eval_accuracy": 0.8142857142857143,
1535
+ "eval_loss": 0.5530030727386475,
1536
+ "eval_runtime": 0.8284,
1537
+ "eval_samples_per_second": 84.505,
1538
+ "eval_steps_per_second": 2.414,
1539
  "step": 300
1540
  },
1541
  {
1542
  "epoch": 151.0,
1543
+ "eval_accuracy": 0.8142857142857143,
1544
+ "eval_loss": 0.5526387095451355,
1545
+ "eval_runtime": 0.6358,
1546
+ "eval_samples_per_second": 110.105,
1547
+ "eval_steps_per_second": 3.146,
1548
  "step": 302
1549
  },
1550
  {
1551
  "epoch": 152.0,
1552
+ "eval_accuracy": 0.8142857142857143,
1553
+ "eval_loss": 0.5522416830062866,
1554
+ "eval_runtime": 0.6285,
1555
+ "eval_samples_per_second": 111.384,
1556
+ "eval_steps_per_second": 3.182,
1557
  "step": 304
1558
  },
1559
  {
1560
  "epoch": 153.0,
1561
+ "eval_accuracy": 0.8142857142857143,
1562
+ "eval_loss": 0.5518553853034973,
1563
+ "eval_runtime": 0.7207,
1564
+ "eval_samples_per_second": 97.122,
1565
+ "eval_steps_per_second": 2.775,
1566
  "step": 306
1567
  },
1568
  {
1569
  "epoch": 154.0,
1570
+ "eval_accuracy": 0.8142857142857143,
1571
+ "eval_loss": 0.5514690279960632,
1572
+ "eval_runtime": 0.6359,
1573
+ "eval_samples_per_second": 110.085,
1574
+ "eval_steps_per_second": 3.145,
1575
  "step": 308
1576
  },
1577
  {
1578
  "epoch": 155.0,
1579
  "learning_rate": 2.8125000000000003e-05,
1580
+ "loss": 0.6211,
1581
  "step": 310
1582
  },
1583
  {
1584
  "epoch": 155.0,
1585
  "eval_accuracy": 0.8142857142857143,
1586
+ "eval_loss": 0.5510378479957581,
1587
+ "eval_runtime": 0.636,
1588
+ "eval_samples_per_second": 110.056,
1589
+ "eval_steps_per_second": 3.144,
1590
  "step": 310
1591
  },
1592
  {
1593
  "epoch": 156.0,
1594
  "eval_accuracy": 0.8142857142857143,
1595
+ "eval_loss": 0.5506120920181274,
1596
+ "eval_runtime": 0.6297,
1597
+ "eval_samples_per_second": 111.157,
1598
+ "eval_steps_per_second": 3.176,
1599
  "step": 312
1600
  },
1601
  {
1602
  "epoch": 157.0,
1603
  "eval_accuracy": 0.8142857142857143,
1604
+ "eval_loss": 0.5502142906188965,
1605
+ "eval_runtime": 0.6795,
1606
+ "eval_samples_per_second": 103.02,
1607
+ "eval_steps_per_second": 2.943,
1608
  "step": 314
1609
  },
1610
  {
1611
  "epoch": 158.0,
1612
  "eval_accuracy": 0.8142857142857143,
1613
+ "eval_loss": 0.5498998761177063,
1614
+ "eval_runtime": 0.6321,
1615
+ "eval_samples_per_second": 110.745,
1616
+ "eval_steps_per_second": 3.164,
1617
  "step": 316
1618
  },
1619
  {
1620
  "epoch": 159.0,
1621
  "eval_accuracy": 0.8142857142857143,
1622
+ "eval_loss": 0.5495581030845642,
1623
+ "eval_runtime": 0.6392,
1624
+ "eval_samples_per_second": 109.52,
1625
+ "eval_steps_per_second": 3.129,
1626
  "step": 318
1627
  },
1628
  {
1629
  "epoch": 160.0,
1630
  "learning_rate": 2.5e-05,
1631
+ "loss": 0.6458,
1632
  "step": 320
1633
  },
1634
  {
1635
  "epoch": 160.0,
1636
+ "eval_accuracy": 0.8285714285714286,
1637
+ "eval_loss": 0.5492438077926636,
1638
+ "eval_runtime": 0.8024,
1639
+ "eval_samples_per_second": 87.237,
1640
+ "eval_steps_per_second": 2.492,
1641
  "step": 320
1642
  },
1643
  {
1644
  "epoch": 161.0,
1645
  "eval_accuracy": 0.8142857142857143,
1646
+ "eval_loss": 0.5489979982376099,
1647
+ "eval_runtime": 0.6575,
1648
+ "eval_samples_per_second": 106.471,
1649
+ "eval_steps_per_second": 3.042,
1650
  "step": 322
1651
  },
1652
  {
1653
  "epoch": 162.0,
1654
  "eval_accuracy": 0.8142857142857143,
1655
+ "eval_loss": 0.5487762689590454,
1656
+ "eval_runtime": 0.6515,
1657
+ "eval_samples_per_second": 107.45,
1658
+ "eval_steps_per_second": 3.07,
1659
  "step": 324
1660
  },
1661
  {
1662
  "epoch": 163.0,
1663
  "eval_accuracy": 0.8142857142857143,
1664
+ "eval_loss": 0.548595130443573,
1665
+ "eval_runtime": 0.8069,
1666
+ "eval_samples_per_second": 86.752,
1667
+ "eval_steps_per_second": 2.479,
1668
  "step": 326
1669
  },
1670
  {
1671
  "epoch": 164.0,
1672
  "eval_accuracy": 0.8142857142857143,
1673
+ "eval_loss": 0.5483713746070862,
1674
+ "eval_runtime": 0.6447,
1675
+ "eval_samples_per_second": 108.582,
1676
+ "eval_steps_per_second": 3.102,
1677
  "step": 328
1678
  },
1679
  {
1680
  "epoch": 165.0,
1681
  "learning_rate": 2.1875e-05,
1682
+ "loss": 0.6317,
1683
  "step": 330
1684
  },
1685
  {
1686
  "epoch": 165.0,
1687
+ "eval_accuracy": 0.8142857142857143,
1688
+ "eval_loss": 0.5481104254722595,
1689
+ "eval_runtime": 0.6486,
1690
+ "eval_samples_per_second": 107.926,
1691
+ "eval_steps_per_second": 3.084,
1692
  "step": 330
1693
  },
1694
  {
1695
  "epoch": 166.0,
1696
+ "eval_accuracy": 0.8285714285714286,
1697
+ "eval_loss": 0.5478586554527283,
1698
+ "eval_runtime": 0.8209,
1699
+ "eval_samples_per_second": 85.268,
1700
+ "eval_steps_per_second": 2.436,
1701
  "step": 332
1702
  },
1703
  {
1704
  "epoch": 167.0,
1705
+ "eval_accuracy": 0.8285714285714286,
1706
+ "eval_loss": 0.5475797653198242,
1707
+ "eval_runtime": 0.7417,
1708
+ "eval_samples_per_second": 94.381,
1709
+ "eval_steps_per_second": 2.697,
1710
  "step": 334
1711
  },
1712
  {
1713
  "epoch": 168.0,
1714
+ "eval_accuracy": 0.8285714285714286,
1715
+ "eval_loss": 0.5473471879959106,
1716
+ "eval_runtime": 0.6501,
1717
+ "eval_samples_per_second": 107.671,
1718
+ "eval_steps_per_second": 3.076,
1719
  "step": 336
1720
  },
1721
  {
1722
  "epoch": 169.0,
1723
+ "eval_accuracy": 0.8285714285714286,
1724
+ "eval_loss": 0.5471236705780029,
1725
+ "eval_runtime": 0.7944,
1726
+ "eval_samples_per_second": 88.115,
1727
+ "eval_steps_per_second": 2.518,
1728
  "step": 338
1729
  },
1730
  {
1731
  "epoch": 170.0,
1732
  "learning_rate": 1.8750000000000002e-05,
1733
+ "loss": 0.6154,
1734
  "step": 340
1735
  },
1736
  {
1737
  "epoch": 170.0,
1738
+ "eval_accuracy": 0.8285714285714286,
1739
+ "eval_loss": 0.5469514727592468,
1740
+ "eval_runtime": 0.6378,
1741
+ "eval_samples_per_second": 109.76,
1742
+ "eval_steps_per_second": 3.136,
1743
  "step": 340
1744
  },
1745
  {
1746
  "epoch": 171.0,
1747
+ "eval_accuracy": 0.8285714285714286,
1748
+ "eval_loss": 0.5467889308929443,
1749
+ "eval_runtime": 0.6433,
1750
+ "eval_samples_per_second": 108.819,
1751
+ "eval_steps_per_second": 3.109,
1752
  "step": 342
1753
  },
1754
  {
1755
  "epoch": 172.0,
1756
+ "eval_accuracy": 0.8285714285714286,
1757
+ "eval_loss": 0.5466357469558716,
1758
+ "eval_runtime": 0.8146,
1759
+ "eval_samples_per_second": 85.93,
1760
+ "eval_steps_per_second": 2.455,
1761
  "step": 344
1762
  },
1763
  {
1764
  "epoch": 173.0,
1765
+ "eval_accuracy": 0.8285714285714286,
1766
+ "eval_loss": 0.5464411973953247,
1767
+ "eval_runtime": 0.6826,
1768
+ "eval_samples_per_second": 102.554,
1769
+ "eval_steps_per_second": 2.93,
1770
  "step": 346
1771
  },
1772
  {
1773
  "epoch": 174.0,
1774
+ "eval_accuracy": 0.8285714285714286,
1775
+ "eval_loss": 0.5462457537651062,
1776
+ "eval_runtime": 0.6413,
1777
+ "eval_samples_per_second": 109.146,
1778
+ "eval_steps_per_second": 3.118,
1779
  "step": 348
1780
  },
1781
  {
1782
  "epoch": 175.0,
1783
  "learning_rate": 1.5625e-05,
1784
+ "loss": 0.6323,
1785
  "step": 350
1786
  },
1787
  {
1788
  "epoch": 175.0,
1789
+ "eval_accuracy": 0.8285714285714286,
1790
+ "eval_loss": 0.5460384488105774,
1791
+ "eval_runtime": 0.8055,
1792
+ "eval_samples_per_second": 86.906,
1793
+ "eval_steps_per_second": 2.483,
1794
  "step": 350
1795
  },
1796
  {
1797
  "epoch": 176.0,
1798
+ "eval_accuracy": 0.8285714285714286,
1799
+ "eval_loss": 0.545864999294281,
1800
+ "eval_runtime": 0.635,
1801
+ "eval_samples_per_second": 110.23,
1802
+ "eval_steps_per_second": 3.149,
1803
  "step": 352
1804
  },
1805
  {
1806
  "epoch": 177.0,
1807
+ "eval_accuracy": 0.8285714285714286,
1808
+ "eval_loss": 0.54571932554245,
1809
+ "eval_runtime": 0.6362,
1810
+ "eval_samples_per_second": 110.035,
1811
+ "eval_steps_per_second": 3.144,
1812
  "step": 354
1813
  },
1814
  {
1815
  "epoch": 178.0,
1816
+ "eval_accuracy": 0.8285714285714286,
1817
+ "eval_loss": 0.5455992817878723,
1818
+ "eval_runtime": 0.8155,
1819
+ "eval_samples_per_second": 85.839,
1820
+ "eval_steps_per_second": 2.453,
1821
  "step": 356
1822
  },
1823
  {
1824
  "epoch": 179.0,
1825
+ "eval_accuracy": 0.8285714285714286,
1826
+ "eval_loss": 0.5454698801040649,
1827
+ "eval_runtime": 0.6543,
1828
+ "eval_samples_per_second": 106.992,
1829
+ "eval_steps_per_second": 3.057,
1830
  "step": 358
1831
  },
1832
  {
1833
  "epoch": 180.0,
1834
  "learning_rate": 1.25e-05,
1835
+ "loss": 0.6331,
1836
  "step": 360
1837
  },
1838
  {
1839
  "epoch": 180.0,
1840
+ "eval_accuracy": 0.8285714285714286,
1841
+ "eval_loss": 0.5453290343284607,
1842
+ "eval_runtime": 0.6499,
1843
+ "eval_samples_per_second": 107.716,
1844
+ "eval_steps_per_second": 3.078,
1845
  "step": 360
1846
  },
1847
  {
1848
  "epoch": 181.0,
1849
+ "eval_accuracy": 0.8285714285714286,
1850
+ "eval_loss": 0.5451884865760803,
1851
+ "eval_runtime": 0.8265,
1852
+ "eval_samples_per_second": 84.691,
1853
+ "eval_steps_per_second": 2.42,
1854
  "step": 362
1855
  },
1856
  {
1857
  "epoch": 182.0,
1858
+ "eval_accuracy": 0.8285714285714286,
1859
+ "eval_loss": 0.5450613498687744,
1860
+ "eval_runtime": 0.6389,
1861
+ "eval_samples_per_second": 109.556,
1862
+ "eval_steps_per_second": 3.13,
1863
  "step": 364
1864
  },
1865
  {
1866
  "epoch": 183.0,
1867
+ "eval_accuracy": 0.8285714285714286,
1868
+ "eval_loss": 0.5449284911155701,
1869
+ "eval_runtime": 0.6467,
1870
+ "eval_samples_per_second": 108.243,
1871
+ "eval_steps_per_second": 3.093,
1872
  "step": 366
1873
  },
1874
  {
1875
  "epoch": 184.0,
1876
+ "eval_accuracy": 0.8285714285714286,
1877
+ "eval_loss": 0.5448177456855774,
1878
+ "eval_runtime": 0.825,
1879
+ "eval_samples_per_second": 84.846,
1880
+ "eval_steps_per_second": 2.424,
1881
  "step": 368
1882
  },
1883
  {
1884
  "epoch": 185.0,
1885
  "learning_rate": 9.375000000000001e-06,
1886
+ "loss": 0.6333,
1887
  "step": 370
1888
  },
1889
  {
1890
  "epoch": 185.0,
1891
+ "eval_accuracy": 0.8285714285714286,
1892
+ "eval_loss": 0.544733464717865,
1893
+ "eval_runtime": 0.6541,
1894
+ "eval_samples_per_second": 107.012,
1895
+ "eval_steps_per_second": 3.057,
1896
  "step": 370
1897
  },
1898
  {
1899
  "epoch": 186.0,
1900
+ "eval_accuracy": 0.8285714285714286,
1901
+ "eval_loss": 0.5446553230285645,
1902
+ "eval_runtime": 0.6491,
1903
+ "eval_samples_per_second": 107.838,
1904
+ "eval_steps_per_second": 3.081,
1905
  "step": 372
1906
  },
1907
  {
1908
  "epoch": 187.0,
1909
+ "eval_accuracy": 0.8285714285714286,
1910
+ "eval_loss": 0.5445802211761475,
1911
+ "eval_runtime": 0.8184,
1912
+ "eval_samples_per_second": 85.533,
1913
+ "eval_steps_per_second": 2.444,
1914
  "step": 374
1915
  },
1916
  {
1917
  "epoch": 188.0,
1918
+ "eval_accuracy": 0.8285714285714286,
1919
+ "eval_loss": 0.5445207357406616,
1920
+ "eval_runtime": 0.6378,
1921
+ "eval_samples_per_second": 109.754,
1922
+ "eval_steps_per_second": 3.136,
1923
  "step": 376
1924
  },
1925
  {
1926
  "epoch": 189.0,
1927
+ "eval_accuracy": 0.8285714285714286,
1928
+ "eval_loss": 0.5444640517234802,
1929
+ "eval_runtime": 0.6708,
1930
+ "eval_samples_per_second": 104.36,
1931
+ "eval_steps_per_second": 2.982,
1932
  "step": 378
1933
  },
1934
  {
1935
  "epoch": 190.0,
1936
  "learning_rate": 6.25e-06,
1937
+ "loss": 0.608,
1938
  "step": 380
1939
  },
1940
  {
1941
  "epoch": 190.0,
1942
+ "eval_accuracy": 0.8285714285714286,
1943
+ "eval_loss": 0.544407069683075,
1944
+ "eval_runtime": 0.8392,
1945
+ "eval_samples_per_second": 83.416,
1946
+ "eval_steps_per_second": 2.383,
1947
  "step": 380
1948
  },
1949
  {
1950
  "epoch": 191.0,
1951
+ "eval_accuracy": 0.8285714285714286,
1952
+ "eval_loss": 0.5443536043167114,
1953
+ "eval_runtime": 0.6405,
1954
+ "eval_samples_per_second": 109.293,
1955
+ "eval_steps_per_second": 3.123,
1956
  "step": 382
1957
  },
1958
  {
1959
  "epoch": 192.0,
1960
+ "eval_accuracy": 0.8285714285714286,
1961
+ "eval_loss": 0.5443087816238403,
1962
+ "eval_runtime": 0.6431,
1963
+ "eval_samples_per_second": 108.85,
1964
+ "eval_steps_per_second": 3.11,
1965
  "step": 384
1966
  },
1967
  {
1968
  "epoch": 193.0,
1969
+ "eval_accuracy": 0.8285714285714286,
1970
+ "eval_loss": 0.5442724823951721,
1971
+ "eval_runtime": 0.8311,
1972
+ "eval_samples_per_second": 84.221,
1973
+ "eval_steps_per_second": 2.406,
1974
  "step": 386
1975
  },
1976
  {
1977
  "epoch": 194.0,
1978
+ "eval_accuracy": 0.8285714285714286,
1979
+ "eval_loss": 0.5442416667938232,
1980
+ "eval_runtime": 0.6416,
1981
+ "eval_samples_per_second": 109.095,
1982
+ "eval_steps_per_second": 3.117,
1983
  "step": 388
1984
  },
1985
  {
1986
  "epoch": 195.0,
1987
  "learning_rate": 3.125e-06,
1988
+ "loss": 0.6155,
1989
  "step": 390
1990
  },
1991
  {
1992
  "epoch": 195.0,
1993
+ "eval_accuracy": 0.8285714285714286,
1994
+ "eval_loss": 0.5442100763320923,
1995
+ "eval_runtime": 0.6472,
1996
+ "eval_samples_per_second": 108.158,
1997
+ "eval_steps_per_second": 3.09,
1998
  "step": 390
1999
  },
2000
  {
2001
  "epoch": 196.0,
2002
+ "eval_accuracy": 0.8285714285714286,
2003
+ "eval_loss": 0.5441816449165344,
2004
+ "eval_runtime": 0.8234,
2005
+ "eval_samples_per_second": 85.016,
2006
+ "eval_steps_per_second": 2.429,
2007
  "step": 392
2008
  },
2009
  {
2010
  "epoch": 197.0,
2011
+ "eval_accuracy": 0.8285714285714286,
2012
+ "eval_loss": 0.5441582202911377,
2013
+ "eval_runtime": 0.6411,
2014
+ "eval_samples_per_second": 109.183,
2015
+ "eval_steps_per_second": 3.12,
2016
  "step": 394
2017
  },
2018
  {
2019
  "epoch": 198.0,
2020
+ "eval_accuracy": 0.8285714285714286,
2021
+ "eval_loss": 0.5441429615020752,
2022
+ "eval_runtime": 0.6367,
2023
+ "eval_samples_per_second": 109.941,
2024
+ "eval_steps_per_second": 3.141,
2025
  "step": 396
2026
  },
2027
  {
2028
  "epoch": 199.0,
2029
+ "eval_accuracy": 0.8285714285714286,
2030
+ "eval_loss": 0.5441319346427917,
2031
+ "eval_runtime": 0.8204,
2032
+ "eval_samples_per_second": 85.32,
2033
+ "eval_steps_per_second": 2.438,
2034
  "step": 398
2035
  },
2036
  {
2037
  "epoch": 200.0,
2038
  "learning_rate": 0.0,
2039
+ "loss": 0.6272,
2040
  "step": 400
2041
  },
2042
  {
2043
  "epoch": 200.0,
2044
+ "eval_accuracy": 0.8285714285714286,
2045
+ "eval_loss": 0.5441268086433411,
2046
+ "eval_runtime": 0.646,
2047
+ "eval_samples_per_second": 108.365,
2048
+ "eval_steps_per_second": 3.096,
2049
  "step": 400
2050
  },
2051
  {
2052
  "epoch": 200.0,
2053
  "step": 400,
2054
  "total_flos": 2.23710151698432e+18,
2055
+ "train_loss": 0.6791047298908234,
2056
+ "train_runtime": 1022.1437,
2057
+ "train_samples_per_second": 88.05,
2058
+ "train_steps_per_second": 0.391
2059
  }
2060
  ],
2061
  "logging_steps": 10,