Jeska commited on
Commit
ddd6059
1 Parent(s): b2fdbe5

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 15.0,
3
  "eval_accuracy": 0.9067641496658325,
4
- "eval_loss": 0.4963526129722595,
5
- "eval_runtime": 2.1572,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 507.137,
8
- "eval_steps_per_second": 63.508,
9
- "train_loss": 0.7935866661746093,
10
- "train_runtime": 2196.4523,
11
  "train_samples": 10556,
12
- "train_samples_per_second": 72.089,
13
- "train_steps_per_second": 9.015
14
  }
 
1
  {
2
  "epoch": 15.0,
3
  "eval_accuracy": 0.9067641496658325,
4
+ "eval_loss": 0.7214756608009338,
5
+ "eval_runtime": 2.2644,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 483.122,
8
+ "eval_steps_per_second": 60.501,
9
+ "train_loss": 0.27693930435361286,
10
+ "train_runtime": 2167.3602,
11
  "train_samples": 10556,
12
+ "train_samples_per_second": 73.057,
13
+ "train_steps_per_second": 9.136
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 15.0,
3
  "eval_accuracy": 0.9067641496658325,
4
- "eval_loss": 0.4963526129722595,
5
- "eval_runtime": 2.1572,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 507.137,
8
- "eval_steps_per_second": 63.508
9
  }
 
1
  {
2
  "epoch": 15.0,
3
  "eval_accuracy": 0.9067641496658325,
4
+ "eval_loss": 0.7214756608009338,
5
+ "eval_runtime": 2.2644,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 483.122,
8
+ "eval_steps_per_second": 60.501
9
  }
runs/Dec15_14-01-16_jbuhmann/events.out.tfevents.1639576890.jbuhmann.8432.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7df785663d705213bc744ff5983ed2c9022695b1f4e503dc22bb24ce438eae9
3
- size 27441
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d25c72fc47aaef4b443c760e64e4303f9178d0da44308159abac76d4720070
3
+ size 27801
runs/Dec15_14-01-16_jbuhmann/events.out.tfevents.1639579061.jbuhmann.8432.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:455e4b3d9a8f6e4586a8f45e1a8e709443355b2036a315b24582c19a12277aed
3
+ size 369
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 0.7935866661746093,
4
- "train_runtime": 2196.4523,
5
  "train_samples": 10556,
6
- "train_samples_per_second": 72.089,
7
- "train_steps_per_second": 9.015
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 0.27693930435361286,
4
+ "train_runtime": 2167.3602,
5
  "train_samples": 10556,
6
+ "train_samples_per_second": 73.057,
7
+ "train_steps_per_second": 9.136
8
  }
trainer_state.json CHANGED
@@ -9,381 +9,381 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.38,
12
- "learning_rate": 9.747474747474748e-06,
13
- "loss": 4.718,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.76,
18
- "learning_rate": 9.494949494949497e-06,
19
- "loss": 4.0196,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.0,
24
- "eval_accuracy": 0.4232175648212433,
25
- "eval_loss": 3.169081211090088,
26
- "eval_runtime": 2.1415,
27
- "eval_samples_per_second": 510.858,
28
- "eval_steps_per_second": 63.974,
29
  "step": 1320
30
  },
31
  {
32
  "epoch": 1.14,
33
- "learning_rate": 9.242424242424244e-06,
34
- "loss": 3.4554,
35
  "step": 1500
36
  },
37
  {
38
  "epoch": 1.52,
39
- "learning_rate": 8.98989898989899e-06,
40
- "loss": 2.8904,
41
  "step": 2000
42
  },
43
  {
44
  "epoch": 1.89,
45
- "learning_rate": 8.737373737373738e-06,
46
- "loss": 2.5409,
47
  "step": 2500
48
  },
49
  {
50
  "epoch": 2.0,
51
- "eval_accuracy": 0.6435100436210632,
52
- "eval_loss": 2.04946231842041,
53
- "eval_runtime": 2.1238,
54
- "eval_samples_per_second": 515.114,
55
- "eval_steps_per_second": 64.507,
56
  "step": 2640
57
  },
58
  {
59
  "epoch": 2.27,
60
- "learning_rate": 8.484848484848486e-06,
61
- "loss": 2.1224,
62
  "step": 3000
63
  },
64
  {
65
  "epoch": 2.65,
66
- "learning_rate": 8.232323232323233e-06,
67
- "loss": 1.8396,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 3.0,
72
- "eval_accuracy": 0.7861060500144958,
73
- "eval_loss": 1.3566279411315918,
74
- "eval_runtime": 2.0996,
75
- "eval_samples_per_second": 521.061,
76
- "eval_steps_per_second": 65.252,
77
  "step": 3960
78
  },
79
  {
80
  "epoch": 3.03,
81
- "learning_rate": 7.97979797979798e-06,
82
- "loss": 1.5406,
83
  "step": 4000
84
  },
85
  {
86
  "epoch": 3.41,
87
- "learning_rate": 7.727272727272727e-06,
88
- "loss": 1.2519,
89
  "step": 4500
90
  },
91
  {
92
  "epoch": 3.79,
93
- "learning_rate": 7.474747474747476e-06,
94
- "loss": 1.0986,
95
  "step": 5000
96
  },
97
  {
98
  "epoch": 4.0,
99
- "eval_accuracy": 0.8555758595466614,
100
- "eval_loss": 0.9564884305000305,
101
- "eval_runtime": 2.1078,
102
- "eval_samples_per_second": 519.033,
103
- "eval_steps_per_second": 64.998,
104
  "step": 5280
105
  },
106
  {
107
  "epoch": 4.17,
108
- "learning_rate": 7.222222222222223e-06,
109
- "loss": 0.9206,
110
  "step": 5500
111
  },
112
  {
113
  "epoch": 4.55,
114
- "learning_rate": 6.969696969696971e-06,
115
- "loss": 0.7389,
116
  "step": 6000
117
  },
118
  {
119
  "epoch": 4.92,
120
- "learning_rate": 6.717171717171718e-06,
121
- "loss": 0.6711,
122
  "step": 6500
123
  },
124
  {
125
  "epoch": 5.0,
126
- "eval_accuracy": 0.880255937576294,
127
- "eval_loss": 0.7118942141532898,
128
- "eval_runtime": 2.0744,
129
- "eval_samples_per_second": 527.383,
130
- "eval_steps_per_second": 66.043,
131
  "step": 6600
132
  },
133
  {
134
  "epoch": 5.3,
135
- "learning_rate": 6.464646464646466e-06,
136
- "loss": 0.5107,
137
  "step": 7000
138
  },
139
  {
140
  "epoch": 5.68,
141
- "learning_rate": 6.212121212121213e-06,
142
- "loss": 0.4456,
143
  "step": 7500
144
  },
145
  {
146
  "epoch": 6.0,
147
- "eval_accuracy": 0.8912248611450195,
148
- "eval_loss": 0.5804843902587891,
149
- "eval_runtime": 2.0875,
150
- "eval_samples_per_second": 524.077,
151
- "eval_steps_per_second": 65.629,
152
  "step": 7920
153
  },
154
  {
155
  "epoch": 6.06,
156
- "learning_rate": 5.95959595959596e-06,
157
- "loss": 0.3836,
158
  "step": 8000
159
  },
160
  {
161
  "epoch": 6.44,
162
- "learning_rate": 5.7070707070707075e-06,
163
- "loss": 0.2848,
164
  "step": 8500
165
  },
166
  {
167
  "epoch": 6.82,
168
- "learning_rate": 5.4545454545454545e-06,
169
- "loss": 0.2563,
170
  "step": 9000
171
  },
172
  {
173
  "epoch": 7.0,
174
- "eval_accuracy": 0.8994515538215637,
175
- "eval_loss": 0.5123510956764221,
176
- "eval_runtime": 2.1263,
177
- "eval_samples_per_second": 514.504,
178
- "eval_steps_per_second": 64.431,
179
  "step": 9240
180
  },
181
  {
182
  "epoch": 7.2,
183
- "learning_rate": 5.202020202020202e-06,
184
- "loss": 0.2322,
185
  "step": 9500
186
  },
187
  {
188
  "epoch": 7.58,
189
- "learning_rate": 4.94949494949495e-06,
190
- "loss": 0.1742,
191
  "step": 10000
192
  },
193
  {
194
  "epoch": 7.95,
195
- "learning_rate": 4.696969696969698e-06,
196
- "loss": 0.1686,
197
  "step": 10500
198
  },
199
  {
200
  "epoch": 8.0,
201
- "eval_accuracy": 0.9021937847137451,
202
- "eval_loss": 0.49525973200798035,
203
- "eval_runtime": 2.0716,
204
- "eval_samples_per_second": 528.088,
205
- "eval_steps_per_second": 66.132,
206
  "step": 10560
207
  },
208
  {
209
  "epoch": 8.33,
210
- "learning_rate": 4.444444444444444e-06,
211
- "loss": 0.1313,
212
  "step": 11000
213
  },
214
  {
215
  "epoch": 8.71,
216
- "learning_rate": 4.191919191919192e-06,
217
- "loss": 0.1135,
218
  "step": 11500
219
  },
220
  {
221
  "epoch": 9.0,
222
- "eval_accuracy": 0.9021937847137451,
223
- "eval_loss": 0.486285001039505,
224
- "eval_runtime": 2.0803,
225
- "eval_samples_per_second": 525.893,
226
- "eval_steps_per_second": 65.857,
227
  "step": 11880
228
  },
229
  {
230
  "epoch": 9.09,
231
- "learning_rate": 3.93939393939394e-06,
232
- "loss": 0.1161,
233
  "step": 12000
234
  },
235
  {
236
  "epoch": 9.47,
237
- "learning_rate": 3.686868686868687e-06,
238
- "loss": 0.0839,
239
  "step": 12500
240
  },
241
  {
242
  "epoch": 9.85,
243
- "learning_rate": 3.4343434343434347e-06,
244
- "loss": 0.0846,
245
  "step": 13000
246
  },
247
  {
248
  "epoch": 10.0,
249
- "eval_accuracy": 0.9049360156059265,
250
- "eval_loss": 0.48929038643836975,
251
- "eval_runtime": 2.0752,
252
- "eval_samples_per_second": 527.187,
253
- "eval_steps_per_second": 66.019,
254
  "step": 13200
255
  },
256
  {
257
  "epoch": 10.23,
258
- "learning_rate": 3.181818181818182e-06,
259
- "loss": 0.0658,
260
  "step": 13500
261
  },
262
  {
263
  "epoch": 10.61,
264
- "learning_rate": 2.9292929292929295e-06,
265
- "loss": 0.0663,
266
  "step": 14000
267
  },
268
  {
269
  "epoch": 10.98,
270
- "learning_rate": 2.676767676767677e-06,
271
- "loss": 0.07,
272
  "step": 14500
273
  },
274
  {
275
  "epoch": 11.0,
276
- "eval_accuracy": 0.9076782464981079,
277
- "eval_loss": 0.485994428396225,
278
- "eval_runtime": 2.1075,
279
- "eval_samples_per_second": 519.1,
280
- "eval_steps_per_second": 65.006,
281
  "step": 14520
282
  },
283
  {
284
  "epoch": 11.36,
285
- "learning_rate": 2.4242424242424244e-06,
286
- "loss": 0.0489,
287
  "step": 15000
288
  },
289
  {
290
  "epoch": 11.74,
291
- "learning_rate": 2.171717171717172e-06,
292
- "loss": 0.0487,
293
  "step": 15500
294
  },
295
  {
296
  "epoch": 12.0,
297
- "eval_accuracy": 0.9058501124382019,
298
- "eval_loss": 0.4888700246810913,
299
- "eval_runtime": 2.0826,
300
- "eval_samples_per_second": 525.296,
301
- "eval_steps_per_second": 65.782,
302
  "step": 15840
303
  },
304
  {
305
  "epoch": 12.12,
306
- "learning_rate": 1.9191919191919192e-06,
307
- "loss": 0.049,
308
  "step": 16000
309
  },
310
  {
311
  "epoch": 12.5,
312
- "learning_rate": 1.6666666666666667e-06,
313
- "loss": 0.0476,
314
  "step": 16500
315
  },
316
  {
317
  "epoch": 12.88,
318
- "learning_rate": 1.4141414141414143e-06,
319
- "loss": 0.0342,
320
  "step": 17000
321
  },
322
  {
323
  "epoch": 13.0,
324
- "eval_accuracy": 0.9067641496658325,
325
- "eval_loss": 0.4926171898841858,
326
- "eval_runtime": 2.3107,
327
- "eval_samples_per_second": 473.444,
328
- "eval_steps_per_second": 59.289,
329
  "step": 17160
330
  },
331
  {
332
  "epoch": 13.26,
333
- "learning_rate": 1.1616161616161617e-06,
334
- "loss": 0.0392,
335
  "step": 17500
336
  },
337
  {
338
  "epoch": 13.64,
339
- "learning_rate": 9.090909090909091e-07,
340
- "loss": 0.0332,
341
  "step": 18000
342
  },
343
  {
344
  "epoch": 14.0,
345
- "eval_accuracy": 0.9067641496658325,
346
- "eval_loss": 0.495064914226532,
347
- "eval_runtime": 2.1748,
348
- "eval_samples_per_second": 503.039,
349
- "eval_steps_per_second": 62.995,
350
  "step": 18480
351
  },
352
  {
353
  "epoch": 14.02,
354
- "learning_rate": 6.565656565656567e-07,
355
- "loss": 0.0411,
356
  "step": 18500
357
  },
358
  {
359
  "epoch": 14.39,
360
- "learning_rate": 4.040404040404041e-07,
361
- "loss": 0.0368,
362
  "step": 19000
363
  },
364
  {
365
  "epoch": 14.77,
366
- "learning_rate": 1.5151515151515152e-07,
367
- "loss": 0.029,
368
  "step": 19500
369
  },
370
  {
371
  "epoch": 15.0,
372
  "eval_accuracy": 0.9067641496658325,
373
- "eval_loss": 0.4963526129722595,
374
- "eval_runtime": 2.0713,
375
- "eval_samples_per_second": 528.178,
376
- "eval_steps_per_second": 66.143,
377
  "step": 19800
378
  },
379
  {
380
  "epoch": 15.0,
381
  "step": 19800,
382
  "total_flos": 5215995096399360.0,
383
- "train_loss": 0.7935866661746093,
384
- "train_runtime": 2196.4523,
385
- "train_samples_per_second": 72.089,
386
- "train_steps_per_second": 9.015
387
  }
388
  ],
389
  "max_steps": 19800,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.38,
12
+ "learning_rate": 4.8737373737373736e-05,
13
+ "loss": 3.8927,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.76,
18
+ "learning_rate": 4.7474747474747476e-05,
19
+ "loss": 2.3719,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.7787933945655823,
25
+ "eval_loss": 1.1852809190750122,
26
+ "eval_runtime": 2.0844,
27
+ "eval_samples_per_second": 524.845,
28
+ "eval_steps_per_second": 65.726,
29
  "step": 1320
30
  },
31
  {
32
  "epoch": 1.14,
33
+ "learning_rate": 4.621212121212121e-05,
34
+ "loss": 1.4046,
35
  "step": 1500
36
  },
37
  {
38
  "epoch": 1.52,
39
+ "learning_rate": 4.494949494949495e-05,
40
+ "loss": 0.7837,
41
  "step": 2000
42
  },
43
  {
44
  "epoch": 1.89,
45
+ "learning_rate": 4.368686868686869e-05,
46
+ "loss": 0.5941,
47
  "step": 2500
48
  },
49
  {
50
  "epoch": 2.0,
51
+ "eval_accuracy": 0.880255937576294,
52
+ "eval_loss": 0.5889360308647156,
53
+ "eval_runtime": 2.0716,
54
+ "eval_samples_per_second": 528.095,
55
+ "eval_steps_per_second": 66.133,
56
  "step": 2640
57
  },
58
  {
59
  "epoch": 2.27,
60
+ "learning_rate": 4.242424242424243e-05,
61
+ "loss": 0.3401,
62
  "step": 3000
63
  },
64
  {
65
  "epoch": 2.65,
66
+ "learning_rate": 4.116161616161616e-05,
67
+ "loss": 0.2557,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 3.0,
72
+ "eval_accuracy": 0.8866544961929321,
73
+ "eval_loss": 0.5768323540687561,
74
+ "eval_runtime": 2.1209,
75
+ "eval_samples_per_second": 515.813,
76
+ "eval_steps_per_second": 64.595,
77
  "step": 3960
78
  },
79
  {
80
  "epoch": 3.03,
81
+ "learning_rate": 3.98989898989899e-05,
82
+ "loss": 0.2031,
83
  "step": 4000
84
  },
85
  {
86
  "epoch": 3.41,
87
+ "learning_rate": 3.8636363636363636e-05,
88
+ "loss": 0.1193,
89
  "step": 4500
90
  },
91
  {
92
  "epoch": 3.79,
93
+ "learning_rate": 3.7373737373737376e-05,
94
+ "loss": 0.1178,
95
  "step": 5000
96
  },
97
  {
98
  "epoch": 4.0,
99
+ "eval_accuracy": 0.8811700344085693,
100
+ "eval_loss": 0.6243242621421814,
101
+ "eval_runtime": 2.1461,
102
+ "eval_samples_per_second": 509.767,
103
+ "eval_steps_per_second": 63.837,
104
  "step": 5280
105
  },
106
  {
107
  "epoch": 4.17,
108
+ "learning_rate": 3.611111111111111e-05,
109
+ "loss": 0.1101,
110
  "step": 5500
111
  },
112
  {
113
  "epoch": 4.55,
114
+ "learning_rate": 3.484848484848485e-05,
115
+ "loss": 0.078,
116
  "step": 6000
117
  },
118
  {
119
  "epoch": 4.92,
120
+ "learning_rate": 3.358585858585859e-05,
121
+ "loss": 0.0948,
122
  "step": 6500
123
  },
124
  {
125
  "epoch": 5.0,
126
+ "eval_accuracy": 0.8912248611450195,
127
+ "eval_loss": 0.650793194770813,
128
+ "eval_runtime": 2.0892,
129
+ "eval_samples_per_second": 523.641,
130
+ "eval_steps_per_second": 65.575,
131
  "step": 6600
132
  },
133
  {
134
  "epoch": 5.3,
135
+ "learning_rate": 3.232323232323233e-05,
136
+ "loss": 0.0352,
137
  "step": 7000
138
  },
139
  {
140
  "epoch": 5.68,
141
+ "learning_rate": 3.106060606060606e-05,
142
+ "loss": 0.079,
143
  "step": 7500
144
  },
145
  {
146
  "epoch": 6.0,
147
+ "eval_accuracy": 0.8930529952049255,
148
+ "eval_loss": 0.6763377785682678,
149
+ "eval_runtime": 2.0996,
150
+ "eval_samples_per_second": 521.052,
151
+ "eval_steps_per_second": 65.251,
152
  "step": 7920
153
  },
154
  {
155
  "epoch": 6.06,
156
+ "learning_rate": 2.9797979797979796e-05,
157
+ "loss": 0.054,
158
  "step": 8000
159
  },
160
  {
161
  "epoch": 6.44,
162
+ "learning_rate": 2.8535353535353536e-05,
163
+ "loss": 0.0388,
164
  "step": 8500
165
  },
166
  {
167
  "epoch": 6.82,
168
+ "learning_rate": 2.7272727272727273e-05,
169
+ "loss": 0.0413,
170
  "step": 9000
171
  },
172
  {
173
  "epoch": 7.0,
174
+ "eval_accuracy": 0.9021937847137451,
175
+ "eval_loss": 0.6991991400718689,
176
+ "eval_runtime": 2.1868,
177
+ "eval_samples_per_second": 500.277,
178
+ "eval_steps_per_second": 62.649,
179
  "step": 9240
180
  },
181
  {
182
  "epoch": 7.2,
183
+ "learning_rate": 2.6010101010101013e-05,
184
+ "loss": 0.046,
185
  "step": 9500
186
  },
187
  {
188
  "epoch": 7.58,
189
+ "learning_rate": 2.474747474747475e-05,
190
+ "loss": 0.0295,
191
  "step": 10000
192
  },
193
  {
194
  "epoch": 7.95,
195
+ "learning_rate": 2.3484848484848487e-05,
196
+ "loss": 0.0291,
197
  "step": 10500
198
  },
199
  {
200
  "epoch": 8.0,
201
+ "eval_accuracy": 0.8921389579772949,
202
+ "eval_loss": 0.7643230557441711,
203
+ "eval_runtime": 2.2005,
204
+ "eval_samples_per_second": 497.161,
205
+ "eval_steps_per_second": 62.259,
206
  "step": 10560
207
  },
208
  {
209
  "epoch": 8.33,
210
+ "learning_rate": 2.2222222222222223e-05,
211
+ "loss": 0.0236,
212
  "step": 11000
213
  },
214
  {
215
  "epoch": 8.71,
216
+ "learning_rate": 2.095959595959596e-05,
217
+ "loss": 0.032,
218
  "step": 11500
219
  },
220
  {
221
  "epoch": 9.0,
222
+ "eval_accuracy": 0.9113345742225647,
223
+ "eval_loss": 0.6661025285720825,
224
+ "eval_runtime": 2.0869,
225
+ "eval_samples_per_second": 524.224,
226
+ "eval_steps_per_second": 65.648,
227
  "step": 11880
228
  },
229
  {
230
  "epoch": 9.09,
231
+ "learning_rate": 1.9696969696969697e-05,
232
+ "loss": 0.0323,
233
  "step": 12000
234
  },
235
  {
236
  "epoch": 9.47,
237
+ "learning_rate": 1.8434343434343433e-05,
238
+ "loss": 0.0158,
239
  "step": 12500
240
  },
241
  {
242
  "epoch": 9.85,
243
+ "learning_rate": 1.7171717171717173e-05,
244
+ "loss": 0.027,
245
  "step": 13000
246
  },
247
  {
248
  "epoch": 10.0,
249
+ "eval_accuracy": 0.9085923433303833,
250
+ "eval_loss": 0.6882250308990479,
251
+ "eval_runtime": 2.2029,
252
+ "eval_samples_per_second": 496.616,
253
+ "eval_steps_per_second": 62.19,
254
  "step": 13200
255
  },
256
  {
257
  "epoch": 10.23,
258
+ "learning_rate": 1.590909090909091e-05,
259
+ "loss": 0.0147,
260
  "step": 13500
261
  },
262
  {
263
  "epoch": 10.61,
264
+ "learning_rate": 1.4646464646464647e-05,
265
+ "loss": 0.0125,
266
  "step": 14000
267
  },
268
  {
269
  "epoch": 10.98,
270
+ "learning_rate": 1.3383838383838385e-05,
271
+ "loss": 0.0241,
272
  "step": 14500
273
  },
274
  {
275
  "epoch": 11.0,
276
+ "eval_accuracy": 0.9040219187736511,
277
+ "eval_loss": 0.7250856757164001,
278
+ "eval_runtime": 2.0725,
279
+ "eval_samples_per_second": 527.862,
280
+ "eval_steps_per_second": 66.103,
281
  "step": 14520
282
  },
283
  {
284
  "epoch": 11.36,
285
+ "learning_rate": 1.2121212121212122e-05,
286
+ "loss": 0.0108,
287
  "step": 15000
288
  },
289
  {
290
  "epoch": 11.74,
291
+ "learning_rate": 1.085858585858586e-05,
292
+ "loss": 0.0109,
293
  "step": 15500
294
  },
295
  {
296
  "epoch": 12.0,
297
+ "eval_accuracy": 0.8976234197616577,
298
+ "eval_loss": 0.7604121565818787,
299
+ "eval_runtime": 2.2757,
300
+ "eval_samples_per_second": 480.738,
301
+ "eval_steps_per_second": 60.202,
302
  "step": 15840
303
  },
304
  {
305
  "epoch": 12.12,
306
+ "learning_rate": 9.595959595959595e-06,
307
+ "loss": 0.0122,
308
  "step": 16000
309
  },
310
  {
311
  "epoch": 12.5,
312
+ "learning_rate": 8.333333333333334e-06,
313
+ "loss": 0.0058,
314
  "step": 16500
315
  },
316
  {
317
  "epoch": 12.88,
318
+ "learning_rate": 7.0707070707070704e-06,
319
+ "loss": 0.0056,
320
  "step": 17000
321
  },
322
  {
323
  "epoch": 13.0,
324
+ "eval_accuracy": 0.9040219187736511,
325
+ "eval_loss": 0.7356353998184204,
326
+ "eval_runtime": 2.1586,
327
+ "eval_samples_per_second": 506.799,
328
+ "eval_steps_per_second": 63.466,
329
  "step": 17160
330
  },
331
  {
332
  "epoch": 13.26,
333
+ "learning_rate": 5.808080808080808e-06,
334
+ "loss": 0.0055,
335
  "step": 17500
336
  },
337
  {
338
  "epoch": 13.64,
339
+ "learning_rate": 4.5454545454545455e-06,
340
+ "loss": 0.0018,
341
  "step": 18000
342
  },
343
  {
344
  "epoch": 14.0,
345
+ "eval_accuracy": 0.9076782464981079,
346
+ "eval_loss": 0.7189434170722961,
347
+ "eval_runtime": 2.0825,
348
+ "eval_samples_per_second": 525.332,
349
+ "eval_steps_per_second": 65.787,
350
  "step": 18480
351
  },
352
  {
353
  "epoch": 14.02,
354
+ "learning_rate": 3.2828282828282835e-06,
355
+ "loss": 0.0067,
356
  "step": 18500
357
  },
358
  {
359
  "epoch": 14.39,
360
+ "learning_rate": 2.0202020202020206e-06,
361
+ "loss": 0.0023,
362
  "step": 19000
363
  },
364
  {
365
  "epoch": 14.77,
366
+ "learning_rate": 7.575757575757576e-07,
367
+ "loss": 0.0017,
368
  "step": 19500
369
  },
370
  {
371
  "epoch": 15.0,
372
  "eval_accuracy": 0.9067641496658325,
373
+ "eval_loss": 0.7214756608009338,
374
+ "eval_runtime": 2.1084,
375
+ "eval_samples_per_second": 518.881,
376
+ "eval_steps_per_second": 64.979,
377
  "step": 19800
378
  },
379
  {
380
  "epoch": 15.0,
381
  "step": 19800,
382
  "total_flos": 5215995096399360.0,
383
+ "train_loss": 0.27693930435361286,
384
+ "train_runtime": 2167.3602,
385
+ "train_samples_per_second": 73.057,
386
+ "train_steps_per_second": 9.136
387
  }
388
  ],
389
  "max_steps": 19800,