lixiqi commited on
Commit
e355687
1 Parent(s): f4d4784

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.6819923371647509,
4
- "eval_loss": 0.8754438161849976,
5
- "eval_runtime": 25.581,
6
- "eval_samples_per_second": 112.232,
7
- "eval_steps_per_second": 3.518,
8
- "total_flos": 6.004415924974301e+18,
9
- "train_loss": 1.159771179602091,
10
- "train_runtime": 1779.2361,
11
- "train_samples_per_second": 43.566,
12
- "train_steps_per_second": 0.341
13
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.6879136189481017,
4
+ "eval_loss": 0.8504465818405151,
5
+ "eval_runtime": 25.231,
6
+ "eval_samples_per_second": 113.789,
7
+ "eval_steps_per_second": 3.567,
8
+ "total_flos": 8.005887899965735e+18,
9
+ "train_loss": 1.122130044616095,
10
+ "train_runtime": 3022.4726,
11
+ "train_samples_per_second": 34.195,
12
+ "train_steps_per_second": 0.267
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.6819923371647509,
4
- "eval_loss": 0.8754438161849976,
5
- "eval_runtime": 25.581,
6
- "eval_samples_per_second": 112.232,
7
- "eval_steps_per_second": 3.518
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.6879136189481017,
4
+ "eval_loss": 0.8504465818405151,
5
+ "eval_runtime": 25.231,
6
+ "eval_samples_per_second": 113.789,
7
+ "eval_steps_per_second": 3.567
8
  }
runs/Jan08_10-49-11_9d7446287eef/events.out.tfevents.1673178226.9d7446287eef.23.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934d59f7ea613c965daacaf93726a8fa602e6e33ebe986c055abe5f5aa686256
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 6.004415924974301e+18,
4
- "train_loss": 1.159771179602091,
5
- "train_runtime": 1779.2361,
6
- "train_samples_per_second": 43.566,
7
- "train_steps_per_second": 0.341
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "total_flos": 8.005887899965735e+18,
4
+ "train_loss": 1.122130044616095,
5
+ "train_runtime": 3022.4726,
6
+ "train_samples_per_second": 34.195,
7
+ "train_steps_per_second": 0.267
8
  }
trainer_state.json CHANGED
@@ -1,412 +1,541 @@
1
  {
2
- "best_metric": 0.6819923371647509,
3
- "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-FER2013/checkpoint-606",
4
- "epoch": 3.0,
5
- "global_step": 606,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
- "learning_rate": 8.196721311475409e-06,
13
- "loss": 1.9392,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.1,
18
- "learning_rate": 1.6393442622950818e-05,
19
- "loss": 1.7671,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.15,
24
- "learning_rate": 2.459016393442623e-05,
25
- "loss": 1.6727,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.2,
30
- "learning_rate": 3.2786885245901635e-05,
31
- "loss": 1.4688,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.25,
36
- "learning_rate": 4.098360655737705e-05,
37
- "loss": 1.4389,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.3,
42
- "learning_rate": 4.918032786885246e-05,
43
- "loss": 1.3738,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.35,
48
- "learning_rate": 4.917431192660551e-05,
49
- "loss": 1.3653,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.4,
54
- "learning_rate": 4.8256880733944956e-05,
55
- "loss": 1.2999,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.45,
60
- "learning_rate": 4.733944954128441e-05,
61
- "loss": 1.301,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.5,
66
- "learning_rate": 4.642201834862386e-05,
67
- "loss": 1.3416,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.54,
72
- "learning_rate": 4.5504587155963305e-05,
73
- "loss": 1.2908,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.59,
78
- "learning_rate": 4.458715596330276e-05,
79
- "loss": 1.2205,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.64,
84
- "learning_rate": 4.366972477064221e-05,
85
- "loss": 1.2779,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.69,
90
- "learning_rate": 4.2752293577981654e-05,
91
- "loss": 1.2563,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.74,
96
- "learning_rate": 4.1834862385321106e-05,
97
- "loss": 1.2122,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.79,
102
- "learning_rate": 4.091743119266056e-05,
103
- "loss": 1.2289,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.84,
108
- "learning_rate": 4e-05,
109
- "loss": 1.1613,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.89,
114
- "learning_rate": 3.9082568807339455e-05,
115
- "loss": 1.1646,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.94,
120
- "learning_rate": 3.81651376146789e-05,
121
- "loss": 1.2029,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.99,
126
- "learning_rate": 3.724770642201835e-05,
127
- "loss": 1.1701,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 1.0,
132
- "eval_accuracy": 0.6179031696273075,
133
- "eval_loss": 1.0163276195526123,
134
- "eval_runtime": 25.3453,
135
- "eval_samples_per_second": 113.276,
136
- "eval_steps_per_second": 3.551,
137
  "step": 202
138
  },
139
  {
140
  "epoch": 1.04,
141
- "learning_rate": 3.6330275229357804e-05,
142
- "loss": 1.1281,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.09,
147
- "learning_rate": 3.541284403669725e-05,
148
- "loss": 1.1595,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.14,
153
- "learning_rate": 3.44954128440367e-05,
154
- "loss": 1.1224,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.19,
159
- "learning_rate": 3.3577981651376154e-05,
160
- "loss": 1.0808,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.24,
165
- "learning_rate": 3.26605504587156e-05,
166
- "loss": 1.1312,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.29,
171
- "learning_rate": 3.174311926605505e-05,
172
- "loss": 1.1423,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.34,
177
- "learning_rate": 3.0825688073394496e-05,
178
- "loss": 1.1882,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.39,
183
- "learning_rate": 2.9908256880733948e-05,
184
- "loss": 1.1086,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.44,
189
- "learning_rate": 2.8990825688073397e-05,
190
- "loss": 1.0772,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.49,
195
- "learning_rate": 2.807339449541285e-05,
196
- "loss": 1.1303,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.53,
201
- "learning_rate": 2.7155963302752297e-05,
202
- "loss": 1.1025,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.58,
207
- "learning_rate": 2.6238532110091746e-05,
208
- "loss": 1.1117,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.63,
213
- "learning_rate": 2.5321100917431194e-05,
214
- "loss": 1.0372,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.68,
219
- "learning_rate": 2.4403669724770646e-05,
220
- "loss": 1.0228,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 1.73,
225
- "learning_rate": 2.3486238532110095e-05,
226
- "loss": 1.0936,
227
  "step": 350
228
  },
229
  {
230
  "epoch": 1.78,
231
- "learning_rate": 2.2568807339449544e-05,
232
- "loss": 1.0889,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.83,
237
- "learning_rate": 2.1651376146788992e-05,
238
- "loss": 1.0765,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 1.88,
243
- "learning_rate": 2.0733944954128444e-05,
244
- "loss": 1.0954,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 1.93,
249
- "learning_rate": 1.9816513761467893e-05,
250
- "loss": 1.1196,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.98,
255
- "learning_rate": 1.889908256880734e-05,
256
- "loss": 1.0447,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 2.0,
261
- "eval_accuracy": 0.6569139672587948,
262
- "eval_loss": 0.9236605763435364,
263
- "eval_runtime": 25.7906,
264
- "eval_samples_per_second": 111.32,
265
- "eval_steps_per_second": 3.49,
266
  "step": 404
267
  },
268
  {
269
  "epoch": 2.03,
270
- "learning_rate": 1.798165137614679e-05,
271
- "loss": 1.0341,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.08,
276
- "learning_rate": 1.7064220183486242e-05,
277
- "loss": 0.9925,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.13,
282
- "learning_rate": 1.614678899082569e-05,
283
- "loss": 1.0571,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.18,
288
- "learning_rate": 1.5229357798165139e-05,
289
- "loss": 1.0625,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.23,
294
- "learning_rate": 1.431192660550459e-05,
295
- "loss": 1.0117,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.28,
300
- "learning_rate": 1.3394495412844038e-05,
301
- "loss": 0.9992,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.33,
306
- "learning_rate": 1.2477064220183488e-05,
307
- "loss": 1.0723,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.38,
312
- "learning_rate": 1.1559633027522937e-05,
313
- "loss": 1.0284,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.43,
318
- "learning_rate": 1.0642201834862387e-05,
319
- "loss": 0.9993,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.48,
324
- "learning_rate": 9.724770642201836e-06,
325
- "loss": 1.023,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.52,
330
- "learning_rate": 8.807339449541286e-06,
331
- "loss": 1.0313,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.57,
336
- "learning_rate": 7.889908256880735e-06,
337
- "loss": 1.0584,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 2.62,
342
- "learning_rate": 6.972477064220184e-06,
343
- "loss": 1.003,
344
  "step": 530
345
  },
346
  {
347
  "epoch": 2.67,
348
- "learning_rate": 6.0550458715596335e-06,
349
- "loss": 1.0541,
350
  "step": 540
351
  },
352
  {
353
  "epoch": 2.72,
354
- "learning_rate": 5.137614678899083e-06,
355
- "loss": 1.0189,
356
  "step": 550
357
  },
358
  {
359
  "epoch": 2.77,
360
- "learning_rate": 4.220183486238532e-06,
361
- "loss": 0.9714,
362
  "step": 560
363
  },
364
  {
365
  "epoch": 2.82,
366
- "learning_rate": 3.302752293577982e-06,
367
- "loss": 1.0046,
368
  "step": 570
369
  },
370
  {
371
  "epoch": 2.87,
372
- "learning_rate": 2.3853211009174313e-06,
373
- "loss": 1.0082,
374
  "step": 580
375
  },
376
  {
377
  "epoch": 2.92,
378
- "learning_rate": 1.467889908256881e-06,
379
- "loss": 1.0499,
380
  "step": 590
381
  },
382
  {
383
  "epoch": 2.97,
384
- "learning_rate": 5.504587155963304e-07,
385
- "loss": 0.9712,
386
  "step": 600
387
  },
388
  {
389
  "epoch": 3.0,
390
- "eval_accuracy": 0.6819923371647509,
391
- "eval_loss": 0.8754438161849976,
392
- "eval_runtime": 25.5354,
393
- "eval_samples_per_second": 112.432,
394
- "eval_steps_per_second": 3.525,
395
  "step": 606
396
  },
397
  {
398
- "epoch": 3.0,
399
- "step": 606,
400
- "total_flos": 6.004415924974301e+18,
401
- "train_loss": 1.159771179602091,
402
- "train_runtime": 1779.2361,
403
- "train_samples_per_second": 43.566,
404
- "train_steps_per_second": 0.341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
- "max_steps": 606,
408
- "num_train_epochs": 3,
409
- "total_flos": 6.004415924974301e+18,
410
  "trial_name": null,
411
  "trial_params": null
412
  }
 
1
  {
2
+ "best_metric": 0.6879136189481017,
3
+ "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-FER2013/checkpoint-808",
4
+ "epoch": 4.0,
5
+ "global_step": 808,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
+ "learning_rate": 6.172839506172839e-06,
13
+ "loss": 2.0141,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.1,
18
+ "learning_rate": 1.2345679012345678e-05,
19
+ "loss": 1.7884,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.15,
24
+ "learning_rate": 1.8518518518518518e-05,
25
+ "loss": 1.6854,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.2,
30
+ "learning_rate": 2.4691358024691357e-05,
31
+ "loss": 1.5246,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.25,
36
+ "learning_rate": 3.08641975308642e-05,
37
+ "loss": 1.4423,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.3,
42
+ "learning_rate": 3.7037037037037037e-05,
43
+ "loss": 1.3828,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.35,
48
+ "learning_rate": 4.3209876543209875e-05,
49
+ "loss": 1.3731,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.4,
54
+ "learning_rate": 4.938271604938271e-05,
55
+ "loss": 1.313,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.45,
60
+ "learning_rate": 4.9381017881705645e-05,
61
+ "loss": 1.3077,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.5,
66
+ "learning_rate": 4.8693259972489685e-05,
67
+ "loss": 1.3512,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.54,
72
+ "learning_rate": 4.8005502063273726e-05,
73
+ "loss": 1.2712,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.59,
78
+ "learning_rate": 4.731774415405777e-05,
79
+ "loss": 1.2161,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.64,
84
+ "learning_rate": 4.662998624484182e-05,
85
+ "loss": 1.2698,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.69,
90
+ "learning_rate": 4.594222833562587e-05,
91
+ "loss": 1.2618,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.74,
96
+ "learning_rate": 4.525447042640991e-05,
97
+ "loss": 1.2162,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.79,
102
+ "learning_rate": 4.456671251719395e-05,
103
+ "loss": 1.2392,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.84,
108
+ "learning_rate": 4.3878954607977995e-05,
109
+ "loss": 1.1664,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.89,
114
+ "learning_rate": 4.3191196698762035e-05,
115
+ "loss": 1.1736,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.94,
120
+ "learning_rate": 4.250343878954608e-05,
121
+ "loss": 1.2149,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.99,
126
+ "learning_rate": 4.181568088033013e-05,
127
+ "loss": 1.1617,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 1.0,
132
+ "eval_accuracy": 0.6269592476489029,
133
+ "eval_loss": 1.0080664157867432,
134
+ "eval_runtime": 27.1012,
135
+ "eval_samples_per_second": 105.936,
136
+ "eval_steps_per_second": 3.321,
137
  "step": 202
138
  },
139
  {
140
  "epoch": 1.04,
141
+ "learning_rate": 4.112792297111417e-05,
142
+ "loss": 1.1415,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.09,
147
+ "learning_rate": 4.044016506189822e-05,
148
+ "loss": 1.1735,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.14,
153
+ "learning_rate": 3.975240715268226e-05,
154
+ "loss": 1.1356,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.19,
159
+ "learning_rate": 3.90646492434663e-05,
160
+ "loss": 1.0938,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.24,
165
+ "learning_rate": 3.8376891334250345e-05,
166
+ "loss": 1.1276,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.29,
171
+ "learning_rate": 3.768913342503439e-05,
172
+ "loss": 1.1508,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.34,
177
+ "learning_rate": 3.700137551581843e-05,
178
+ "loss": 1.2005,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.39,
183
+ "learning_rate": 3.631361760660248e-05,
184
+ "loss": 1.1185,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.44,
189
+ "learning_rate": 3.562585969738652e-05,
190
+ "loss": 1.0947,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.49,
195
+ "learning_rate": 3.493810178817056e-05,
196
+ "loss": 1.14,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.53,
201
+ "learning_rate": 3.425034387895461e-05,
202
+ "loss": 1.1113,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.58,
207
+ "learning_rate": 3.3562585969738655e-05,
208
+ "loss": 1.1291,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.63,
213
+ "learning_rate": 3.28748280605227e-05,
214
+ "loss": 1.0612,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.68,
219
+ "learning_rate": 3.218707015130674e-05,
220
+ "loss": 1.0498,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 1.73,
225
+ "learning_rate": 3.149931224209078e-05,
226
+ "loss": 1.1312,
227
  "step": 350
228
  },
229
  {
230
  "epoch": 1.78,
231
+ "learning_rate": 3.081155433287483e-05,
232
+ "loss": 1.0964,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.83,
237
+ "learning_rate": 3.0123796423658874e-05,
238
+ "loss": 1.1113,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 1.88,
243
+ "learning_rate": 2.9436038514442914e-05,
244
+ "loss": 1.0912,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 1.93,
249
+ "learning_rate": 2.874828060522696e-05,
250
+ "loss": 1.1223,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.98,
255
+ "learning_rate": 2.8060522696011005e-05,
256
+ "loss": 1.0604,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 2.0,
261
+ "eval_accuracy": 0.6523859282479972,
262
+ "eval_loss": 0.9515869617462158,
263
+ "eval_runtime": 24.9329,
264
+ "eval_samples_per_second": 115.149,
265
+ "eval_steps_per_second": 3.61,
266
  "step": 404
267
  },
268
  {
269
  "epoch": 2.03,
270
+ "learning_rate": 2.7372764786795052e-05,
271
+ "loss": 1.0564,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.08,
276
+ "learning_rate": 2.6685006877579092e-05,
277
+ "loss": 0.9928,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.13,
282
+ "learning_rate": 2.5997248968363136e-05,
283
+ "loss": 1.0681,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.18,
288
+ "learning_rate": 2.5309491059147183e-05,
289
+ "loss": 1.0882,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.23,
294
+ "learning_rate": 2.4621733149931224e-05,
295
+ "loss": 1.0132,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.28,
300
+ "learning_rate": 2.393397524071527e-05,
301
+ "loss": 1.0394,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.33,
306
+ "learning_rate": 2.324621733149931e-05,
307
+ "loss": 1.0756,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.38,
312
+ "learning_rate": 2.255845942228336e-05,
313
+ "loss": 1.0273,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.43,
318
+ "learning_rate": 2.1870701513067402e-05,
319
+ "loss": 1.0051,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.48,
324
+ "learning_rate": 2.1182943603851446e-05,
325
+ "loss": 1.0387,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.52,
330
+ "learning_rate": 2.049518569463549e-05,
331
+ "loss": 1.044,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.57,
336
+ "learning_rate": 1.9807427785419533e-05,
337
+ "loss": 1.0704,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 2.62,
342
+ "learning_rate": 1.9119669876203577e-05,
343
+ "loss": 1.0369,
344
  "step": 530
345
  },
346
  {
347
  "epoch": 2.67,
348
+ "learning_rate": 1.843191196698762e-05,
349
+ "loss": 1.062,
350
  "step": 540
351
  },
352
  {
353
  "epoch": 2.72,
354
+ "learning_rate": 1.7744154057771665e-05,
355
+ "loss": 1.0227,
356
  "step": 550
357
  },
358
  {
359
  "epoch": 2.77,
360
+ "learning_rate": 1.705639614855571e-05,
361
+ "loss": 0.9935,
362
  "step": 560
363
  },
364
  {
365
  "epoch": 2.82,
366
+ "learning_rate": 1.6368638239339752e-05,
367
+ "loss": 1.0015,
368
  "step": 570
369
  },
370
  {
371
  "epoch": 2.87,
372
+ "learning_rate": 1.56808803301238e-05,
373
+ "loss": 1.0193,
374
  "step": 580
375
  },
376
  {
377
  "epoch": 2.92,
378
+ "learning_rate": 1.499312242090784e-05,
379
+ "loss": 1.0556,
380
  "step": 590
381
  },
382
  {
383
  "epoch": 2.97,
384
+ "learning_rate": 1.4305364511691885e-05,
385
+ "loss": 0.998,
386
  "step": 600
387
  },
388
  {
389
  "epoch": 3.0,
390
+ "eval_accuracy": 0.6809474050853361,
391
+ "eval_loss": 0.8857186436653137,
392
+ "eval_runtime": 27.0525,
393
+ "eval_samples_per_second": 106.127,
394
+ "eval_steps_per_second": 3.327,
395
  "step": 606
396
  },
397
  {
398
+ "epoch": 3.02,
399
+ "learning_rate": 1.3617606602475929e-05,
400
+ "loss": 1.0409,
401
+ "step": 610
402
+ },
403
+ {
404
+ "epoch": 3.07,
405
+ "learning_rate": 1.2929848693259975e-05,
406
+ "loss": 1.006,
407
+ "step": 620
408
+ },
409
+ {
410
+ "epoch": 3.12,
411
+ "learning_rate": 1.2242090784044018e-05,
412
+ "loss": 0.9774,
413
+ "step": 630
414
+ },
415
+ {
416
+ "epoch": 3.17,
417
+ "learning_rate": 1.155433287482806e-05,
418
+ "loss": 0.9504,
419
+ "step": 640
420
+ },
421
+ {
422
+ "epoch": 3.22,
423
+ "learning_rate": 1.0866574965612106e-05,
424
+ "loss": 0.9869,
425
+ "step": 650
426
+ },
427
+ {
428
+ "epoch": 3.27,
429
+ "learning_rate": 1.017881705639615e-05,
430
+ "loss": 1.0148,
431
+ "step": 660
432
+ },
433
+ {
434
+ "epoch": 3.32,
435
+ "learning_rate": 9.491059147180192e-06,
436
+ "loss": 0.9741,
437
+ "step": 670
438
+ },
439
+ {
440
+ "epoch": 3.37,
441
+ "learning_rate": 8.803301237964237e-06,
442
+ "loss": 0.9989,
443
+ "step": 680
444
+ },
445
+ {
446
+ "epoch": 3.42,
447
+ "learning_rate": 8.115543328748281e-06,
448
+ "loss": 0.9169,
449
+ "step": 690
450
+ },
451
+ {
452
+ "epoch": 3.47,
453
+ "learning_rate": 7.4277854195323255e-06,
454
+ "loss": 0.9754,
455
+ "step": 700
456
+ },
457
+ {
458
+ "epoch": 3.51,
459
+ "learning_rate": 6.740027510316368e-06,
460
+ "loss": 0.9222,
461
+ "step": 710
462
+ },
463
+ {
464
+ "epoch": 3.56,
465
+ "learning_rate": 6.052269601100413e-06,
466
+ "loss": 0.9857,
467
+ "step": 720
468
+ },
469
+ {
470
+ "epoch": 3.61,
471
+ "learning_rate": 5.364511691884458e-06,
472
+ "loss": 0.9872,
473
+ "step": 730
474
+ },
475
+ {
476
+ "epoch": 3.66,
477
+ "learning_rate": 4.676753782668501e-06,
478
+ "loss": 0.9698,
479
+ "step": 740
480
+ },
481
+ {
482
+ "epoch": 3.71,
483
+ "learning_rate": 3.988995873452544e-06,
484
+ "loss": 0.9562,
485
+ "step": 750
486
+ },
487
+ {
488
+ "epoch": 3.76,
489
+ "learning_rate": 3.3012379642365885e-06,
490
+ "loss": 0.9538,
491
+ "step": 760
492
+ },
493
+ {
494
+ "epoch": 3.81,
495
+ "learning_rate": 2.613480055020633e-06,
496
+ "loss": 0.9707,
497
+ "step": 770
498
+ },
499
+ {
500
+ "epoch": 3.86,
501
+ "learning_rate": 1.925722145804677e-06,
502
+ "loss": 0.9708,
503
+ "step": 780
504
+ },
505
+ {
506
+ "epoch": 3.91,
507
+ "learning_rate": 1.2379642365887208e-06,
508
+ "loss": 0.9303,
509
+ "step": 790
510
+ },
511
+ {
512
+ "epoch": 3.96,
513
+ "learning_rate": 5.502063273727648e-07,
514
+ "loss": 0.9971,
515
+ "step": 800
516
+ },
517
+ {
518
+ "epoch": 4.0,
519
+ "eval_accuracy": 0.6879136189481017,
520
+ "eval_loss": 0.8504465818405151,
521
+ "eval_runtime": 26.9878,
522
+ "eval_samples_per_second": 106.381,
523
+ "eval_steps_per_second": 3.335,
524
+ "step": 808
525
+ },
526
+ {
527
+ "epoch": 4.0,
528
+ "step": 808,
529
+ "total_flos": 8.005887899965735e+18,
530
+ "train_loss": 1.122130044616095,
531
+ "train_runtime": 3022.4726,
532
+ "train_samples_per_second": 34.195,
533
+ "train_steps_per_second": 0.267
534
  }
535
  ],
536
+ "max_steps": 808,
537
+ "num_train_epochs": 4,
538
+ "total_flos": 8.005887899965735e+18,
539
  "trial_name": null,
540
  "trial_params": null
541
  }