Jeska committed on
Commit
82afbe4
1 Parent(s): 81b0175

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.6986985802650452,
5
- "eval_runtime": 0.0217,
6
- "eval_samples": 5,
7
- "eval_samples_per_second": 230.317,
8
- "eval_steps_per_second": 46.063,
9
- "train_loss": 2.285688607541627,
10
- "train_runtime": 426.5716,
11
- "train_samples": 11651,
12
- "train_samples_per_second": 81.939,
13
- "train_steps_per_second": 10.247
14
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.9067641496658325,
4
+ "eval_loss": 0.6223405599594116,
5
+ "eval_runtime": 3.1215,
6
+ "eval_samples": 1094,
7
+ "eval_samples_per_second": 350.471,
8
+ "eval_steps_per_second": 43.889,
9
+ "train_loss": 0.4635289347472817,
10
+ "train_runtime": 2578.0344,
11
+ "train_samples": 10556,
12
+ "train_samples_per_second": 61.419,
13
+ "train_steps_per_second": 7.68
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.6986985802650452,
5
- "eval_runtime": 0.0217,
6
- "eval_samples": 5,
7
- "eval_samples_per_second": 230.317,
8
- "eval_steps_per_second": 46.063
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.9067641496658325,
4
+ "eval_loss": 0.6223405599594116,
5
+ "eval_runtime": 3.1215,
6
+ "eval_samples": 1094,
7
+ "eval_samples_per_second": 350.471,
8
+ "eval_steps_per_second": 43.889
9
  }
runs/Dec07_08-55-12_jbuhmann/events.out.tfevents.1638867383.jbuhmann.18249.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4564aad1422f8311a33124c7d77cf6e9b536e8eb0cc8c077fe48d225343f25f0
3
- size 27356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b71c97e16bb144e50a66f88b517b7f36fe7c3df6e9c8a9f4aac04cb2338ecaab
3
+ size 27716
runs/Dec07_08-55-12_jbuhmann/events.out.tfevents.1638869966.jbuhmann.18249.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1651e9deba0013bed58761eaad659750223df9783f0487f735b82976eb86c5e
3
+ size 369
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 2.285688607541627,
4
- "train_runtime": 426.5716,
5
- "train_samples": 11651,
6
- "train_samples_per_second": 81.939,
7
- "train_steps_per_second": 10.247
8
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "train_loss": 0.4635289347472817,
4
+ "train_runtime": 2578.0344,
5
+ "train_samples": 10556,
6
+ "train_samples_per_second": 61.419,
7
+ "train_steps_per_second": 7.68
8
  }
trainer_state.json CHANGED
@@ -1,100 +1,394 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 4371,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.34,
12
- "learning_rate": 1.77121940059483e-05,
13
- "loss": 4.5055,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.69,
18
- "learning_rate": 1.5424388011896593e-05,
19
- "loss": 3.5521,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.0,
24
- "eval_accuracy": 0.6000000238418579,
25
- "eval_loss": 2.099963903427124,
26
- "eval_runtime": 0.0157,
27
- "eval_samples_per_second": 319.046,
28
- "eval_steps_per_second": 63.809,
29
- "step": 1457
30
  },
31
  {
32
- "epoch": 1.03,
33
- "learning_rate": 1.3136582017844888e-05,
34
- "loss": 2.8365,
35
  "step": 1500
36
  },
37
  {
38
- "epoch": 1.37,
39
- "learning_rate": 1.0848776023793184e-05,
40
- "loss": 2.1971,
41
  "step": 2000
42
  },
43
  {
44
- "epoch": 1.72,
45
- "learning_rate": 8.56097002974148e-06,
46
- "loss": 1.8933,
47
  "step": 2500
48
  },
49
  {
50
  "epoch": 2.0,
51
- "eval_accuracy": 1.0,
52
- "eval_loss": 0.9956113696098328,
53
- "eval_runtime": 0.0195,
54
- "eval_samples_per_second": 256.73,
55
- "eval_steps_per_second": 51.346,
56
- "step": 2914
57
  },
58
  {
59
- "epoch": 2.06,
60
- "learning_rate": 6.273164035689774e-06,
61
- "loss": 1.6154,
62
  "step": 3000
63
  },
64
  {
65
- "epoch": 2.4,
66
- "learning_rate": 3.98535804163807e-06,
67
- "loss": 1.2959,
68
  "step": 3500
69
  },
70
  {
71
- "epoch": 2.75,
72
- "learning_rate": 1.697552047586365e-06,
73
- "loss": 1.2182,
 
 
 
 
 
 
 
 
 
74
  "step": 4000
75
  },
76
  {
77
- "epoch": 3.0,
78
- "eval_accuracy": 1.0,
79
- "eval_loss": 0.6986985802650452,
80
- "eval_runtime": 0.0153,
81
- "eval_samples_per_second": 326.232,
82
- "eval_steps_per_second": 65.246,
83
- "step": 4371
84
  },
85
  {
86
- "epoch": 3.0,
87
- "step": 4371,
88
- "total_flos": 1151412634864512.0,
89
- "train_loss": 2.285688607541627,
90
- "train_runtime": 426.5716,
91
- "train_samples_per_second": 81.939,
92
- "train_steps_per_second": 10.247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
  ],
95
- "max_steps": 4371,
96
- "num_train_epochs": 3,
97
- "total_flos": 1151412634864512.0,
98
  "trial_name": null,
99
  "trial_params": null
100
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "global_step": 19800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.38,
12
+ "learning_rate": 1.9494949494949496e-05,
13
+ "loss": 4.4759,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.76,
18
+ "learning_rate": 1.8989898989898993e-05,
19
+ "loss": 3.4666,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.5767824649810791,
25
+ "eval_loss": 2.3355202674865723,
26
+ "eval_runtime": 2.6806,
27
+ "eval_samples_per_second": 408.112,
28
+ "eval_steps_per_second": 51.107,
29
+ "step": 1320
30
  },
31
  {
32
+ "epoch": 1.14,
33
+ "learning_rate": 1.8484848484848487e-05,
34
+ "loss": 2.6641,
35
  "step": 1500
36
  },
37
  {
38
+ "epoch": 1.52,
39
+ "learning_rate": 1.797979797979798e-05,
40
+ "loss": 1.9312,
41
  "step": 2000
42
  },
43
  {
44
+ "epoch": 1.89,
45
+ "learning_rate": 1.7474747474747475e-05,
46
+ "loss": 1.5293,
47
  "step": 2500
48
  },
49
  {
50
  "epoch": 2.0,
51
+ "eval_accuracy": 0.8144423961639404,
52
+ "eval_loss": 1.1118409633636475,
53
+ "eval_runtime": 2.7222,
54
+ "eval_samples_per_second": 401.874,
55
+ "eval_steps_per_second": 50.326,
56
+ "step": 2640
57
  },
58
  {
59
+ "epoch": 2.27,
60
+ "learning_rate": 1.6969696969696972e-05,
61
+ "loss": 1.0511,
62
  "step": 3000
63
  },
64
  {
65
+ "epoch": 2.65,
66
+ "learning_rate": 1.6464646464646466e-05,
67
+ "loss": 0.8031,
68
  "step": 3500
69
  },
70
  {
71
+ "epoch": 3.0,
72
+ "eval_accuracy": 0.880255937576294,
73
+ "eval_loss": 0.6362389922142029,
74
+ "eval_runtime": 2.657,
75
+ "eval_samples_per_second": 411.738,
76
+ "eval_steps_per_second": 51.561,
77
+ "step": 3960
78
+ },
79
+ {
80
+ "epoch": 3.03,
81
+ "learning_rate": 1.595959595959596e-05,
82
+ "loss": 0.5864,
83
  "step": 4000
84
  },
85
  {
86
+ "epoch": 3.41,
87
+ "learning_rate": 1.5454545454545454e-05,
88
+ "loss": 0.3632,
89
+ "step": 4500
 
 
 
90
  },
91
  {
92
+ "epoch": 3.79,
93
+ "learning_rate": 1.4949494949494952e-05,
94
+ "loss": 0.2985,
95
+ "step": 5000
96
+ },
97
+ {
98
+ "epoch": 4.0,
99
+ "eval_accuracy": 0.8957952260971069,
100
+ "eval_loss": 0.5118863582611084,
101
+ "eval_runtime": 2.6864,
102
+ "eval_samples_per_second": 407.231,
103
+ "eval_steps_per_second": 50.997,
104
+ "step": 5280
105
+ },
106
+ {
107
+ "epoch": 4.17,
108
+ "learning_rate": 1.4444444444444446e-05,
109
+ "loss": 0.2188,
110
+ "step": 5500
111
+ },
112
+ {
113
+ "epoch": 4.55,
114
+ "learning_rate": 1.3939393939393942e-05,
115
+ "loss": 0.1478,
116
+ "step": 6000
117
+ },
118
+ {
119
+ "epoch": 4.92,
120
+ "learning_rate": 1.3434343434343436e-05,
121
+ "loss": 0.1284,
122
+ "step": 6500
123
+ },
124
+ {
125
+ "epoch": 5.0,
126
+ "eval_accuracy": 0.8930529952049255,
127
+ "eval_loss": 0.5023291707038879,
128
+ "eval_runtime": 2.7918,
129
+ "eval_samples_per_second": 391.858,
130
+ "eval_steps_per_second": 49.072,
131
+ "step": 6600
132
+ },
133
+ {
134
+ "epoch": 5.3,
135
+ "learning_rate": 1.2929292929292931e-05,
136
+ "loss": 0.0608,
137
+ "step": 7000
138
+ },
139
+ {
140
+ "epoch": 5.68,
141
+ "learning_rate": 1.2424242424242425e-05,
142
+ "loss": 0.0842,
143
+ "step": 7500
144
+ },
145
+ {
146
+ "epoch": 6.0,
147
+ "eval_accuracy": 0.9021937847137451,
148
+ "eval_loss": 0.5246109962463379,
149
+ "eval_runtime": 2.6464,
150
+ "eval_samples_per_second": 413.391,
151
+ "eval_steps_per_second": 51.768,
152
+ "step": 7920
153
+ },
154
+ {
155
+ "epoch": 6.06,
156
+ "learning_rate": 1.191919191919192e-05,
157
+ "loss": 0.064,
158
+ "step": 8000
159
+ },
160
+ {
161
+ "epoch": 6.44,
162
+ "learning_rate": 1.1414141414141415e-05,
163
+ "loss": 0.0461,
164
+ "step": 8500
165
+ },
166
+ {
167
+ "epoch": 6.82,
168
+ "learning_rate": 1.0909090909090909e-05,
169
+ "loss": 0.0414,
170
+ "step": 9000
171
+ },
172
+ {
173
+ "epoch": 7.0,
174
+ "eval_accuracy": 0.9012796878814697,
175
+ "eval_loss": 0.5580916404724121,
176
+ "eval_runtime": 2.6062,
177
+ "eval_samples_per_second": 419.767,
178
+ "eval_steps_per_second": 52.567,
179
+ "step": 9240
180
+ },
181
+ {
182
+ "epoch": 7.2,
183
+ "learning_rate": 1.0404040404040405e-05,
184
+ "loss": 0.0476,
185
+ "step": 9500
186
+ },
187
+ {
188
+ "epoch": 7.58,
189
+ "learning_rate": 9.8989898989899e-06,
190
+ "loss": 0.0333,
191
+ "step": 10000
192
+ },
193
+ {
194
+ "epoch": 7.95,
195
+ "learning_rate": 9.393939393939396e-06,
196
+ "loss": 0.0372,
197
+ "step": 10500
198
+ },
199
+ {
200
+ "epoch": 8.0,
201
+ "eval_accuracy": 0.9003656506538391,
202
+ "eval_loss": 0.5721055269241333,
203
+ "eval_runtime": 3.0748,
204
+ "eval_samples_per_second": 355.794,
205
+ "eval_steps_per_second": 44.556,
206
+ "step": 10560
207
+ },
208
+ {
209
+ "epoch": 8.33,
210
+ "learning_rate": 8.888888888888888e-06,
211
+ "loss": 0.0276,
212
+ "step": 11000
213
+ },
214
+ {
215
+ "epoch": 8.71,
216
+ "learning_rate": 8.383838383838384e-06,
217
+ "loss": 0.0292,
218
+ "step": 11500
219
+ },
220
+ {
221
+ "epoch": 9.0,
222
+ "eval_accuracy": 0.9140768051147461,
223
+ "eval_loss": 0.5468941926956177,
224
+ "eval_runtime": 3.0391,
225
+ "eval_samples_per_second": 359.977,
226
+ "eval_steps_per_second": 45.079,
227
+ "step": 11880
228
+ },
229
+ {
230
+ "epoch": 9.09,
231
+ "learning_rate": 7.87878787878788e-06,
232
+ "loss": 0.0298,
233
+ "step": 12000
234
+ },
235
+ {
236
+ "epoch": 9.47,
237
+ "learning_rate": 7.373737373737374e-06,
238
+ "loss": 0.0209,
239
+ "step": 12500
240
+ },
241
+ {
242
+ "epoch": 9.85,
243
+ "learning_rate": 6.868686868686869e-06,
244
+ "loss": 0.0257,
245
+ "step": 13000
246
+ },
247
+ {
248
+ "epoch": 10.0,
249
+ "eval_accuracy": 0.9058501124382019,
250
+ "eval_loss": 0.5871404409408569,
251
+ "eval_runtime": 2.8242,
252
+ "eval_samples_per_second": 387.373,
253
+ "eval_steps_per_second": 48.51,
254
+ "step": 13200
255
+ },
256
+ {
257
+ "epoch": 10.23,
258
+ "learning_rate": 6.363636363636364e-06,
259
+ "loss": 0.0205,
260
+ "step": 13500
261
+ },
262
+ {
263
+ "epoch": 10.61,
264
+ "learning_rate": 5.858585858585859e-06,
265
+ "loss": 0.0145,
266
+ "step": 14000
267
+ },
268
+ {
269
+ "epoch": 10.98,
270
+ "learning_rate": 5.353535353535354e-06,
271
+ "loss": 0.0189,
272
+ "step": 14500
273
+ },
274
+ {
275
+ "epoch": 11.0,
276
+ "eval_accuracy": 0.9049360156059265,
277
+ "eval_loss": 0.6180957555770874,
278
+ "eval_runtime": 2.9518,
279
+ "eval_samples_per_second": 370.627,
280
+ "eval_steps_per_second": 46.413,
281
+ "step": 14520
282
+ },
283
+ {
284
+ "epoch": 11.36,
285
+ "learning_rate": 4.848484848484849e-06,
286
+ "loss": 0.0117,
287
+ "step": 15000
288
+ },
289
+ {
290
+ "epoch": 11.74,
291
+ "learning_rate": 4.343434343434344e-06,
292
+ "loss": 0.0104,
293
+ "step": 15500
294
+ },
295
+ {
296
+ "epoch": 12.0,
297
+ "eval_accuracy": 0.9067641496658325,
298
+ "eval_loss": 0.618419349193573,
299
+ "eval_runtime": 2.8761,
300
+ "eval_samples_per_second": 380.381,
301
+ "eval_steps_per_second": 47.635,
302
+ "step": 15840
303
+ },
304
+ {
305
+ "epoch": 12.12,
306
+ "learning_rate": 3.8383838383838385e-06,
307
+ "loss": 0.0155,
308
+ "step": 16000
309
+ },
310
+ {
311
+ "epoch": 12.5,
312
+ "learning_rate": 3.3333333333333333e-06,
313
+ "loss": 0.0087,
314
+ "step": 16500
315
+ },
316
+ {
317
+ "epoch": 12.88,
318
+ "learning_rate": 2.8282828282828286e-06,
319
+ "loss": 0.009,
320
+ "step": 17000
321
+ },
322
+ {
323
+ "epoch": 13.0,
324
+ "eval_accuracy": 0.9049360156059265,
325
+ "eval_loss": 0.6013244986534119,
326
+ "eval_runtime": 2.7727,
327
+ "eval_samples_per_second": 394.554,
328
+ "eval_steps_per_second": 49.409,
329
+ "step": 17160
330
+ },
331
+ {
332
+ "epoch": 13.26,
333
+ "learning_rate": 2.3232323232323234e-06,
334
+ "loss": 0.0078,
335
+ "step": 17500
336
+ },
337
+ {
338
+ "epoch": 13.64,
339
+ "learning_rate": 1.8181818181818183e-06,
340
+ "loss": 0.0051,
341
+ "step": 18000
342
+ },
343
+ {
344
+ "epoch": 14.0,
345
+ "eval_accuracy": 0.9058501124382019,
346
+ "eval_loss": 0.620483934879303,
347
+ "eval_runtime": 3.0663,
348
+ "eval_samples_per_second": 356.786,
349
+ "eval_steps_per_second": 44.68,
350
+ "step": 18480
351
+ },
352
+ {
353
+ "epoch": 14.02,
354
+ "learning_rate": 1.3131313131313134e-06,
355
+ "loss": 0.0091,
356
+ "step": 18500
357
+ },
358
+ {
359
+ "epoch": 14.39,
360
+ "learning_rate": 8.080808080808082e-07,
361
+ "loss": 0.0046,
362
+ "step": 19000
363
+ },
364
+ {
365
+ "epoch": 14.77,
366
+ "learning_rate": 3.0303030303030305e-07,
367
+ "loss": 0.0035,
368
+ "step": 19500
369
+ },
370
+ {
371
+ "epoch": 15.0,
372
+ "eval_accuracy": 0.9067641496658325,
373
+ "eval_loss": 0.6223405599594116,
374
+ "eval_runtime": 3.0619,
375
+ "eval_samples_per_second": 357.298,
376
+ "eval_steps_per_second": 44.744,
377
+ "step": 19800
378
+ },
379
+ {
380
+ "epoch": 15.0,
381
+ "step": 19800,
382
+ "total_flos": 5215995096399360.0,
383
+ "train_loss": 0.4635289347472817,
384
+ "train_runtime": 2578.0344,
385
+ "train_samples_per_second": 61.419,
386
+ "train_steps_per_second": 7.68
387
  }
388
  ],
389
+ "max_steps": 19800,
390
+ "num_train_epochs": 15,
391
+ "total_flos": 5215995096399360.0,
392
  "trial_name": null,
393
  "trial_params": null
394
  }