Jeska committed on
Commit
6e44fdb
1 Parent(s): 6ea5e73

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.9067641496658325,
4
- "eval_loss": 0.7214756608009338,
5
- "eval_runtime": 2.2644,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 483.122,
8
- "eval_steps_per_second": 60.501,
9
- "train_loss": 0.27693930435361286,
10
- "train_runtime": 2167.3602,
11
  "train_samples": 10556,
12
- "train_samples_per_second": 73.057,
13
- "train_steps_per_second": 9.136
14
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.8756855726242065,
4
+ "eval_loss": 0.810522198677063,
5
+ "eval_runtime": 3.6004,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 303.856,
8
+ "eval_steps_per_second": 38.051,
9
+ "train_loss": 1.72243186873619,
10
+ "train_runtime": 2003.3488,
11
  "train_samples": 10556,
12
+ "train_samples_per_second": 79.038,
13
+ "train_steps_per_second": 2.471
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.9067641496658325,
4
- "eval_loss": 0.7214756608009338,
5
- "eval_runtime": 2.2644,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 483.122,
8
- "eval_steps_per_second": 60.501
9
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.8756855726242065,
4
+ "eval_loss": 0.810522198677063,
5
+ "eval_runtime": 3.6004,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 303.856,
8
+ "eval_steps_per_second": 38.051
9
  }
runs/Dec15_14-58-21_jbuhmann/events.out.tfevents.1639580315.jbuhmann.21699.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16bdf0234f3b04d051d0cec69b08603faf02cf886899f82ed13e82cec7c0eb6a
3
- size 22692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:992d713823ebc6119fd87774065d5b8297cea2c13ad0b8e9f99aa0c7cd8a3b82
3
+ size 23046
runs/Dec15_14-58-21_jbuhmann/events.out.tfevents.1639582323.jbuhmann.21699.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac566f80e7478542754cbe7ae0fe67741e2873d6ae22ff77b632871885dd66fe
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 0.27693930435361286,
4
- "train_runtime": 2167.3602,
5
  "train_samples": 10556,
6
- "train_samples_per_second": 73.057,
7
- "train_steps_per_second": 9.136
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 1.72243186873619,
4
+ "train_runtime": 2003.3488,
5
  "train_samples": 10556,
6
+ "train_samples_per_second": 79.038,
7
+ "train_steps_per_second": 2.471
8
  }
trainer_state.json CHANGED
@@ -2,393 +2,213 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 15.0,
5
- "global_step": 19800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.38,
12
- "learning_rate": 4.8737373737373736e-05,
13
- "loss": 3.8927,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.76,
18
- "learning_rate": 4.7474747474747476e-05,
19
- "loss": 2.3719,
20
- "step": 1000
21
- },
22
  {
23
  "epoch": 1.0,
24
- "eval_accuracy": 0.7787933945655823,
25
- "eval_loss": 1.1852809190750122,
26
- "eval_runtime": 2.0844,
27
- "eval_samples_per_second": 524.845,
28
- "eval_steps_per_second": 65.726,
29
- "step": 1320
30
- },
31
- {
32
- "epoch": 1.14,
33
- "learning_rate": 4.621212121212121e-05,
34
- "loss": 1.4046,
35
- "step": 1500
36
  },
37
  {
38
  "epoch": 1.52,
39
- "learning_rate": 4.494949494949495e-05,
40
- "loss": 0.7837,
41
- "step": 2000
42
- },
43
- {
44
- "epoch": 1.89,
45
- "learning_rate": 4.368686868686869e-05,
46
- "loss": 0.5941,
47
- "step": 2500
48
  },
49
  {
50
  "epoch": 2.0,
51
- "eval_accuracy": 0.880255937576294,
52
- "eval_loss": 0.5889360308647156,
53
- "eval_runtime": 2.0716,
54
- "eval_samples_per_second": 528.095,
55
- "eval_steps_per_second": 66.133,
56
- "step": 2640
57
- },
58
- {
59
- "epoch": 2.27,
60
- "learning_rate": 4.242424242424243e-05,
61
- "loss": 0.3401,
62
- "step": 3000
63
- },
64
- {
65
- "epoch": 2.65,
66
- "learning_rate": 4.116161616161616e-05,
67
- "loss": 0.2557,
68
- "step": 3500
69
  },
70
  {
71
  "epoch": 3.0,
72
- "eval_accuracy": 0.8866544961929321,
73
- "eval_loss": 0.5768323540687561,
74
- "eval_runtime": 2.1209,
75
- "eval_samples_per_second": 515.813,
76
- "eval_steps_per_second": 64.595,
77
- "step": 3960
78
  },
79
  {
80
  "epoch": 3.03,
81
- "learning_rate": 3.98989898989899e-05,
82
- "loss": 0.2031,
83
- "step": 4000
84
- },
85
- {
86
- "epoch": 3.41,
87
- "learning_rate": 3.8636363636363636e-05,
88
- "loss": 0.1193,
89
- "step": 4500
90
- },
91
- {
92
- "epoch": 3.79,
93
- "learning_rate": 3.7373737373737376e-05,
94
- "loss": 0.1178,
95
- "step": 5000
96
  },
97
  {
98
  "epoch": 4.0,
99
- "eval_accuracy": 0.8811700344085693,
100
- "eval_loss": 0.6243242621421814,
101
- "eval_runtime": 2.1461,
102
- "eval_samples_per_second": 509.767,
103
- "eval_steps_per_second": 63.837,
104
- "step": 5280
105
- },
106
- {
107
- "epoch": 4.17,
108
- "learning_rate": 3.611111111111111e-05,
109
- "loss": 0.1101,
110
- "step": 5500
111
  },
112
  {
113
  "epoch": 4.55,
114
- "learning_rate": 3.484848484848485e-05,
115
- "loss": 0.078,
116
- "step": 6000
117
- },
118
- {
119
- "epoch": 4.92,
120
- "learning_rate": 3.358585858585859e-05,
121
- "loss": 0.0948,
122
- "step": 6500
123
  },
124
  {
125
  "epoch": 5.0,
126
- "eval_accuracy": 0.8912248611450195,
127
- "eval_loss": 0.650793194770813,
128
- "eval_runtime": 2.0892,
129
- "eval_samples_per_second": 523.641,
130
- "eval_steps_per_second": 65.575,
131
- "step": 6600
132
- },
133
- {
134
- "epoch": 5.3,
135
- "learning_rate": 3.232323232323233e-05,
136
- "loss": 0.0352,
137
- "step": 7000
138
- },
139
- {
140
- "epoch": 5.68,
141
- "learning_rate": 3.106060606060606e-05,
142
- "loss": 0.079,
143
- "step": 7500
144
  },
145
  {
146
  "epoch": 6.0,
147
- "eval_accuracy": 0.8930529952049255,
148
- "eval_loss": 0.6763377785682678,
149
- "eval_runtime": 2.0996,
150
- "eval_samples_per_second": 521.052,
151
- "eval_steps_per_second": 65.251,
152
- "step": 7920
153
  },
154
  {
155
  "epoch": 6.06,
156
- "learning_rate": 2.9797979797979796e-05,
157
- "loss": 0.054,
158
- "step": 8000
159
- },
160
- {
161
- "epoch": 6.44,
162
- "learning_rate": 2.8535353535353536e-05,
163
- "loss": 0.0388,
164
- "step": 8500
165
- },
166
- {
167
- "epoch": 6.82,
168
- "learning_rate": 2.7272727272727273e-05,
169
- "loss": 0.0413,
170
- "step": 9000
171
  },
172
  {
173
  "epoch": 7.0,
174
- "eval_accuracy": 0.9021937847137451,
175
- "eval_loss": 0.6991991400718689,
176
- "eval_runtime": 2.1868,
177
- "eval_samples_per_second": 500.277,
178
- "eval_steps_per_second": 62.649,
179
- "step": 9240
180
- },
181
- {
182
- "epoch": 7.2,
183
- "learning_rate": 2.6010101010101013e-05,
184
- "loss": 0.046,
185
- "step": 9500
186
  },
187
  {
188
  "epoch": 7.58,
189
- "learning_rate": 2.474747474747475e-05,
190
- "loss": 0.0295,
191
- "step": 10000
192
- },
193
- {
194
- "epoch": 7.95,
195
- "learning_rate": 2.3484848484848487e-05,
196
- "loss": 0.0291,
197
- "step": 10500
198
  },
199
  {
200
  "epoch": 8.0,
201
- "eval_accuracy": 0.8921389579772949,
202
- "eval_loss": 0.7643230557441711,
203
- "eval_runtime": 2.2005,
204
- "eval_samples_per_second": 497.161,
205
- "eval_steps_per_second": 62.259,
206
- "step": 10560
207
- },
208
- {
209
- "epoch": 8.33,
210
- "learning_rate": 2.2222222222222223e-05,
211
- "loss": 0.0236,
212
- "step": 11000
213
- },
214
- {
215
- "epoch": 8.71,
216
- "learning_rate": 2.095959595959596e-05,
217
- "loss": 0.032,
218
- "step": 11500
219
  },
220
  {
221
  "epoch": 9.0,
222
- "eval_accuracy": 0.9113345742225647,
223
- "eval_loss": 0.6661025285720825,
224
- "eval_runtime": 2.0869,
225
- "eval_samples_per_second": 524.224,
226
- "eval_steps_per_second": 65.648,
227
- "step": 11880
228
  },
229
  {
230
  "epoch": 9.09,
231
- "learning_rate": 1.9696969696969697e-05,
232
- "loss": 0.0323,
233
- "step": 12000
234
- },
235
- {
236
- "epoch": 9.47,
237
- "learning_rate": 1.8434343434343433e-05,
238
- "loss": 0.0158,
239
- "step": 12500
240
- },
241
- {
242
- "epoch": 9.85,
243
- "learning_rate": 1.7171717171717173e-05,
244
- "loss": 0.027,
245
- "step": 13000
246
  },
247
  {
248
  "epoch": 10.0,
249
- "eval_accuracy": 0.9085923433303833,
250
- "eval_loss": 0.6882250308990479,
251
- "eval_runtime": 2.2029,
252
- "eval_samples_per_second": 496.616,
253
- "eval_steps_per_second": 62.19,
254
- "step": 13200
255
- },
256
- {
257
- "epoch": 10.23,
258
- "learning_rate": 1.590909090909091e-05,
259
- "loss": 0.0147,
260
- "step": 13500
261
  },
262
  {
263
  "epoch": 10.61,
264
- "learning_rate": 1.4646464646464647e-05,
265
- "loss": 0.0125,
266
- "step": 14000
267
- },
268
- {
269
- "epoch": 10.98,
270
- "learning_rate": 1.3383838383838385e-05,
271
- "loss": 0.0241,
272
- "step": 14500
273
  },
274
  {
275
  "epoch": 11.0,
276
- "eval_accuracy": 0.9040219187736511,
277
- "eval_loss": 0.7250856757164001,
278
- "eval_runtime": 2.0725,
279
- "eval_samples_per_second": 527.862,
280
- "eval_steps_per_second": 66.103,
281
- "step": 14520
282
- },
283
- {
284
- "epoch": 11.36,
285
- "learning_rate": 1.2121212121212122e-05,
286
- "loss": 0.0108,
287
- "step": 15000
288
- },
289
- {
290
- "epoch": 11.74,
291
- "learning_rate": 1.085858585858586e-05,
292
- "loss": 0.0109,
293
- "step": 15500
294
  },
295
  {
296
  "epoch": 12.0,
297
- "eval_accuracy": 0.8976234197616577,
298
- "eval_loss": 0.7604121565818787,
299
- "eval_runtime": 2.2757,
300
- "eval_samples_per_second": 480.738,
301
- "eval_steps_per_second": 60.202,
302
- "step": 15840
303
  },
304
  {
305
  "epoch": 12.12,
306
- "learning_rate": 9.595959595959595e-06,
307
- "loss": 0.0122,
308
- "step": 16000
309
- },
310
- {
311
- "epoch": 12.5,
312
- "learning_rate": 8.333333333333334e-06,
313
- "loss": 0.0058,
314
- "step": 16500
315
- },
316
- {
317
- "epoch": 12.88,
318
- "learning_rate": 7.0707070707070704e-06,
319
- "loss": 0.0056,
320
- "step": 17000
321
  },
322
  {
323
  "epoch": 13.0,
324
- "eval_accuracy": 0.9040219187736511,
325
- "eval_loss": 0.7356353998184204,
326
- "eval_runtime": 2.1586,
327
- "eval_samples_per_second": 506.799,
328
- "eval_steps_per_second": 63.466,
329
- "step": 17160
330
- },
331
- {
332
- "epoch": 13.26,
333
- "learning_rate": 5.808080808080808e-06,
334
- "loss": 0.0055,
335
- "step": 17500
336
  },
337
  {
338
  "epoch": 13.64,
339
- "learning_rate": 4.5454545454545455e-06,
340
- "loss": 0.0018,
341
- "step": 18000
342
  },
343
  {
344
  "epoch": 14.0,
345
- "eval_accuracy": 0.9076782464981079,
346
- "eval_loss": 0.7189434170722961,
347
- "eval_runtime": 2.0825,
348
- "eval_samples_per_second": 525.332,
349
- "eval_steps_per_second": 65.787,
350
- "step": 18480
351
- },
352
- {
353
- "epoch": 14.02,
354
- "learning_rate": 3.2828282828282835e-06,
355
- "loss": 0.0067,
356
- "step": 18500
357
- },
358
- {
359
- "epoch": 14.39,
360
- "learning_rate": 2.0202020202020206e-06,
361
- "loss": 0.0023,
362
- "step": 19000
363
- },
364
- {
365
- "epoch": 14.77,
366
- "learning_rate": 7.575757575757576e-07,
367
- "loss": 0.0017,
368
- "step": 19500
369
  },
370
  {
371
  "epoch": 15.0,
372
- "eval_accuracy": 0.9067641496658325,
373
- "eval_loss": 0.7214756608009338,
374
- "eval_runtime": 2.1084,
375
- "eval_samples_per_second": 518.881,
376
- "eval_steps_per_second": 64.979,
377
- "step": 19800
378
  },
379
  {
380
  "epoch": 15.0,
381
- "step": 19800,
382
- "total_flos": 5215995096399360.0,
383
- "train_loss": 0.27693930435361286,
384
- "train_runtime": 2167.3602,
385
- "train_samples_per_second": 73.057,
386
- "train_steps_per_second": 9.136
387
  }
388
  ],
389
- "max_steps": 19800,
390
  "num_train_epochs": 15,
391
- "total_flos": 5215995096399360.0,
392
  "trial_name": null,
393
  "trial_params": null
394
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 15.0,
5
+ "global_step": 4950,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
+ "eval_accuracy": 0.22303473949432373,
13
+ "eval_loss": 3.9924213886260986,
14
+ "eval_runtime": 3.5667,
15
+ "eval_samples_per_second": 306.729,
16
+ "eval_steps_per_second": 38.411,
17
+ "step": 330
 
 
 
 
 
 
18
  },
19
  {
20
  "epoch": 1.52,
21
+ "learning_rate": 8.98989898989899e-06,
22
+ "loss": 4.3795,
23
+ "step": 500
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "eval_accuracy": 0.3957952558994293,
28
+ "eval_loss": 3.1812195777893066,
29
+ "eval_runtime": 3.5362,
30
+ "eval_samples_per_second": 309.369,
31
+ "eval_steps_per_second": 38.742,
32
+ "step": 660
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "eval_accuracy": 0.5511882901191711,
37
+ "eval_loss": 2.5903849601745605,
38
+ "eval_runtime": 3.5231,
39
+ "eval_samples_per_second": 310.519,
40
+ "eval_steps_per_second": 38.886,
41
+ "step": 990
42
  },
43
  {
44
  "epoch": 3.03,
45
+ "learning_rate": 7.97979797979798e-06,
46
+ "loss": 3.2046,
47
+ "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 4.0,
51
+ "eval_accuracy": 0.6407678127288818,
52
+ "eval_loss": 2.1536314487457275,
53
+ "eval_runtime": 3.5444,
54
+ "eval_samples_per_second": 308.659,
55
+ "eval_steps_per_second": 38.653,
56
+ "step": 1320
 
 
 
 
 
 
57
  },
58
  {
59
  "epoch": 4.55,
60
+ "learning_rate": 6.969696969696971e-06,
61
+ "loss": 2.3683,
62
+ "step": 1500
 
 
 
 
 
 
63
  },
64
  {
65
  "epoch": 5.0,
66
+ "eval_accuracy": 0.7129799127578735,
67
+ "eval_loss": 1.8079293966293335,
68
+ "eval_runtime": 3.555,
69
+ "eval_samples_per_second": 307.739,
70
+ "eval_steps_per_second": 38.538,
71
+ "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
72
  },
73
  {
74
  "epoch": 6.0,
75
+ "eval_accuracy": 0.7687385678291321,
76
+ "eval_loss": 1.5419940948486328,
77
+ "eval_runtime": 3.5056,
78
+ "eval_samples_per_second": 312.075,
79
+ "eval_steps_per_second": 39.081,
80
+ "step": 1980
81
  },
82
  {
83
  "epoch": 6.06,
84
+ "learning_rate": 5.95959595959596e-06,
85
+ "loss": 1.8065,
86
+ "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
87
  },
88
  {
89
  "epoch": 7.0,
90
+ "eval_accuracy": 0.8053016662597656,
91
+ "eval_loss": 1.3433690071105957,
92
+ "eval_runtime": 3.4889,
93
+ "eval_samples_per_second": 313.564,
94
+ "eval_steps_per_second": 39.267,
95
+ "step": 2310
 
 
 
 
 
 
96
  },
97
  {
98
  "epoch": 7.58,
99
+ "learning_rate": 4.94949494949495e-06,
100
+ "loss": 1.373,
101
+ "step": 2500
 
 
 
 
 
 
102
  },
103
  {
104
  "epoch": 8.0,
105
+ "eval_accuracy": 0.825411319732666,
106
+ "eval_loss": 1.1881896257400513,
107
+ "eval_runtime": 3.5056,
108
+ "eval_samples_per_second": 312.068,
109
+ "eval_steps_per_second": 39.08,
110
+ "step": 2640
 
 
 
 
 
 
 
 
 
 
 
 
111
  },
112
  {
113
  "epoch": 9.0,
114
+ "eval_accuracy": 0.8400365710258484,
115
+ "eval_loss": 1.0700345039367676,
116
+ "eval_runtime": 3.4951,
117
+ "eval_samples_per_second": 313.014,
118
+ "eval_steps_per_second": 39.198,
119
+ "step": 2970
120
  },
121
  {
122
  "epoch": 9.09,
123
+ "learning_rate": 3.93939393939394e-06,
124
+ "loss": 1.0931,
125
+ "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
126
  },
127
  {
128
  "epoch": 10.0,
129
+ "eval_accuracy": 0.851005494594574,
130
+ "eval_loss": 0.9790602326393127,
131
+ "eval_runtime": 3.4892,
132
+ "eval_samples_per_second": 313.542,
133
+ "eval_steps_per_second": 39.264,
134
+ "step": 3300
 
 
 
 
 
 
135
  },
136
  {
137
  "epoch": 10.61,
138
+ "learning_rate": 2.9292929292929295e-06,
139
+ "loss": 0.8714,
140
+ "step": 3500
 
 
 
 
 
 
141
  },
142
  {
143
  "epoch": 11.0,
144
+ "eval_accuracy": 0.8619744181632996,
145
+ "eval_loss": 0.9201710224151611,
146
+ "eval_runtime": 3.4883,
147
+ "eval_samples_per_second": 313.616,
148
+ "eval_steps_per_second": 39.274,
149
+ "step": 3630
 
 
 
 
 
 
 
 
 
 
 
 
150
  },
151
  {
152
  "epoch": 12.0,
153
+ "eval_accuracy": 0.868372917175293,
154
+ "eval_loss": 0.8640827536582947,
155
+ "eval_runtime": 3.5075,
156
+ "eval_samples_per_second": 311.902,
157
+ "eval_steps_per_second": 39.059,
158
+ "step": 3960
159
  },
160
  {
161
  "epoch": 12.12,
162
+ "learning_rate": 1.9191919191919192e-06,
163
+ "loss": 0.7428,
164
+ "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
165
  },
166
  {
167
  "epoch": 13.0,
168
+ "eval_accuracy": 0.8747714757919312,
169
+ "eval_loss": 0.8372448086738586,
170
+ "eval_runtime": 3.5209,
171
+ "eval_samples_per_second": 310.718,
172
+ "eval_steps_per_second": 38.911,
173
+ "step": 4290
 
 
 
 
 
 
174
  },
175
  {
176
  "epoch": 13.64,
177
+ "learning_rate": 9.090909090909091e-07,
178
+ "loss": 0.6531,
179
+ "step": 4500
180
  },
181
  {
182
  "epoch": 14.0,
183
+ "eval_accuracy": 0.8765996098518372,
184
+ "eval_loss": 0.8168175220489502,
185
+ "eval_runtime": 3.5294,
186
+ "eval_samples_per_second": 309.965,
187
+ "eval_steps_per_second": 38.816,
188
+ "step": 4620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  },
190
  {
191
  "epoch": 15.0,
192
+ "eval_accuracy": 0.8756855726242065,
193
+ "eval_loss": 0.810522198677063,
194
+ "eval_runtime": 3.5261,
195
+ "eval_samples_per_second": 310.256,
196
+ "eval_steps_per_second": 38.853,
197
+ "step": 4950
198
  },
199
  {
200
  "epoch": 15.0,
201
+ "step": 4950,
202
+ "total_flos": 1.043199019279872e+16,
203
+ "train_loss": 1.72243186873619,
204
+ "train_runtime": 2003.3488,
205
+ "train_samples_per_second": 79.038,
206
+ "train_steps_per_second": 2.471
207
  }
208
  ],
209
+ "max_steps": 4950,
210
  "num_train_epochs": 15,
211
+ "total_flos": 1.043199019279872e+16,
212
  "trial_name": null,
213
  "trial_params": null
214
  }