vumichien commited on
Commit
595c991
1 Parent(s): c33caf1

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9040894380700206,
4
+ "eval_loss": 0.3562973439693451,
5
+ "eval_runtime": 43.2189,
6
+ "eval_samples_per_second": 157.292,
7
+ "eval_steps_per_second": 2.476,
8
+ "train_loss": 0.8915880279732229,
9
+ "train_runtime": 4529.6112,
10
+ "train_samples_per_second": 112.8,
11
+ "train_steps_per_second": 1.762
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9040894380700206,
4
+ "eval_loss": 0.3562973439693451,
5
+ "eval_runtime": 43.2189,
6
+ "eval_samples_per_second": 157.292,
7
+ "eval_steps_per_second": 2.476
8
+ }
runs/Nov28_21-42-20_3367f5396cf0/events.out.tfevents.1669676543.3367f5396cf0.230.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d95b9c1b6f2e084f9dfb4a0f6485debaaa30ce115712a53d568f423f070d86
3
+ size 363
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.8915880279732229,
4
+ "train_runtime": 4529.6112,
5
+ "train_samples_per_second": 112.8,
6
+ "train_steps_per_second": 1.762
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9040894380700206,
3
+ "best_model_checkpoint": "trillsson3-ft-keyword-spotting/checkpoint-3192",
4
+ "epoch": 9.999373825923607,
5
+ "global_step": 7980,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 3.533834586466165e-05,
13
+ "loss": 7.4799,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.25,
18
+ "learning_rate": 7.293233082706766e-05,
19
+ "loss": 4.7288,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.38,
24
+ "learning_rate": 0.00011052631578947366,
25
+ "loss": 3.0455,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.5,
30
+ "learning_rate": 0.00014812030075187968,
31
+ "loss": 2.3495,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.63,
36
+ "learning_rate": 0.00018571428571428572,
37
+ "loss": 1.836,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.75,
42
+ "learning_rate": 0.00022330827067669172,
43
+ "loss": 1.4179,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 0.88,
48
+ "learning_rate": 0.0002609022556390977,
49
+ "loss": 1.1824,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.7488967343336276,
55
+ "eval_loss": 0.6477929949760437,
56
+ "eval_runtime": 42.5234,
57
+ "eval_samples_per_second": 159.865,
58
+ "eval_steps_per_second": 2.516,
59
+ "step": 798
60
+ },
61
+ {
62
+ "epoch": 1.0,
63
+ "learning_rate": 0.0002984962406015037,
64
+ "loss": 1.0261,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.13,
69
+ "learning_rate": 0.00029598997493734334,
70
+ "loss": 0.9596,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 1.25,
75
+ "learning_rate": 0.000291812865497076,
76
+ "loss": 0.8732,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 1.38,
81
+ "learning_rate": 0.00028763575605680864,
82
+ "loss": 0.8509,
83
+ "step": 1100
84
+ },
85
+ {
86
+ "epoch": 1.5,
87
+ "learning_rate": 0.00028345864661654134,
88
+ "loss": 0.8162,
89
+ "step": 1200
90
+ },
91
+ {
92
+ "epoch": 1.63,
93
+ "learning_rate": 0.000279281537176274,
94
+ "loss": 0.7603,
95
+ "step": 1300
96
+ },
97
+ {
98
+ "epoch": 1.75,
99
+ "learning_rate": 0.00027510442773600664,
100
+ "loss": 0.7638,
101
+ "step": 1400
102
+ },
103
+ {
104
+ "epoch": 1.88,
105
+ "learning_rate": 0.00027092731829573934,
106
+ "loss": 0.7448,
107
+ "step": 1500
108
+ },
109
+ {
110
+ "epoch": 2.0,
111
+ "eval_accuracy": 0.8727566931450427,
112
+ "eval_loss": 0.4273872375488281,
113
+ "eval_runtime": 42.1254,
114
+ "eval_samples_per_second": 161.375,
115
+ "eval_steps_per_second": 2.54,
116
+ "step": 1596
117
+ },
118
+ {
119
+ "epoch": 2.01,
120
+ "learning_rate": 0.000266750208855472,
121
+ "loss": 0.7268,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 2.13,
126
+ "learning_rate": 0.00026257309941520463,
127
+ "loss": 0.7503,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 2.26,
132
+ "learning_rate": 0.00025839598997493734,
133
+ "loss": 0.719,
134
+ "step": 1800
135
+ },
136
+ {
137
+ "epoch": 2.38,
138
+ "learning_rate": 0.00025421888053467,
139
+ "loss": 0.7174,
140
+ "step": 1900
141
+ },
142
+ {
143
+ "epoch": 2.51,
144
+ "learning_rate": 0.00025004177109440263,
145
+ "loss": 0.6869,
146
+ "step": 2000
147
+ },
148
+ {
149
+ "epoch": 2.63,
150
+ "learning_rate": 0.00024586466165413533,
151
+ "loss": 0.7113,
152
+ "step": 2100
153
+ },
154
+ {
155
+ "epoch": 2.76,
156
+ "learning_rate": 0.00024168755221386798,
157
+ "loss": 0.7011,
158
+ "step": 2200
159
+ },
160
+ {
161
+ "epoch": 2.88,
162
+ "learning_rate": 0.00023751044277360066,
163
+ "loss": 0.7089,
164
+ "step": 2300
165
+ },
166
+ {
167
+ "epoch": 3.0,
168
+ "eval_accuracy": 0.8949691085613416,
169
+ "eval_loss": 0.3723289370536804,
170
+ "eval_runtime": 41.1393,
171
+ "eval_samples_per_second": 165.243,
172
+ "eval_steps_per_second": 2.601,
173
+ "step": 2394
174
+ },
175
+ {
176
+ "epoch": 3.01,
177
+ "learning_rate": 0.0002333333333333333,
178
+ "loss": 0.6834,
179
+ "step": 2400
180
+ },
181
+ {
182
+ "epoch": 3.13,
183
+ "learning_rate": 0.00022915622389306598,
184
+ "loss": 0.6804,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 3.26,
189
+ "learning_rate": 0.00022497911445279865,
190
+ "loss": 0.6771,
191
+ "step": 2600
192
+ },
193
+ {
194
+ "epoch": 3.38,
195
+ "learning_rate": 0.0002208020050125313,
196
+ "loss": 0.684,
197
+ "step": 2700
198
+ },
199
+ {
200
+ "epoch": 3.51,
201
+ "learning_rate": 0.00021662489557226398,
202
+ "loss": 0.6636,
203
+ "step": 2800
204
+ },
205
+ {
206
+ "epoch": 3.63,
207
+ "learning_rate": 0.00021244778613199665,
208
+ "loss": 0.7003,
209
+ "step": 2900
210
+ },
211
+ {
212
+ "epoch": 3.76,
213
+ "learning_rate": 0.0002082706766917293,
214
+ "loss": 0.6594,
215
+ "step": 3000
216
+ },
217
+ {
218
+ "epoch": 3.88,
219
+ "learning_rate": 0.00020409356725146197,
220
+ "loss": 0.6781,
221
+ "step": 3100
222
+ },
223
+ {
224
+ "epoch": 4.0,
225
+ "eval_accuracy": 0.9040894380700206,
226
+ "eval_loss": 0.3562973439693451,
227
+ "eval_runtime": 42.576,
228
+ "eval_samples_per_second": 159.667,
229
+ "eval_steps_per_second": 2.513,
230
+ "step": 3192
231
+ },
232
+ {
233
+ "epoch": 4.01,
234
+ "learning_rate": 0.00019991645781119465,
235
+ "loss": 0.6921,
236
+ "step": 3200
237
+ },
238
+ {
239
+ "epoch": 4.14,
240
+ "learning_rate": 0.0001957393483709273,
241
+ "loss": 0.6615,
242
+ "step": 3300
243
+ },
244
+ {
245
+ "epoch": 4.26,
246
+ "learning_rate": 0.00019156223893065997,
247
+ "loss": 0.6707,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 4.39,
252
+ "learning_rate": 0.00018738512949039265,
253
+ "loss": 0.6511,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 4.51,
258
+ "learning_rate": 0.0001832080200501253,
259
+ "loss": 0.6974,
260
+ "step": 3600
261
+ },
262
+ {
263
+ "epoch": 4.64,
264
+ "learning_rate": 0.00017903091060985797,
265
+ "loss": 0.6447,
266
+ "step": 3700
267
+ },
268
+ {
269
+ "epoch": 4.76,
270
+ "learning_rate": 0.00017485380116959065,
271
+ "loss": 0.643,
272
+ "step": 3800
273
+ },
274
+ {
275
+ "epoch": 4.89,
276
+ "learning_rate": 0.0001706766917293233,
277
+ "loss": 0.6386,
278
+ "step": 3900
279
+ },
280
+ {
281
+ "epoch": 5.0,
282
+ "eval_accuracy": 0.8986466607825832,
283
+ "eval_loss": 0.3440994322299957,
284
+ "eval_runtime": 42.3928,
285
+ "eval_samples_per_second": 160.358,
286
+ "eval_steps_per_second": 2.524,
287
+ "step": 3990
288
+ },
289
+ {
290
+ "epoch": 5.01,
291
+ "learning_rate": 0.00016649958228905597,
292
+ "loss": 0.6611,
293
+ "step": 4000
294
+ },
295
+ {
296
+ "epoch": 5.14,
297
+ "learning_rate": 0.0001623642439431913,
298
+ "loss": 0.6634,
299
+ "step": 4100
300
+ },
301
+ {
302
+ "epoch": 5.26,
303
+ "learning_rate": 0.00015818713450292397,
304
+ "loss": 0.6419,
305
+ "step": 4200
306
+ },
307
+ {
308
+ "epoch": 5.39,
309
+ "learning_rate": 0.00015401002506265662,
310
+ "loss": 0.6341,
311
+ "step": 4300
312
+ },
313
+ {
314
+ "epoch": 5.51,
315
+ "learning_rate": 0.0001498329156223893,
316
+ "loss": 0.6283,
317
+ "step": 4400
318
+ },
319
+ {
320
+ "epoch": 5.64,
321
+ "learning_rate": 0.00014565580618212197,
322
+ "loss": 0.6551,
323
+ "step": 4500
324
+ },
325
+ {
326
+ "epoch": 5.76,
327
+ "learning_rate": 0.00014147869674185462,
328
+ "loss": 0.6318,
329
+ "step": 4600
330
+ },
331
+ {
332
+ "epoch": 5.89,
333
+ "learning_rate": 0.0001373015873015873,
334
+ "loss": 0.6342,
335
+ "step": 4700
336
+ },
337
+ {
338
+ "epoch": 6.0,
339
+ "eval_accuracy": 0.8993821712268314,
340
+ "eval_loss": 0.3380272388458252,
341
+ "eval_runtime": 42.8693,
342
+ "eval_samples_per_second": 158.575,
343
+ "eval_steps_per_second": 2.496,
344
+ "step": 4788
345
+ },
346
+ {
347
+ "epoch": 6.02,
348
+ "learning_rate": 0.00013312447786131997,
349
+ "loss": 0.6371,
350
+ "step": 4800
351
+ },
352
+ {
353
+ "epoch": 6.14,
354
+ "learning_rate": 0.00012894736842105261,
355
+ "loss": 0.6484,
356
+ "step": 4900
357
+ },
358
+ {
359
+ "epoch": 6.27,
360
+ "learning_rate": 0.0001247702589807853,
361
+ "loss": 0.6331,
362
+ "step": 5000
363
+ },
364
+ {
365
+ "epoch": 6.39,
366
+ "learning_rate": 0.00012059314954051795,
367
+ "loss": 0.6201,
368
+ "step": 5100
369
+ },
370
+ {
371
+ "epoch": 6.52,
372
+ "learning_rate": 0.00011641604010025061,
373
+ "loss": 0.6619,
374
+ "step": 5200
375
+ },
376
+ {
377
+ "epoch": 6.64,
378
+ "learning_rate": 0.00011223893065998329,
379
+ "loss": 0.6337,
380
+ "step": 5300
381
+ },
382
+ {
383
+ "epoch": 6.77,
384
+ "learning_rate": 0.00010806182121971595,
385
+ "loss": 0.6356,
386
+ "step": 5400
387
+ },
388
+ {
389
+ "epoch": 6.89,
390
+ "learning_rate": 0.00010388471177944861,
391
+ "loss": 0.6275,
392
+ "step": 5500
393
+ },
394
+ {
395
+ "epoch": 7.0,
396
+ "eval_accuracy": 0.8982053545160341,
397
+ "eval_loss": 0.33755984902381897,
398
+ "eval_runtime": 42.4768,
399
+ "eval_samples_per_second": 160.04,
400
+ "eval_steps_per_second": 2.519,
401
+ "step": 5586
402
+ },
403
+ {
404
+ "epoch": 7.02,
405
+ "learning_rate": 9.970760233918129e-05,
406
+ "loss": 0.6359,
407
+ "step": 5600
408
+ },
409
+ {
410
+ "epoch": 7.14,
411
+ "learning_rate": 9.553049289891395e-05,
412
+ "loss": 0.6502,
413
+ "step": 5700
414
+ },
415
+ {
416
+ "epoch": 7.27,
417
+ "learning_rate": 9.135338345864661e-05,
418
+ "loss": 0.6138,
419
+ "step": 5800
420
+ },
421
+ {
422
+ "epoch": 7.39,
423
+ "learning_rate": 8.717627401837928e-05,
424
+ "loss": 0.6309,
425
+ "step": 5900
426
+ },
427
+ {
428
+ "epoch": 7.52,
429
+ "learning_rate": 8.299916457811194e-05,
430
+ "loss": 0.6386,
431
+ "step": 6000
432
+ },
433
+ {
434
+ "epoch": 7.64,
435
+ "learning_rate": 7.88220551378446e-05,
436
+ "loss": 0.6149,
437
+ "step": 6100
438
+ },
439
+ {
440
+ "epoch": 7.77,
441
+ "learning_rate": 7.464494569757727e-05,
442
+ "loss": 0.6218,
443
+ "step": 6200
444
+ },
445
+ {
446
+ "epoch": 7.89,
447
+ "learning_rate": 7.050960735171261e-05,
448
+ "loss": 0.6349,
449
+ "step": 6300
450
+ },
451
+ {
452
+ "epoch": 8.0,
453
+ "eval_accuracy": 0.9014416004707266,
454
+ "eval_loss": 0.3333294689655304,
455
+ "eval_runtime": 43.296,
456
+ "eval_samples_per_second": 157.012,
457
+ "eval_steps_per_second": 2.471,
458
+ "step": 6384
459
+ },
460
+ {
461
+ "epoch": 8.02,
462
+ "learning_rate": 6.633249791144527e-05,
463
+ "loss": 0.6115,
464
+ "step": 6400
465
+ },
466
+ {
467
+ "epoch": 8.15,
468
+ "learning_rate": 6.215538847117793e-05,
469
+ "loss": 0.6463,
470
+ "step": 6500
471
+ },
472
+ {
473
+ "epoch": 8.27,
474
+ "learning_rate": 5.79782790309106e-05,
475
+ "loss": 0.6183,
476
+ "step": 6600
477
+ },
478
+ {
479
+ "epoch": 8.4,
480
+ "learning_rate": 5.380116959064327e-05,
481
+ "loss": 0.6475,
482
+ "step": 6700
483
+ },
484
+ {
485
+ "epoch": 8.52,
486
+ "learning_rate": 4.962406015037593e-05,
487
+ "loss": 0.6437,
488
+ "step": 6800
489
+ },
490
+ {
491
+ "epoch": 8.65,
492
+ "learning_rate": 4.54469507101086e-05,
493
+ "loss": 0.6265,
494
+ "step": 6900
495
+ },
496
+ {
497
+ "epoch": 8.77,
498
+ "learning_rate": 4.1269841269841266e-05,
499
+ "loss": 0.6326,
500
+ "step": 7000
501
+ },
502
+ {
503
+ "epoch": 8.9,
504
+ "learning_rate": 3.709273182957393e-05,
505
+ "loss": 0.6261,
506
+ "step": 7100
507
+ },
508
+ {
509
+ "epoch": 9.0,
510
+ "eval_accuracy": 0.9024713150926743,
511
+ "eval_loss": 0.32949280738830566,
512
+ "eval_runtime": 42.5233,
513
+ "eval_samples_per_second": 159.865,
514
+ "eval_steps_per_second": 2.516,
515
+ "step": 7182
516
+ },
517
+ {
518
+ "epoch": 9.02,
519
+ "learning_rate": 3.2915622389306596e-05,
520
+ "loss": 0.6306,
521
+ "step": 7200
522
+ },
523
+ {
524
+ "epoch": 9.15,
525
+ "learning_rate": 2.873851294903926e-05,
526
+ "loss": 0.6362,
527
+ "step": 7300
528
+ },
529
+ {
530
+ "epoch": 9.27,
531
+ "learning_rate": 2.4561403508771925e-05,
532
+ "loss": 0.6318,
533
+ "step": 7400
534
+ },
535
+ {
536
+ "epoch": 9.4,
537
+ "learning_rate": 2.0384294068504594e-05,
538
+ "loss": 0.6506,
539
+ "step": 7500
540
+ },
541
+ {
542
+ "epoch": 9.52,
543
+ "learning_rate": 1.620718462823726e-05,
544
+ "loss": 0.6202,
545
+ "step": 7600
546
+ },
547
+ {
548
+ "epoch": 9.65,
549
+ "learning_rate": 1.2030075187969923e-05,
550
+ "loss": 0.6217,
551
+ "step": 7700
552
+ },
553
+ {
554
+ "epoch": 9.77,
555
+ "learning_rate": 7.852965747702588e-06,
556
+ "loss": 0.6307,
557
+ "step": 7800
558
+ },
559
+ {
560
+ "epoch": 9.9,
561
+ "learning_rate": 3.6758563074352545e-06,
562
+ "loss": 0.6188,
563
+ "step": 7900
564
+ },
565
+ {
566
+ "epoch": 10.0,
567
+ "eval_accuracy": 0.9024713150926743,
568
+ "eval_loss": 0.33220550417900085,
569
+ "eval_runtime": 42.0746,
570
+ "eval_samples_per_second": 161.57,
571
+ "eval_steps_per_second": 2.543,
572
+ "step": 7980
573
+ },
574
+ {
575
+ "epoch": 10.0,
576
+ "step": 7980,
577
+ "total_flos": 0.0,
578
+ "train_loss": 0.8915880279732229,
579
+ "train_runtime": 4529.6112,
580
+ "train_samples_per_second": 112.8,
581
+ "train_steps_per_second": 1.762
582
+ }
583
+ ],
584
+ "max_steps": 7980,
585
+ "num_train_epochs": 10,
586
+ "total_flos": 0.0,
587
+ "trial_name": null,
588
+ "trial_params": null
589
+ }