ales committed on
Commit
b8c52bf
·
1 Parent(s): 9a9c11d

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.25,
3
- "eval_loss": 0.5364237427711487,
4
- "eval_runtime": 16.6084,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.853,
7
- "eval_steps_per_second": 0.12,
8
- "eval_wer": 54.57875457875458,
9
- "train_loss": 0.0719480574131012,
10
- "train_runtime": 406.2172,
11
- "train_samples_per_second": 15.755,
12
- "train_steps_per_second": 0.492
13
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "eval_loss": 0.5074095726013184,
4
+ "eval_runtime": 16.8249,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.804,
7
+ "eval_steps_per_second": 0.119,
8
+ "eval_wer": 52.197802197802204,
9
+ "train_loss": 0.10702953418095906,
10
+ "train_runtime": 833.6007,
11
+ "train_samples_per_second": 11.516,
12
+ "train_steps_per_second": 0.36
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.25,
3
- "eval_loss": 0.5364237427711487,
4
- "eval_runtime": 16.6084,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.853,
7
- "eval_steps_per_second": 0.12,
8
- "eval_wer": 54.57875457875458
9
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "eval_loss": 0.5074095726013184,
4
+ "eval_runtime": 16.8249,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.804,
7
+ "eval_steps_per_second": 0.119,
8
+ "eval_wer": 52.197802197802204
9
  }
runs/Dec13_12-14-07_d7f040c448a8/events.out.tfevents.1670934730.d7f040c448a8.15037.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:621b7a2306709878e0351fd168351ff2ec7c15bf93824a12f4627acf91ccc932
3
+ size 358
train.log CHANGED
@@ -118,3 +118,9 @@
118
  {'loss': 0.4407, 'learning_rate': 5.033333333333333e-06, 'epoch': 0.33}
119
  {'eval_loss': 0.5046072602272034, 'eval_wer': 53.11355311355312, 'eval_runtime': 17.9261, 'eval_samples_per_second': 3.57, 'eval_steps_per_second': 0.112, 'epoch': 0.33}
120
  {'train_runtime': 833.6007, 'train_samples_per_second': 11.516, 'train_steps_per_second': 0.36, 'train_loss': 0.10702953418095906, 'epoch': 0.33}
 
 
 
 
 
 
 
118
  {'loss': 0.4407, 'learning_rate': 5.033333333333333e-06, 'epoch': 0.33}
119
  {'eval_loss': 0.5046072602272034, 'eval_wer': 53.11355311355312, 'eval_runtime': 17.9261, 'eval_samples_per_second': 3.57, 'eval_steps_per_second': 0.112, 'epoch': 0.33}
120
  {'train_runtime': 833.6007, 'train_samples_per_second': 11.516, 'train_steps_per_second': 0.36, 'train_loss': 0.10702953418095906, 'epoch': 0.33}
121
+ ***** train metrics *****
122
+ epoch = 0.33
123
+ train_loss = 0.107
124
+ train_runtime = 0:13:53.60
125
+ train_samples_per_second = 11.516
126
+ train_steps_per_second = 0.36
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 0.25,
3
- "train_loss": 0.0719480574131012,
4
- "train_runtime": 406.2172,
5
- "train_samples_per_second": 15.755,
6
- "train_steps_per_second": 0.492
7
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "train_loss": 0.10702953418095906,
4
+ "train_runtime": 833.6007,
5
+ "train_samples_per_second": 11.516,
6
+ "train_steps_per_second": 0.36
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 54.57875457875458,
3
- "best_model_checkpoint": "./checkpoint-160",
4
- "epoch": 0.25,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -317,18 +317,168 @@
317
  "step": 200
318
  },
319
  {
320
- "epoch": 0.25,
321
- "step": 200,
322
- "total_flos": 1.57560864768e+17,
323
- "train_loss": 0.0719480574131012,
324
- "train_runtime": 406.2172,
325
- "train_samples_per_second": 15.755,
326
- "train_steps_per_second": 0.492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  }
328
  ],
329
- "max_steps": 200,
330
  "num_train_epochs": 9223372036854775807,
331
- "total_flos": 1.57560864768e+17,
332
  "trial_name": null,
333
  "trial_params": null
334
  }
 
1
  {
2
+ "best_metric": 52.197802197802204,
3
+ "best_model_checkpoint": "./checkpoint-280",
4
+ "epoch": 0.3333333333333333,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
317
  "step": 200
318
  },
319
  {
320
+ "epoch": 0.03,
321
+ "learning_rate": 8.033333333333335e-06,
322
+ "loss": 0.1922,
323
+ "step": 210
324
+ },
325
+ {
326
+ "epoch": 0.03,
327
+ "eval_loss": 0.5239479541778564,
328
+ "eval_runtime": 16.5572,
329
+ "eval_samples_per_second": 3.865,
330
+ "eval_steps_per_second": 0.121,
331
+ "eval_wer": 55.494505494505496,
332
+ "step": 210
333
+ },
334
+ {
335
+ "epoch": 0.07,
336
+ "learning_rate": 7.7e-06,
337
+ "loss": 0.2229,
338
+ "step": 220
339
+ },
340
+ {
341
+ "epoch": 0.07,
342
+ "eval_loss": 0.5312361717224121,
343
+ "eval_runtime": 17.8694,
344
+ "eval_samples_per_second": 3.582,
345
+ "eval_steps_per_second": 0.112,
346
+ "eval_wer": 54.395604395604394,
347
+ "step": 220
348
+ },
349
+ {
350
+ "epoch": 0.1,
351
+ "learning_rate": 7.3666666666666676e-06,
352
+ "loss": 0.1976,
353
+ "step": 230
354
+ },
355
+ {
356
+ "epoch": 0.1,
357
+ "eval_loss": 0.5302589535713196,
358
+ "eval_runtime": 17.0912,
359
+ "eval_samples_per_second": 3.745,
360
+ "eval_steps_per_second": 0.117,
361
+ "eval_wer": 54.02930402930403,
362
+ "step": 230
363
+ },
364
+ {
365
+ "epoch": 0.13,
366
+ "learning_rate": 7.033333333333334e-06,
367
+ "loss": 0.2823,
368
+ "step": 240
369
+ },
370
+ {
371
+ "epoch": 0.13,
372
+ "eval_loss": 0.5269189476966858,
373
+ "eval_runtime": 17.9989,
374
+ "eval_samples_per_second": 3.556,
375
+ "eval_steps_per_second": 0.111,
376
+ "eval_wer": 54.02930402930403,
377
+ "step": 240
378
+ },
379
+ {
380
+ "epoch": 0.17,
381
+ "learning_rate": 6.700000000000001e-06,
382
+ "loss": 0.2265,
383
+ "step": 250
384
+ },
385
+ {
386
+ "epoch": 0.17,
387
+ "eval_loss": 0.5312862992286682,
388
+ "eval_runtime": 18.4593,
389
+ "eval_samples_per_second": 3.467,
390
+ "eval_steps_per_second": 0.108,
391
+ "eval_wer": 55.67765567765568,
392
+ "step": 250
393
+ },
394
+ {
395
+ "epoch": 0.2,
396
+ "learning_rate": 6.366666666666668e-06,
397
+ "loss": 0.3728,
398
+ "step": 260
399
+ },
400
+ {
401
+ "epoch": 0.2,
402
+ "eval_loss": 0.5128015279769897,
403
+ "eval_runtime": 18.2249,
404
+ "eval_samples_per_second": 3.512,
405
+ "eval_steps_per_second": 0.11,
406
+ "eval_wer": 53.47985347985348,
407
+ "step": 260
408
+ },
409
+ {
410
+ "epoch": 0.23,
411
+ "learning_rate": 6.033333333333335e-06,
412
+ "loss": 0.3738,
413
+ "step": 270
414
+ },
415
+ {
416
+ "epoch": 0.23,
417
+ "eval_loss": 0.5025143623352051,
418
+ "eval_runtime": 17.2543,
419
+ "eval_samples_per_second": 3.709,
420
+ "eval_steps_per_second": 0.116,
421
+ "eval_wer": 52.74725274725275,
422
+ "step": 270
423
+ },
424
+ {
425
+ "epoch": 0.27,
426
+ "learning_rate": 5.7e-06,
427
+ "loss": 0.488,
428
+ "step": 280
429
+ },
430
+ {
431
+ "epoch": 0.27,
432
+ "eval_loss": 0.5074095726013184,
433
+ "eval_runtime": 18.4062,
434
+ "eval_samples_per_second": 3.477,
435
+ "eval_steps_per_second": 0.109,
436
+ "eval_wer": 52.197802197802204,
437
+ "step": 280
438
+ },
439
+ {
440
+ "epoch": 0.3,
441
+ "learning_rate": 5.366666666666666e-06,
442
+ "loss": 0.4142,
443
+ "step": 290
444
+ },
445
+ {
446
+ "epoch": 0.3,
447
+ "eval_loss": 0.5057792663574219,
448
+ "eval_runtime": 16.5894,
449
+ "eval_samples_per_second": 3.858,
450
+ "eval_steps_per_second": 0.121,
451
+ "eval_wer": 52.56410256410257,
452
+ "step": 290
453
+ },
454
+ {
455
+ "epoch": 0.33,
456
+ "learning_rate": 5.033333333333333e-06,
457
+ "loss": 0.4407,
458
+ "step": 300
459
+ },
460
+ {
461
+ "epoch": 0.33,
462
+ "eval_loss": 0.5046072602272034,
463
+ "eval_runtime": 17.9261,
464
+ "eval_samples_per_second": 3.57,
465
+ "eval_steps_per_second": 0.112,
466
+ "eval_wer": 53.11355311355312,
467
+ "step": 300
468
+ },
469
+ {
470
+ "epoch": 0.33,
471
+ "step": 300,
472
+ "total_flos": 2.36341297152e+17,
473
+ "train_loss": 0.10702953418095906,
474
+ "train_runtime": 833.6007,
475
+ "train_samples_per_second": 11.516,
476
+ "train_steps_per_second": 0.36
477
  }
478
  ],
479
+ "max_steps": 300,
480
  "num_train_epochs": 9223372036854775807,
481
+ "total_flos": 2.36341297152e+17,
482
  "trial_name": null,
483
  "trial_params": null
484
  }