marinone94 committed on
Commit
d71bd83
‒
1 Parent(s): a5ea3ed

Training in progress, step 700

Browse files
{checkpoint-400 → checkpoint-700}/config.json RENAMED
File without changes
{checkpoint-400 → checkpoint-700}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf161ebd936c8ad7c9d8aa09d6cfacea3c1f1b7830996289de6c09c5b17597b5
3
  size 2490346001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069822e7cb50437bf3b0c3c681ba2097aca5275abda255e4fc6af68096387a0f
3
  size 2490346001
{checkpoint-400 → checkpoint-700}/preprocessor_config.json RENAMED
File without changes
{checkpoint-400 → checkpoint-700}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3615598ae508e9ab81f60392073741effb41877fe43b3faf976c78e298f8b7ce
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82fd4ef9d1ee16be752146f67271d9079ee4c70b3fc0deb5e49afca834a8366
3
  size 1262067185
{checkpoint-400 → checkpoint-700}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6539ac9bd8d5283f6052bfeb40b134408bae7309c4985eb1a92a55b39e90aa50
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87ff61a351af84ca56c488c49a3cc628f0fcf141b18f2d6a7cc51cc4bc0d60cf
3
  size 14567
{checkpoint-400 → checkpoint-700}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c521307eec3705f40e440815e13caacabdaf54c21d0d7c9b9858aad14a054aa
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78925f079cf346f5de839091ad9ea905df589af85f400d7d4ba825eb420db33
3
  size 559
{checkpoint-400 → checkpoint-700}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46afeb3652f3b68ce369c52d170c5c98db425df4214d18b1edefe2376ca2a92e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8642bc4d6c53b743cbfe106c9335222647650f89ec52d95e575d8e1f87f4f2be
3
  size 623
{checkpoint-400 → checkpoint-700}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1845444059976932,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -282,11 +282,218 @@
282
  "eval_steps_per_second": 1.115,
283
  "eval_wer": 1.0,
284
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  }
286
  ],
287
  "max_steps": 6501,
288
  "num_train_epochs": 3,
289
- "total_flos": 7.566114292968407e+18,
290
  "trial_name": null,
291
  "trial_params": null
292
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3229527104959631,
5
+ "global_step": 700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
282
  "eval_steps_per_second": 1.115,
283
  "eval_wer": 1.0,
284
  "step": 400
285
+ },
286
+ {
287
+ "epoch": 0.19,
288
+ "learning_rate": 7.183281004709575e-05,
289
+ "loss": 3.0018,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 0.19,
294
+ "learning_rate": 7.171507064364207e-05,
295
+ "loss": 2.9947,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 0.2,
300
+ "learning_rate": 7.159733124018838e-05,
301
+ "loss": 2.9935,
302
+ "step": 430
303
+ },
304
+ {
305
+ "epoch": 0.2,
306
+ "learning_rate": 7.147959183673469e-05,
307
+ "loss": 3.0818,
308
+ "step": 440
309
+ },
310
+ {
311
+ "epoch": 0.21,
312
+ "learning_rate": 7.137362637362637e-05,
313
+ "loss": 3.3386,
314
+ "step": 450
315
+ },
316
+ {
317
+ "epoch": 0.21,
318
+ "learning_rate": 7.125588697017268e-05,
319
+ "loss": 3.0803,
320
+ "step": 460
321
+ },
322
+ {
323
+ "epoch": 0.22,
324
+ "learning_rate": 7.1138147566719e-05,
325
+ "loss": 3.1655,
326
+ "step": 470
327
+ },
328
+ {
329
+ "epoch": 0.22,
330
+ "learning_rate": 7.10204081632653e-05,
331
+ "loss": 3.0899,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 0.23,
336
+ "learning_rate": 7.090266875981161e-05,
337
+ "loss": 3.1638,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 0.23,
342
+ "learning_rate": 7.079670329670328e-05,
343
+ "loss": 3.3518,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 0.23,
348
+ "eval_loss": 2.9412434101104736,
349
+ "eval_runtime": 130.1575,
350
+ "eval_samples_per_second": 35.495,
351
+ "eval_steps_per_second": 1.114,
352
+ "eval_wer": 1.0,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 0.24,
357
+ "learning_rate": 7.06789638932496e-05,
358
+ "loss": 2.9352,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 0.24,
363
+ "learning_rate": 7.056122448979591e-05,
364
+ "loss": 2.9405,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 0.24,
369
+ "learning_rate": 7.044348508634222e-05,
370
+ "loss": 2.9125,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 0.25,
375
+ "learning_rate": 7.032574568288854e-05,
376
+ "loss": 2.9401,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 0.25,
381
+ "learning_rate": 7.021978021978021e-05,
382
+ "loss": 3.1443,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 0.26,
387
+ "learning_rate": 7.010204081632653e-05,
388
+ "loss": 2.7692,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 0.26,
393
+ "learning_rate": 6.998430141287284e-05,
394
+ "loss": 2.7574,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 0.27,
399
+ "learning_rate": 6.986656200941914e-05,
400
+ "loss": 2.706,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 0.27,
405
+ "learning_rate": 6.974882260596545e-05,
406
+ "loss": 2.7155,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 0.28,
411
+ "learning_rate": 6.964285714285713e-05,
412
+ "loss": 2.8747,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 0.28,
417
+ "eval_loss": 2.5006003379821777,
418
+ "eval_runtime": 129.3111,
419
+ "eval_samples_per_second": 35.728,
420
+ "eval_steps_per_second": 1.121,
421
+ "eval_wer": 1.0,
422
+ "step": 600
423
+ },
424
+ {
425
+ "epoch": 0.28,
426
+ "learning_rate": 6.952511773940345e-05,
427
+ "loss": 2.5368,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 0.29,
432
+ "learning_rate": 6.940737833594976e-05,
433
+ "loss": 2.5942,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 0.29,
438
+ "learning_rate": 6.928963893249607e-05,
439
+ "loss": 2.4907,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 0.3,
444
+ "learning_rate": 6.917189952904238e-05,
445
+ "loss": 2.472,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 0.3,
450
+ "learning_rate": 6.907770800627943e-05,
451
+ "loss": 2.6826,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 0.3,
456
+ "learning_rate": 6.895996860282574e-05,
457
+ "loss": 2.3167,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 0.31,
462
+ "learning_rate": 6.884222919937205e-05,
463
+ "loss": 2.3457,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 0.31,
468
+ "learning_rate": 6.872448979591836e-05,
469
+ "loss": 2.3532,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 0.32,
474
+ "learning_rate": 6.860675039246468e-05,
475
+ "loss": 2.4822,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 0.32,
480
+ "learning_rate": 6.850078492935636e-05,
481
+ "loss": 3.1002,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 0.32,
486
+ "eval_loss": 2.819200277328491,
487
+ "eval_runtime": 129.5044,
488
+ "eval_samples_per_second": 35.674,
489
+ "eval_steps_per_second": 1.12,
490
+ "eval_wer": 0.9999694058618369,
491
+ "step": 700
492
  }
493
  ],
494
  "max_steps": 6501,
495
  "num_train_epochs": 3,
496
+ "total_flos": 1.3228060515474647e+19,
497
  "trial_name": null,
498
  "trial_params": null
499
  }
{checkpoint-400 → checkpoint-700}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8516ba404c3005619dab698e95eba4925ba995d7c93b7c12a9a3ec6e076a633a
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82fd4ef9d1ee16be752146f67271d9079ee4c70b3fc0deb5e49afca834a8366
3
  size 1262067185