JulienRPA committed on
Commit
d4367d1
1 Parent(s): 7726262

Training in progress, step 8000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:546cd4e7ec947305a665edaa2e582e225d7b8301eaa78f72956ea9b0b6882be6
3
  size 2000137067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43f071cc4836a60597cf222c8478e4cd1e1111165dfb21cf6697b97c233180f
3
  size 2000137067
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df36a6fbb124fad805fa0396d55c0b39c87ddb96291e2ba989cc66d6b6934b19
3
  size 1002469625
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db097afe89ed3c3eb23a6fe209105077506d8139157678f90acd89f98273927d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26de261b54d4d4f3a241740bf2341a65cfa642b817cbd9e14d3e07fd77f01229
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ace18575c64fb9061a4bdb4187294f04e31e9a65a2e4da680ca78aeef9f963e2
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fe3c6aa062ab01fcab2b20e4eaf1d3d1ce48b9ce3761ac8c445680169e65ca
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.1928721174004195,
5
- "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -399,11 +399,142 @@
399
  "eval_samples_per_second": 2.847,
400
  "eval_steps_per_second": 0.357,
401
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  }
403
  ],
404
  "max_steps": 11448,
405
  "num_train_epochs": 8,
406
- "total_flos": 3958404255138432.0,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.590496156533892,
5
+ "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
399
  "eval_samples_per_second": 2.847,
400
  "eval_steps_per_second": 0.357,
401
  "step": 6000
402
+ },
403
+ {
404
+ "epoch": 4.26,
405
+ "learning_rate": 2.9883772910147524e-05,
406
+ "loss": 1.5968,
407
+ "step": 6100
408
+ },
409
+ {
410
+ "epoch": 4.33,
411
+ "learning_rate": 2.9324988824318283e-05,
412
+ "loss": 1.5813,
413
+ "step": 6200
414
+ },
415
+ {
416
+ "epoch": 4.4,
417
+ "learning_rate": 2.876620473848905e-05,
418
+ "loss": 1.5421,
419
+ "step": 6300
420
+ },
421
+ {
422
+ "epoch": 4.47,
423
+ "learning_rate": 2.8207420652659816e-05,
424
+ "loss": 1.4852,
425
+ "step": 6400
426
+ },
427
+ {
428
+ "epoch": 4.54,
429
+ "learning_rate": 2.7648636566830576e-05,
430
+ "loss": 1.5027,
431
+ "step": 6500
432
+ },
433
+ {
434
+ "epoch": 4.61,
435
+ "learning_rate": 2.7089852481001342e-05,
436
+ "loss": 1.4951,
437
+ "step": 6600
438
+ },
439
+ {
440
+ "epoch": 4.68,
441
+ "learning_rate": 2.653106839517211e-05,
442
+ "loss": 1.4803,
443
+ "step": 6700
444
+ },
445
+ {
446
+ "epoch": 4.75,
447
+ "learning_rate": 2.597228430934287e-05,
448
+ "loss": 1.4127,
449
+ "step": 6800
450
+ },
451
+ {
452
+ "epoch": 4.82,
453
+ "learning_rate": 2.5413500223513638e-05,
454
+ "loss": 1.3896,
455
+ "step": 6900
456
+ },
457
+ {
458
+ "epoch": 4.89,
459
+ "learning_rate": 2.48547161376844e-05,
460
+ "loss": 1.3656,
461
+ "step": 7000
462
+ },
463
+ {
464
+ "epoch": 4.96,
465
+ "learning_rate": 2.4295932051855164e-05,
466
+ "loss": 1.3432,
467
+ "step": 7100
468
+ },
469
+ {
470
+ "epoch": 5.03,
471
+ "learning_rate": 2.373714796602593e-05,
472
+ "loss": 1.2224,
473
+ "step": 7200
474
+ },
475
+ {
476
+ "epoch": 5.1,
477
+ "learning_rate": 2.3178363880196693e-05,
478
+ "loss": 1.1396,
479
+ "step": 7300
480
+ },
481
+ {
482
+ "epoch": 5.17,
483
+ "learning_rate": 2.2619579794367456e-05,
484
+ "loss": 1.1475,
485
+ "step": 7400
486
+ },
487
+ {
488
+ "epoch": 5.24,
489
+ "learning_rate": 2.206079570853822e-05,
490
+ "loss": 1.0669,
491
+ "step": 7500
492
+ },
493
+ {
494
+ "epoch": 5.31,
495
+ "learning_rate": 2.1502011622708985e-05,
496
+ "loss": 1.1356,
497
+ "step": 7600
498
+ },
499
+ {
500
+ "epoch": 5.38,
501
+ "learning_rate": 2.0943227536879752e-05,
502
+ "loss": 1.0965,
503
+ "step": 7700
504
+ },
505
+ {
506
+ "epoch": 5.45,
507
+ "learning_rate": 2.0384443451050515e-05,
508
+ "loss": 1.1086,
509
+ "step": 7800
510
+ },
511
+ {
512
+ "epoch": 5.52,
513
+ "learning_rate": 1.982565936522128e-05,
514
+ "loss": 1.0642,
515
+ "step": 7900
516
+ },
517
+ {
518
+ "epoch": 5.59,
519
+ "learning_rate": 1.9266875279392044e-05,
520
+ "loss": 1.0717,
521
+ "step": 8000
522
+ },
523
+ {
524
+ "epoch": 5.59,
525
+ "eval_bleu": 69.9606,
526
+ "eval_em": 0.0199,
527
+ "eval_gen_len": 46.0722,
528
+ "eval_loss": 1.26251220703125,
529
+ "eval_runtime": 355.6397,
530
+ "eval_samples_per_second": 3.388,
531
+ "eval_steps_per_second": 0.425,
532
+ "step": 8000
533
  }
534
  ],
535
  "max_steps": 11448,
536
  "num_train_epochs": 8,
537
+ "total_flos": 5257694441864832.0,
538
  "trial_name": null,
539
  "trial_params": null
540
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df36a6fbb124fad805fa0396d55c0b39c87ddb96291e2ba989cc66d6b6934b19
3
  size 1002469625
runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a959ff46eca4f3545c8c05e80af7a2fe5e478b7b3102f1f41ccc3cfcf54b040
3
- size 19210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0edc726214577185b7f744ad847d5a0a4a05b3fab28bad4d6a50d38bc569dd95
3
+ size 22766