JulienRPA commited on
Commit
55d26a2
1 Parent(s): d4367d1

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e43f071cc4836a60597cf222c8478e4cd1e1111165dfb21cf6697b97c233180f
3
  size 2000137067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4a84163d9e3f58c2ca6d7ae1232f180bd2352a5dd7f211bb9ecff86446527b
3
  size 2000137067
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df36a6fbb124fad805fa0396d55c0b39c87ddb96291e2ba989cc66d6b6934b19
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0f5499abb66ce84c1a439abb41338b00f823616fdcd09cd0b02ac791cb90e7d
3
  size 1002469625
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26de261b54d4d4f3a241740bf2341a65cfa642b817cbd9e14d3e07fd77f01229
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027bacb9a66f7124054acdc9881eec1028c531e19ceae1a2fb05ecf19f4335f1
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58fe3c6aa062ab01fcab2b20e4eaf1d3d1ce48b9ce3761ac8c445680169e65ca
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ae23ef329f4306e7c601317d8e62a0d9cdc03945f103aed1e4405fe8f53be5
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.590496156533892,
5
- "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -530,11 +530,142 @@
530
  "eval_samples_per_second": 3.388,
531
  "eval_steps_per_second": 0.425,
532
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  }
534
  ],
535
  "max_steps": 11448,
536
  "num_train_epochs": 8,
537
- "total_flos": 5257694441864832.0,
538
  "trial_name": null,
539
  "trial_params": null
540
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.988120195667365,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
530
  "eval_samples_per_second": 3.388,
531
  "eval_steps_per_second": 0.425,
532
  "step": 8000
533
+ },
534
+ {
535
+ "epoch": 5.66,
536
+ "learning_rate": 1.8708091193562807e-05,
537
+ "loss": 1.0546,
538
+ "step": 8100
539
+ },
540
+ {
541
+ "epoch": 5.73,
542
+ "learning_rate": 1.8149307107733573e-05,
543
+ "loss": 1.0579,
544
+ "step": 8200
545
+ },
546
+ {
547
+ "epoch": 5.8,
548
+ "learning_rate": 1.7590523021904336e-05,
549
+ "loss": 1.0204,
550
+ "step": 8300
551
+ },
552
+ {
553
+ "epoch": 5.87,
554
+ "learning_rate": 1.70317389360751e-05,
555
+ "loss": 1.0398,
556
+ "step": 8400
557
+ },
558
+ {
559
+ "epoch": 5.94,
560
+ "learning_rate": 1.6472954850245866e-05,
561
+ "loss": 0.9992,
562
+ "step": 8500
563
+ },
564
+ {
565
+ "epoch": 6.01,
566
+ "learning_rate": 1.5914170764416632e-05,
567
+ "loss": 0.9756,
568
+ "step": 8600
569
+ },
570
+ {
571
+ "epoch": 6.08,
572
+ "learning_rate": 1.5355386678587395e-05,
573
+ "loss": 0.8385,
574
+ "step": 8700
575
+ },
576
+ {
577
+ "epoch": 6.15,
578
+ "learning_rate": 1.479660259275816e-05,
579
+ "loss": 0.8815,
580
+ "step": 8800
581
+ },
582
+ {
583
+ "epoch": 6.22,
584
+ "learning_rate": 1.4237818506928924e-05,
585
+ "loss": 0.8447,
586
+ "step": 8900
587
+ },
588
+ {
589
+ "epoch": 6.29,
590
+ "learning_rate": 1.3679034421099687e-05,
591
+ "loss": 0.8553,
592
+ "step": 9000
593
+ },
594
+ {
595
+ "epoch": 6.36,
596
+ "learning_rate": 1.312025033527045e-05,
597
+ "loss": 0.8188,
598
+ "step": 9100
599
+ },
600
+ {
601
+ "epoch": 6.43,
602
+ "learning_rate": 1.2561466249441217e-05,
603
+ "loss": 0.8241,
604
+ "step": 9200
605
+ },
606
+ {
607
+ "epoch": 6.5,
608
+ "learning_rate": 1.2002682163611981e-05,
609
+ "loss": 0.8118,
610
+ "step": 9300
611
+ },
612
+ {
613
+ "epoch": 6.57,
614
+ "learning_rate": 1.1443898077782746e-05,
615
+ "loss": 0.8357,
616
+ "step": 9400
617
+ },
618
+ {
619
+ "epoch": 6.64,
620
+ "learning_rate": 1.0885113991953509e-05,
621
+ "loss": 0.8063,
622
+ "step": 9500
623
+ },
624
+ {
625
+ "epoch": 6.71,
626
+ "learning_rate": 1.0326329906124274e-05,
627
+ "loss": 0.8263,
628
+ "step": 9600
629
+ },
630
+ {
631
+ "epoch": 6.78,
632
+ "learning_rate": 9.767545820295038e-06,
633
+ "loss": 0.8064,
634
+ "step": 9700
635
+ },
636
+ {
637
+ "epoch": 6.85,
638
+ "learning_rate": 9.208761734465803e-06,
639
+ "loss": 0.7858,
640
+ "step": 9800
641
+ },
642
+ {
643
+ "epoch": 6.92,
644
+ "learning_rate": 8.649977648636568e-06,
645
+ "loss": 0.7854,
646
+ "step": 9900
647
+ },
648
+ {
649
+ "epoch": 6.99,
650
+ "learning_rate": 8.09119356280733e-06,
651
+ "loss": 0.7765,
652
+ "step": 10000
653
+ },
654
+ {
655
+ "epoch": 6.99,
656
+ "eval_bleu": 74.7723,
657
+ "eval_em": 0.0349,
658
+ "eval_gen_len": 46.1685,
659
+ "eval_loss": 1.0809996128082275,
660
+ "eval_runtime": 352.8566,
661
+ "eval_samples_per_second": 3.415,
662
+ "eval_steps_per_second": 0.428,
663
+ "step": 10000
664
  }
665
  ],
666
  "max_steps": 11448,
667
  "num_train_epochs": 8,
668
+ "total_flos": 6573454605451008.0,
669
  "trial_name": null,
670
  "trial_params": null
671
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df36a6fbb124fad805fa0396d55c0b39c87ddb96291e2ba989cc66d6b6934b19
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0f5499abb66ce84c1a439abb41338b00f823616fdcd09cd0b02ac791cb90e7d
3
  size 1002469625
runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0edc726214577185b7f744ad847d5a0a4a05b3fab28bad4d6a50d38bc569dd95
3
- size 22766
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a5d5b48ed4e7070ca4651c54a1acddc9ff99d8b562927254692e8f85dac97b
3
+ size 26322