Bingsu commited on
Commit
88ea5b8
1 Parent(s): eb69dd3

Training in progress, step 560000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17d11c6b724af8ea30fb05f7b9f206f1931ddd49ca15fc7205cddd2e9832348
3
  size 586828837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047de29e872fa886935b0856cae368679a5024ec78d3b02056971d0dafe03f46
3
  size 586828837
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c30ebd022a5fa50904724c61cf334da708e967be5166a0a78886697c74d050
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
3
  size 146774203
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e43e0eaad579d0c939892b8662eb3e95a1d059dc213782ff9d052bd6e7270801
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6925d15f0ca8b085673c2a9c495fa03dd265589a6d0e5da63276f20be7165697
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b6e2b6cd430c3d4500f9b7bb538e1dd473e0b991a8a35061eb4ed746fc98502
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1866493c6437f9be9b061bda7fb54561f6f075e18eb8ff9def3d978f033c740
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db8055e57f05540543729c217695af567f894e03204f78d3fde07bec2149433d
3
  size 733555848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f637d7b8ff1440e6b42939ee3d5db1515f248a64a9ccc57bfd7e929c8ce06320
3
  size 733555848
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3634871318868793,
5
- "global_step": 550000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16506,11 +16506,311 @@
16506
  "learning_rate": 0.007174402227596965,
16507
  "loss": 8.0567,
16508
  "step": 550000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16509
  }
16510
  ],
16511
  "max_steps": 1000000,
16512
  "num_train_epochs": 5,
16513
- "total_flos": 8.766050956131656e+17,
16514
  "trial_name": null,
16515
  "trial_params": null
16516
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.4064596251939134,
5
+ "global_step": 560000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16506
  "learning_rate": 0.007174402227596965,
16507
  "loss": 8.0567,
16508
  "step": 550000
16509
+ },
16510
+ {
16511
+ "epoch": 2.36,
16512
+ "learning_rate": 0.007170359981885019,
16513
+ "loss": 8.0657,
16514
+ "step": 550200
16515
+ },
16516
+ {
16517
+ "epoch": 2.37,
16518
+ "learning_rate": 0.007166315987569351,
16519
+ "loss": 8.0443,
16520
+ "step": 550400
16521
+ },
16522
+ {
16523
+ "epoch": 2.37,
16524
+ "learning_rate": 0.007162270247908135,
16525
+ "loss": 8.0611,
16526
+ "step": 550600
16527
+ },
16528
+ {
16529
+ "epoch": 2.37,
16530
+ "learning_rate": 0.00715824300789773,
16531
+ "loss": 8.061,
16532
+ "step": 550800
16533
+ },
16534
+ {
16535
+ "epoch": 2.37,
16536
+ "learning_rate": 0.007154193796011567,
16537
+ "loss": 8.0521,
16538
+ "step": 551000
16539
+ },
16540
+ {
16541
+ "epoch": 2.37,
16542
+ "learning_rate": 0.007150142848546487,
16543
+ "loss": 8.057,
16544
+ "step": 551200
16545
+ },
16546
+ {
16547
+ "epoch": 2.37,
16548
+ "learning_rate": 0.007146090168766263,
16549
+ "loss": 8.0605,
16550
+ "step": 551400
16551
+ },
16552
+ {
16553
+ "epoch": 2.37,
16554
+ "learning_rate": 0.007142035759936066,
16555
+ "loss": 8.0472,
16556
+ "step": 551600
16557
+ },
16558
+ {
16559
+ "epoch": 2.37,
16560
+ "learning_rate": 0.007137979625322462,
16561
+ "loss": 8.0535,
16562
+ "step": 551800
16563
+ },
16564
+ {
16565
+ "epoch": 2.37,
16566
+ "learning_rate": 0.0071339217681934035,
16567
+ "loss": 8.0519,
16568
+ "step": 552000
16569
+ },
16570
+ {
16571
+ "epoch": 2.37,
16572
+ "learning_rate": 0.007129862191818233,
16573
+ "loss": 8.0626,
16574
+ "step": 552200
16575
+ },
16576
+ {
16577
+ "epoch": 2.37,
16578
+ "learning_rate": 0.007125800899467677,
16579
+ "loss": 8.0612,
16580
+ "step": 552400
16581
+ },
16582
+ {
16583
+ "epoch": 2.37,
16584
+ "learning_rate": 0.007121737894413849,
16585
+ "loss": 8.051,
16586
+ "step": 552600
16587
+ },
16588
+ {
16589
+ "epoch": 2.38,
16590
+ "learning_rate": 0.007117693507749444,
16591
+ "loss": 8.0404,
16592
+ "step": 552800
16593
+ },
16594
+ {
16595
+ "epoch": 2.38,
16596
+ "learning_rate": 0.0071136270956335375,
16597
+ "loss": 8.0493,
16598
+ "step": 553000
16599
+ },
16600
+ {
16601
+ "epoch": 2.38,
16602
+ "learning_rate": 0.007109558980622571,
16603
+ "loss": 8.0652,
16604
+ "step": 553200
16605
+ },
16606
+ {
16607
+ "epoch": 2.38,
16608
+ "learning_rate": 0.007105489165994151,
16609
+ "loss": 8.0499,
16610
+ "step": 553400
16611
+ },
16612
+ {
16613
+ "epoch": 2.38,
16614
+ "learning_rate": 0.007101417655027253,
16615
+ "loss": 8.0657,
16616
+ "step": 553600
16617
+ },
16618
+ {
16619
+ "epoch": 2.38,
16620
+ "learning_rate": 0.007097344451002222,
16621
+ "loss": 8.0415,
16622
+ "step": 553800
16623
+ },
16624
+ {
16625
+ "epoch": 2.38,
16626
+ "learning_rate": 0.0070932695572007605,
16627
+ "loss": 8.0431,
16628
+ "step": 554000
16629
+ },
16630
+ {
16631
+ "epoch": 2.38,
16632
+ "learning_rate": 0.007089192976905942,
16633
+ "loss": 8.0563,
16634
+ "step": 554200
16635
+ },
16636
+ {
16637
+ "epoch": 2.38,
16638
+ "learning_rate": 0.007085114713402188,
16639
+ "loss": 8.0635,
16640
+ "step": 554400
16641
+ },
16642
+ {
16643
+ "epoch": 2.38,
16644
+ "learning_rate": 0.007081034769975286,
16645
+ "loss": 8.0628,
16646
+ "step": 554600
16647
+ },
16648
+ {
16649
+ "epoch": 2.38,
16650
+ "learning_rate": 0.00707697356217788,
16651
+ "loss": 8.0692,
16652
+ "step": 554800
16653
+ },
16654
+ {
16655
+ "epoch": 2.38,
16656
+ "learning_rate": 0.007072890277125996,
16657
+ "loss": 8.0654,
16658
+ "step": 555000
16659
+ },
16660
+ {
16661
+ "epoch": 2.39,
16662
+ "learning_rate": 0.00706880532199997,
16663
+ "loss": 8.0595,
16664
+ "step": 555200
16665
+ },
16666
+ {
16667
+ "epoch": 2.39,
16668
+ "learning_rate": 0.007064718700090976,
16669
+ "loss": 8.0514,
16670
+ "step": 555400
16671
+ },
16672
+ {
16673
+ "epoch": 2.39,
16674
+ "learning_rate": 0.007060630414691535,
16675
+ "loss": 8.0607,
16676
+ "step": 555600
16677
+ },
16678
+ {
16679
+ "epoch": 2.39,
16680
+ "learning_rate": 0.0070565404690954995,
16681
+ "loss": 8.0671,
16682
+ "step": 555800
16683
+ },
16684
+ {
16685
+ "epoch": 2.39,
16686
+ "learning_rate": 0.007052448866598068,
16687
+ "loss": 8.0563,
16688
+ "step": 556000
16689
+ },
16690
+ {
16691
+ "epoch": 2.39,
16692
+ "learning_rate": 0.00704835561049577,
16693
+ "loss": 8.0695,
16694
+ "step": 556200
16695
+ },
16696
+ {
16697
+ "epoch": 2.39,
16698
+ "learning_rate": 0.007044260704086468,
16699
+ "loss": 8.0473,
16700
+ "step": 556400
16701
+ },
16702
+ {
16703
+ "epoch": 2.39,
16704
+ "learning_rate": 0.007040164150669354,
16705
+ "loss": 8.0468,
16706
+ "step": 556600
16707
+ },
16708
+ {
16709
+ "epoch": 2.39,
16710
+ "learning_rate": 0.007036086448613831,
16711
+ "loss": 8.0611,
16712
+ "step": 556800
16713
+ },
16714
+ {
16715
+ "epoch": 2.39,
16716
+ "learning_rate": 0.007031986619277786,
16717
+ "loss": 8.0643,
16718
+ "step": 557000
16719
+ },
16720
+ {
16721
+ "epoch": 2.39,
16722
+ "learning_rate": 0.0070278851528229385,
16723
+ "loss": 8.0717,
16724
+ "step": 557200
16725
+ },
16726
+ {
16727
+ "epoch": 2.4,
16728
+ "learning_rate": 0.0070237820525537635,
16729
+ "loss": 8.0483,
16730
+ "step": 557400
16731
+ },
16732
+ {
16733
+ "epoch": 2.4,
16734
+ "learning_rate": 0.007019677321776058,
16735
+ "loss": 8.0599,
16736
+ "step": 557600
16737
+ },
16738
+ {
16739
+ "epoch": 2.4,
16740
+ "learning_rate": 0.007015570963796927,
16741
+ "loss": 8.0636,
16742
+ "step": 557800
16743
+ },
16744
+ {
16745
+ "epoch": 2.4,
16746
+ "learning_rate": 0.00701146298192479,
16747
+ "loss": 8.0634,
16748
+ "step": 558000
16749
+ },
16750
+ {
16751
+ "epoch": 2.4,
16752
+ "learning_rate": 0.007007353379469375,
16753
+ "loss": 8.0698,
16754
+ "step": 558200
16755
+ },
16756
+ {
16757
+ "epoch": 2.4,
16758
+ "learning_rate": 0.007003242159741711,
16759
+ "loss": 8.0581,
16760
+ "step": 558400
16761
+ },
16762
+ {
16763
+ "epoch": 2.4,
16764
+ "learning_rate": 0.0069991293260541374,
16765
+ "loss": 8.0534,
16766
+ "step": 558600
16767
+ },
16768
+ {
16769
+ "epoch": 2.4,
16770
+ "learning_rate": 0.006995035457942955,
16771
+ "loss": 8.0641,
16772
+ "step": 558800
16773
+ },
16774
+ {
16775
+ "epoch": 2.4,
16776
+ "learning_rate": 0.006990919414306169,
16777
+ "loss": 8.0571,
16778
+ "step": 559000
16779
+ },
16780
+ {
16781
+ "epoch": 2.4,
16782
+ "learning_rate": 0.0069868017666376864,
16783
+ "loss": 8.0577,
16784
+ "step": 559200
16785
+ },
16786
+ {
16787
+ "epoch": 2.4,
16788
+ "learning_rate": 0.006982703118473221,
16789
+ "loss": 8.0753,
16790
+ "step": 559400
16791
+ },
16792
+ {
16793
+ "epoch": 2.4,
16794
+ "learning_rate": 0.006978582280673894,
16795
+ "loss": 8.0559,
16796
+ "step": 559600
16797
+ },
16798
+ {
16799
+ "epoch": 2.41,
16800
+ "learning_rate": 0.006974459848782675,
16801
+ "loss": 8.0529,
16802
+ "step": 559800
16803
+ },
16804
+ {
16805
+ "epoch": 2.41,
16806
+ "learning_rate": 0.006970335826120932,
16807
+ "loss": 8.046,
16808
+ "step": 560000
16809
  }
16810
  ],
16811
  "max_steps": 1000000,
16812
  "num_train_epochs": 5,
16813
+ "total_flos": 8.925433954352456e+17,
16814
  "trial_name": null,
16815
  "trial_params": null
16816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c30ebd022a5fa50904724c61cf334da708e967be5166a0a78886697c74d050
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
3
  size 146774203