amazingvince committed
Commit 9f7ec5a
1 Parent(s): a54bfee

Upload folder using huggingface_hub

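The commit message above is the default one that `huggingface_hub` writes when a folder is pushed with `upload_folder`. A minimal sketch of how a checkpoint folder like this could be uploaded is shown below; the local path and repository id are hypothetical placeholders, not values taken from this commit.

```python
# Sketch: push a local training checkpoint folder to the Hub.
# The folder_path and repo_id below are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token saved by `huggingface-cli login`
api.upload_folder(
    folder_path="./checkpoints/global_step12000",  # local folder holding the shards
    repo_id="your-username/your-model",            # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```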
latest CHANGED
@@ -1 +1 @@
- global_step11200
+ global_step12000
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:26e2d02abd2945dca4a601867e9efb3e52a57105c0b5a43e4977147a4745ad31
+ oid sha256:5f2f922cbafd08d7c3c9709c63870639b4fe5a77197b1aed02013fa0a46d37ab
  size 4944210912
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a300bb006217ced006739cf5cdc9607975347903d71726b8d493b43871da745
+ oid sha256:b990bef30e1c5b9ae4bb3b37e37b31d793cfbf6af883deeba8aafd44531998bc
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57d0e3071a24f3679a46a489024ba1323740842030b1b7d4dd348b7da0a1744f
+ oid sha256:308a00b64f20b918c2170564dfd59fab520e889ef27bfcfd89fb157907315e40
  size 4541564920
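The three safetensors entries above change only their Git LFS pointers: each `oid sha256:` line is the checksum of the shard the pointer resolves to, and the shard sizes stay the same, so the weights were replaced in place. A quick way to confirm that locally downloaded shards match the new pointers is to hash them; the sketch below assumes the three files sit in the current working directory.

```python
# Sketch: verify local shards against the sha256 values in the updated LFS pointers.
# Expected hashes are copied from the "+" lines of the diffs above.
import hashlib
from pathlib import Path

EXPECTED = {
    "model-00001-of-00003.safetensors": "5f2f922cbafd08d7c3c9709c63870639b4fe5a77197b1aed02013fa0a46d37ab",
    "model-00002-of-00003.safetensors": "b990bef30e1c5b9ae4bb3b37e37b31d793cfbf6af883deeba8aafd44531998bc",
    "model-00003-of-00003.safetensors": "308a00b64f20b918c2170564dfd59fab520e889ef27bfcfd89fb157907315e40",
}

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash a large file in chunks so the ~5 GB shards are not loaded into memory."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

for name, expected in EXPECTED.items():
    status = "OK" if sha256_of(Path(name)) == expected else "MISMATCH"
    print(f"{name}: {status}")
```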
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.45492625218958643,
  "eval_steps": 800,
- "global_step": 11200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -13565,6 +13565,974 @@
  "eval_samples_per_second": 16.393,
  "eval_steps_per_second": 2.737,
  "step": 11200
  }
  ],
  "logging_steps": 5,
@@ -13572,7 +14540,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 400,
- "total_flos": 1565092777697280.0,
  "trial_name": null,
  "trial_params": null
  }
 
  {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 0.48742098448884263,
  "eval_steps": 800,
+ "global_step": 12000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
 
  "eval_samples_per_second": 16.393,
  "eval_steps_per_second": 2.737,
  "step": 11200
13568
+ },
13569
+ {
13570
+ "epoch": 0.46,
13571
+ "learning_rate": 1.15763110831054e-06,
13572
+ "loss": 0.6646,
13573
+ "step": 11205
13574
+ },
13575
+ {
13576
+ "epoch": 0.46,
13577
+ "learning_rate": 1.1569946246316182e-06,
13578
+ "loss": 0.7085,
13579
+ "step": 11210
13580
+ },
13581
+ {
13582
+ "epoch": 0.46,
13583
+ "learning_rate": 1.156358075738495e-06,
13584
+ "loss": 0.6621,
13585
+ "step": 11215
13586
+ },
13587
+ {
13588
+ "epoch": 0.46,
13589
+ "learning_rate": 1.1557214618955868e-06,
13590
+ "loss": 0.6703,
13591
+ "step": 11220
13592
+ },
13593
+ {
13594
+ "epoch": 0.46,
13595
+ "learning_rate": 1.1550847833673374e-06,
13596
+ "loss": 0.7204,
13597
+ "step": 11225
13598
+ },
13599
+ {
13600
+ "epoch": 0.46,
13601
+ "learning_rate": 1.154448040418218e-06,
13602
+ "loss": 0.6923,
13603
+ "step": 11230
13604
+ },
13605
+ {
13606
+ "epoch": 0.46,
13607
+ "learning_rate": 1.1538112333127253e-06,
13608
+ "loss": 0.6608,
13609
+ "step": 11235
13610
+ },
13611
+ {
13612
+ "epoch": 0.46,
13613
+ "learning_rate": 1.1531743623153842e-06,
13614
+ "loss": 0.6824,
13615
+ "step": 11240
13616
+ },
13617
+ {
13618
+ "epoch": 0.46,
13619
+ "learning_rate": 1.1525374276907449e-06,
13620
+ "loss": 0.7322,
13621
+ "step": 11245
13622
+ },
13623
+ {
13624
+ "epoch": 0.46,
13625
+ "learning_rate": 1.1519004297033847e-06,
13626
+ "loss": 0.6432,
13627
+ "step": 11250
13628
+ },
13629
+ {
13630
+ "epoch": 0.46,
13631
+ "learning_rate": 1.1512633686179071e-06,
13632
+ "loss": 0.6795,
13633
+ "step": 11255
13634
+ },
13635
+ {
13636
+ "epoch": 0.46,
13637
+ "learning_rate": 1.1506262446989417e-06,
13638
+ "loss": 0.7229,
13639
+ "step": 11260
13640
+ },
13641
+ {
13642
+ "epoch": 0.46,
13643
+ "learning_rate": 1.149989058211144e-06,
13644
+ "loss": 0.6954,
13645
+ "step": 11265
13646
+ },
13647
+ {
13648
+ "epoch": 0.46,
13649
+ "learning_rate": 1.149351809419196e-06,
13650
+ "loss": 0.6879,
13651
+ "step": 11270
13652
+ },
13653
+ {
13654
+ "epoch": 0.46,
13655
+ "learning_rate": 1.148714498587805e-06,
13656
+ "loss": 0.6642,
13657
+ "step": 11275
13658
+ },
13659
+ {
13660
+ "epoch": 0.46,
13661
+ "learning_rate": 1.1480771259817048e-06,
13662
+ "loss": 0.7015,
13663
+ "step": 11280
13664
+ },
13665
+ {
13666
+ "epoch": 0.46,
13667
+ "learning_rate": 1.147439691865654e-06,
13668
+ "loss": 0.6467,
13669
+ "step": 11285
13670
+ },
13671
+ {
13672
+ "epoch": 0.46,
13673
+ "learning_rate": 1.1468021965044377e-06,
13674
+ "loss": 0.7045,
13675
+ "step": 11290
13676
+ },
13677
+ {
13678
+ "epoch": 0.46,
13679
+ "learning_rate": 1.1461646401628654e-06,
13680
+ "loss": 0.6635,
13681
+ "step": 11295
13682
+ },
13683
+ {
13684
+ "epoch": 0.46,
13685
+ "learning_rate": 1.1455270231057728e-06,
13686
+ "loss": 0.6943,
13687
+ "step": 11300
13688
+ },
13689
+ {
13690
+ "epoch": 0.46,
13691
+ "learning_rate": 1.14488934559802e-06,
13692
+ "loss": 0.6626,
13693
+ "step": 11305
13694
+ },
13695
+ {
13696
+ "epoch": 0.46,
13697
+ "learning_rate": 1.1442516079044932e-06,
13698
+ "loss": 0.6716,
13699
+ "step": 11310
13700
+ },
13701
+ {
13702
+ "epoch": 0.46,
13703
+ "learning_rate": 1.1436138102901031e-06,
13704
+ "loss": 0.6748,
13705
+ "step": 11315
13706
+ },
13707
+ {
13708
+ "epoch": 0.46,
13709
+ "learning_rate": 1.142975953019785e-06,
13710
+ "loss": 0.7028,
13711
+ "step": 11320
13712
+ },
13713
+ {
13714
+ "epoch": 0.46,
13715
+ "learning_rate": 1.1423380363584999e-06,
13716
+ "loss": 0.6044,
13717
+ "step": 11325
13718
+ },
13719
+ {
13720
+ "epoch": 0.46,
13721
+ "learning_rate": 1.1417000605712316e-06,
13722
+ "loss": 0.6831,
13723
+ "step": 11330
13724
+ },
13725
+ {
13726
+ "epoch": 0.46,
13727
+ "learning_rate": 1.1410620259229908e-06,
13728
+ "loss": 0.6632,
13729
+ "step": 11335
13730
+ },
13731
+ {
13732
+ "epoch": 0.46,
13733
+ "learning_rate": 1.1404239326788115e-06,
13734
+ "loss": 0.6393,
13735
+ "step": 11340
13736
+ },
13737
+ {
13738
+ "epoch": 0.46,
13739
+ "learning_rate": 1.1397857811037512e-06,
13740
+ "loss": 0.6501,
13741
+ "step": 11345
13742
+ },
13743
+ {
13744
+ "epoch": 0.46,
13745
+ "learning_rate": 1.1391475714628932e-06,
13746
+ "loss": 0.6398,
13747
+ "step": 11350
13748
+ },
13749
+ {
13750
+ "epoch": 0.46,
13751
+ "learning_rate": 1.138509304021344e-06,
13752
+ "loss": 0.6784,
13753
+ "step": 11355
13754
+ },
13755
+ {
13756
+ "epoch": 0.46,
13757
+ "learning_rate": 1.1378709790442346e-06,
13758
+ "loss": 0.7065,
13759
+ "step": 11360
13760
+ },
13761
+ {
13762
+ "epoch": 0.46,
13763
+ "learning_rate": 1.1372325967967196e-06,
13764
+ "loss": 0.6189,
13765
+ "step": 11365
13766
+ },
13767
+ {
13768
+ "epoch": 0.46,
13769
+ "learning_rate": 1.1365941575439772e-06,
13770
+ "loss": 0.6652,
13771
+ "step": 11370
13772
+ },
13773
+ {
13774
+ "epoch": 0.46,
13775
+ "learning_rate": 1.1359556615512099e-06,
13776
+ "loss": 0.6752,
13777
+ "step": 11375
13778
+ },
13779
+ {
13780
+ "epoch": 0.46,
13781
+ "learning_rate": 1.1353171090836427e-06,
13782
+ "loss": 0.6668,
13783
+ "step": 11380
13784
+ },
13785
+ {
13786
+ "epoch": 0.46,
13787
+ "learning_rate": 1.134678500406525e-06,
13788
+ "loss": 0.6587,
13789
+ "step": 11385
13790
+ },
13791
+ {
13792
+ "epoch": 0.46,
13793
+ "learning_rate": 1.13403983578513e-06,
13794
+ "loss": 0.6873,
13795
+ "step": 11390
13796
+ },
13797
+ {
13798
+ "epoch": 0.46,
13799
+ "learning_rate": 1.1334011154847527e-06,
13800
+ "loss": 0.6975,
13801
+ "step": 11395
13802
+ },
13803
+ {
13804
+ "epoch": 0.46,
13805
+ "learning_rate": 1.1327623397707122e-06,
13806
+ "loss": 0.6784,
13807
+ "step": 11400
13808
+ },
13809
+ {
13810
+ "epoch": 0.46,
13811
+ "learning_rate": 1.1321235089083502e-06,
13812
+ "loss": 0.6643,
13813
+ "step": 11405
13814
+ },
13815
+ {
13816
+ "epoch": 0.46,
13817
+ "learning_rate": 1.1314846231630315e-06,
13818
+ "loss": 0.6754,
13819
+ "step": 11410
13820
+ },
13821
+ {
13822
+ "epoch": 0.46,
13823
+ "learning_rate": 1.1308456828001441e-06,
13824
+ "loss": 0.6689,
13825
+ "step": 11415
13826
+ },
13827
+ {
13828
+ "epoch": 0.46,
13829
+ "learning_rate": 1.1302066880850975e-06,
13830
+ "loss": 0.6594,
13831
+ "step": 11420
13832
+ },
13833
+ {
13834
+ "epoch": 0.46,
13835
+ "learning_rate": 1.1295676392833253e-06,
13836
+ "loss": 0.6416,
13837
+ "step": 11425
13838
+ },
13839
+ {
13840
+ "epoch": 0.46,
13841
+ "learning_rate": 1.1289285366602826e-06,
13842
+ "loss": 0.7223,
13843
+ "step": 11430
13844
+ },
13845
+ {
13846
+ "epoch": 0.46,
13847
+ "learning_rate": 1.1282893804814468e-06,
13848
+ "loss": 0.6944,
13849
+ "step": 11435
13850
+ },
13851
+ {
13852
+ "epoch": 0.46,
13853
+ "learning_rate": 1.127650171012318e-06,
13854
+ "loss": 0.6598,
13855
+ "step": 11440
13856
+ },
13857
+ {
13858
+ "epoch": 0.46,
13859
+ "learning_rate": 1.1270109085184182e-06,
13860
+ "loss": 0.7102,
13861
+ "step": 11445
13862
+ },
13863
+ {
13864
+ "epoch": 0.47,
13865
+ "learning_rate": 1.1263715932652919e-06,
13866
+ "loss": 0.6803,
13867
+ "step": 11450
13868
+ },
13869
+ {
13870
+ "epoch": 0.47,
13871
+ "learning_rate": 1.1257322255185044e-06,
13872
+ "loss": 0.65,
13873
+ "step": 11455
13874
+ },
13875
+ {
13876
+ "epoch": 0.47,
13877
+ "learning_rate": 1.1250928055436443e-06,
13878
+ "loss": 0.7018,
13879
+ "step": 11460
13880
+ },
13881
+ {
13882
+ "epoch": 0.47,
13883
+ "learning_rate": 1.12445333360632e-06,
13884
+ "loss": 0.6409,
13885
+ "step": 11465
13886
+ },
13887
+ {
13888
+ "epoch": 0.47,
13889
+ "learning_rate": 1.1238138099721634e-06,
13890
+ "loss": 0.6724,
13891
+ "step": 11470
13892
+ },
13893
+ {
13894
+ "epoch": 0.47,
13895
+ "learning_rate": 1.1231742349068271e-06,
13896
+ "loss": 0.6854,
13897
+ "step": 11475
13898
+ },
13899
+ {
13900
+ "epoch": 0.47,
13901
+ "learning_rate": 1.1225346086759846e-06,
13902
+ "loss": 0.7102,
13903
+ "step": 11480
13904
+ },
13905
+ {
13906
+ "epoch": 0.47,
13907
+ "learning_rate": 1.1218949315453314e-06,
13908
+ "loss": 0.6584,
13909
+ "step": 11485
13910
+ },
13911
+ {
13912
+ "epoch": 0.47,
13913
+ "learning_rate": 1.1212552037805836e-06,
13914
+ "loss": 0.6584,
13915
+ "step": 11490
13916
+ },
13917
+ {
13918
+ "epoch": 0.47,
13919
+ "learning_rate": 1.1206154256474786e-06,
13920
+ "loss": 0.6846,
13921
+ "step": 11495
13922
+ },
13923
+ {
13924
+ "epoch": 0.47,
13925
+ "learning_rate": 1.119975597411775e-06,
13926
+ "loss": 0.7037,
13927
+ "step": 11500
13928
+ },
13929
+ {
13930
+ "epoch": 0.47,
13931
+ "learning_rate": 1.1193357193392512e-06,
13932
+ "loss": 0.6689,
13933
+ "step": 11505
13934
+ },
13935
+ {
13936
+ "epoch": 0.47,
13937
+ "learning_rate": 1.1186957916957078e-06,
13938
+ "loss": 0.6556,
13939
+ "step": 11510
13940
+ },
13941
+ {
13942
+ "epoch": 0.47,
13943
+ "learning_rate": 1.1180558147469642e-06,
13944
+ "loss": 0.6557,
13945
+ "step": 11515
13946
+ },
13947
+ {
13948
+ "epoch": 0.47,
13949
+ "learning_rate": 1.1174157887588623e-06,
13950
+ "loss": 0.6662,
13951
+ "step": 11520
13952
+ },
13953
+ {
13954
+ "epoch": 0.47,
13955
+ "learning_rate": 1.1167757139972626e-06,
13956
+ "loss": 0.6702,
13957
+ "step": 11525
13958
+ },
13959
+ {
13960
+ "epoch": 0.47,
13961
+ "learning_rate": 1.116135590728047e-06,
13962
+ "loss": 0.6682,
13963
+ "step": 11530
13964
+ },
13965
+ {
13966
+ "epoch": 0.47,
13967
+ "learning_rate": 1.115495419217117e-06,
13968
+ "loss": 0.6855,
13969
+ "step": 11535
13970
+ },
13971
+ {
13972
+ "epoch": 0.47,
13973
+ "learning_rate": 1.114855199730394e-06,
13974
+ "loss": 0.6796,
13975
+ "step": 11540
13976
+ },
13977
+ {
13978
+ "epoch": 0.47,
13979
+ "learning_rate": 1.1142149325338199e-06,
13980
+ "loss": 0.6481,
13981
+ "step": 11545
13982
+ },
13983
+ {
13984
+ "epoch": 0.47,
13985
+ "learning_rate": 1.1135746178933563e-06,
13986
+ "loss": 0.7167,
13987
+ "step": 11550
13988
+ },
13989
+ {
13990
+ "epoch": 0.47,
13991
+ "learning_rate": 1.112934256074984e-06,
13992
+ "loss": 0.6602,
13993
+ "step": 11555
13994
+ },
13995
+ {
13996
+ "epoch": 0.47,
13997
+ "learning_rate": 1.1122938473447038e-06,
13998
+ "loss": 0.6848,
13999
+ "step": 11560
14000
+ },
14001
+ {
14002
+ "epoch": 0.47,
14003
+ "learning_rate": 1.1116533919685361e-06,
14004
+ "loss": 0.6628,
14005
+ "step": 11565
14006
+ },
14007
+ {
14008
+ "epoch": 0.47,
14009
+ "learning_rate": 1.1110128902125201e-06,
14010
+ "loss": 0.6826,
14011
+ "step": 11570
14012
+ },
14013
+ {
14014
+ "epoch": 0.47,
14015
+ "learning_rate": 1.1103723423427153e-06,
14016
+ "loss": 0.6527,
14017
+ "step": 11575
14018
+ },
14019
+ {
14020
+ "epoch": 0.47,
14021
+ "learning_rate": 1.1097317486251992e-06,
14022
+ "loss": 0.6737,
14023
+ "step": 11580
14024
+ },
14025
+ {
14026
+ "epoch": 0.47,
14027
+ "learning_rate": 1.109091109326069e-06,
14028
+ "loss": 0.6854,
14029
+ "step": 11585
14030
+ },
14031
+ {
14032
+ "epoch": 0.47,
14033
+ "learning_rate": 1.1084504247114406e-06,
14034
+ "loss": 0.7145,
14035
+ "step": 11590
14036
+ },
14037
+ {
14038
+ "epoch": 0.47,
14039
+ "learning_rate": 1.107809695047449e-06,
14040
+ "loss": 0.6756,
14041
+ "step": 11595
14042
+ },
14043
+ {
14044
+ "epoch": 0.47,
14045
+ "learning_rate": 1.1071689206002474e-06,
14046
+ "loss": 0.6725,
14047
+ "step": 11600
14048
+ },
14049
+ {
14050
+ "epoch": 0.47,
14051
+ "learning_rate": 1.1065281016360083e-06,
14052
+ "loss": 0.7145,
14053
+ "step": 11605
14054
+ },
14055
+ {
14056
+ "epoch": 0.47,
14057
+ "learning_rate": 1.1058872384209224e-06,
14058
+ "loss": 0.6899,
14059
+ "step": 11610
14060
+ },
14061
+ {
14062
+ "epoch": 0.47,
14063
+ "learning_rate": 1.1052463312211983e-06,
14064
+ "loss": 0.6344,
14065
+ "step": 11615
14066
+ },
14067
+ {
14068
+ "epoch": 0.47,
14069
+ "learning_rate": 1.1046053803030637e-06,
14070
+ "loss": 0.654,
14071
+ "step": 11620
14072
+ },
14073
+ {
14074
+ "epoch": 0.47,
14075
+ "learning_rate": 1.1039643859327635e-06,
14076
+ "loss": 0.6741,
14077
+ "step": 11625
14078
+ },
14079
+ {
14080
+ "epoch": 0.47,
14081
+ "learning_rate": 1.1033233483765615e-06,
14082
+ "loss": 0.6814,
14083
+ "step": 11630
14084
+ },
14085
+ {
14086
+ "epoch": 0.47,
14087
+ "learning_rate": 1.1026822679007395e-06,
14088
+ "loss": 0.6565,
14089
+ "step": 11635
14090
+ },
14091
+ {
14092
+ "epoch": 0.47,
14093
+ "learning_rate": 1.1020411447715961e-06,
14094
+ "loss": 0.6916,
14095
+ "step": 11640
14096
+ },
14097
+ {
14098
+ "epoch": 0.47,
14099
+ "learning_rate": 1.1013999792554486e-06,
14100
+ "loss": 0.6894,
14101
+ "step": 11645
14102
+ },
14103
+ {
14104
+ "epoch": 0.47,
14105
+ "learning_rate": 1.1007587716186317e-06,
14106
+ "loss": 0.6698,
14107
+ "step": 11650
14108
+ },
14109
+ {
14110
+ "epoch": 0.47,
14111
+ "learning_rate": 1.1001175221274968e-06,
14112
+ "loss": 0.7096,
14113
+ "step": 11655
14114
+ },
14115
+ {
14116
+ "epoch": 0.47,
14117
+ "learning_rate": 1.0994762310484142e-06,
14118
+ "loss": 0.6887,
14119
+ "step": 11660
14120
+ },
14121
+ {
14122
+ "epoch": 0.47,
14123
+ "learning_rate": 1.0988348986477705e-06,
14124
+ "loss": 0.671,
14125
+ "step": 11665
14126
+ },
14127
+ {
14128
+ "epoch": 0.47,
14129
+ "learning_rate": 1.0981935251919693e-06,
14130
+ "loss": 0.6727,
14131
+ "step": 11670
14132
+ },
14133
+ {
14134
+ "epoch": 0.47,
14135
+ "learning_rate": 1.0975521109474318e-06,
14136
+ "loss": 0.6777,
14137
+ "step": 11675
14138
+ },
14139
+ {
14140
+ "epoch": 0.47,
14141
+ "learning_rate": 1.0969106561805952e-06,
14142
+ "loss": 0.6661,
14143
+ "step": 11680
14144
+ },
14145
+ {
14146
+ "epoch": 0.47,
14147
+ "learning_rate": 1.0962691611579154e-06,
14148
+ "loss": 0.6576,
14149
+ "step": 11685
14150
+ },
14151
+ {
14152
+ "epoch": 0.47,
14153
+ "learning_rate": 1.0956276261458629e-06,
14154
+ "loss": 0.6415,
14155
+ "step": 11690
14156
+ },
14157
+ {
14158
+ "epoch": 0.48,
14159
+ "learning_rate": 1.0949860514109264e-06,
14160
+ "loss": 0.6485,
14161
+ "step": 11695
14162
+ },
14163
+ {
14164
+ "epoch": 0.48,
14165
+ "learning_rate": 1.09434443721961e-06,
14166
+ "loss": 0.6966,
14167
+ "step": 11700
14168
+ },
14169
+ {
14170
+ "epoch": 0.48,
14171
+ "learning_rate": 1.0937027838384345e-06,
14172
+ "loss": 0.6746,
14173
+ "step": 11705
14174
+ },
14175
+ {
14176
+ "epoch": 0.48,
14177
+ "learning_rate": 1.093061091533938e-06,
14178
+ "loss": 0.6565,
14179
+ "step": 11710
14180
+ },
14181
+ {
14182
+ "epoch": 0.48,
14183
+ "learning_rate": 1.0924193605726733e-06,
14184
+ "loss": 0.7127,
14185
+ "step": 11715
14186
+ },
14187
+ {
14188
+ "epoch": 0.48,
14189
+ "learning_rate": 1.0917775912212099e-06,
14190
+ "loss": 0.6738,
14191
+ "step": 11720
14192
+ },
14193
+ {
14194
+ "epoch": 0.48,
14195
+ "learning_rate": 1.0911357837461332e-06,
14196
+ "loss": 0.6712,
14197
+ "step": 11725
14198
+ },
14199
+ {
14200
+ "epoch": 0.48,
14201
+ "learning_rate": 1.0904939384140445e-06,
14202
+ "loss": 0.6617,
14203
+ "step": 11730
14204
+ },
14205
+ {
14206
+ "epoch": 0.48,
14207
+ "learning_rate": 1.0898520554915607e-06,
14208
+ "loss": 0.6919,
14209
+ "step": 11735
14210
+ },
14211
+ {
14212
+ "epoch": 0.48,
14213
+ "learning_rate": 1.0892101352453142e-06,
14214
+ "loss": 0.6731,
14215
+ "step": 11740
14216
+ },
14217
+ {
14218
+ "epoch": 0.48,
14219
+ "learning_rate": 1.0885681779419537e-06,
14220
+ "loss": 0.6931,
14221
+ "step": 11745
14222
+ },
14223
+ {
14224
+ "epoch": 0.48,
14225
+ "learning_rate": 1.0879261838481426e-06,
14226
+ "loss": 0.631,
14227
+ "step": 11750
14228
+ },
14229
+ {
14230
+ "epoch": 0.48,
14231
+ "learning_rate": 1.0872841532305587e-06,
14232
+ "loss": 0.6515,
14233
+ "step": 11755
14234
+ },
14235
+ {
14236
+ "epoch": 0.48,
14237
+ "learning_rate": 1.0866420863558969e-06,
14238
+ "loss": 0.6674,
14239
+ "step": 11760
14240
+ },
14241
+ {
14242
+ "epoch": 0.48,
14243
+ "learning_rate": 1.0859999834908657e-06,
14244
+ "loss": 0.6514,
14245
+ "step": 11765
14246
+ },
14247
+ {
14248
+ "epoch": 0.48,
14249
+ "learning_rate": 1.0853578449021896e-06,
14250
+ "loss": 0.6393,
14251
+ "step": 11770
14252
+ },
14253
+ {
14254
+ "epoch": 0.48,
14255
+ "learning_rate": 1.084715670856607e-06,
14256
+ "loss": 0.6941,
14257
+ "step": 11775
14258
+ },
14259
+ {
14260
+ "epoch": 0.48,
14261
+ "learning_rate": 1.0840734616208712e-06,
14262
+ "loss": 0.664,
14263
+ "step": 11780
14264
+ },
14265
+ {
14266
+ "epoch": 0.48,
14267
+ "learning_rate": 1.0834312174617508e-06,
14268
+ "loss": 0.6636,
14269
+ "step": 11785
14270
+ },
14271
+ {
14272
+ "epoch": 0.48,
14273
+ "learning_rate": 1.0827889386460281e-06,
14274
+ "loss": 0.6756,
14275
+ "step": 11790
14276
+ },
14277
+ {
14278
+ "epoch": 0.48,
14279
+ "learning_rate": 1.0821466254405004e-06,
14280
+ "loss": 0.7116,
14281
+ "step": 11795
14282
+ },
14283
+ {
14284
+ "epoch": 0.48,
14285
+ "learning_rate": 1.0815042781119788e-06,
14286
+ "loss": 0.6647,
14287
+ "step": 11800
14288
+ },
14289
+ {
14290
+ "epoch": 0.48,
14291
+ "learning_rate": 1.0808618969272888e-06,
14292
+ "loss": 0.6404,
14293
+ "step": 11805
14294
+ },
14295
+ {
14296
+ "epoch": 0.48,
14297
+ "learning_rate": 1.0802194821532702e-06,
14298
+ "loss": 0.6711,
14299
+ "step": 11810
14300
+ },
14301
+ {
14302
+ "epoch": 0.48,
14303
+ "learning_rate": 1.079577034056776e-06,
14304
+ "loss": 0.6717,
14305
+ "step": 11815
14306
+ },
14307
+ {
14308
+ "epoch": 0.48,
14309
+ "learning_rate": 1.078934552904674e-06,
14310
+ "loss": 0.6705,
14311
+ "step": 11820
14312
+ },
14313
+ {
14314
+ "epoch": 0.48,
14315
+ "learning_rate": 1.0782920389638452e-06,
14316
+ "loss": 0.6713,
14317
+ "step": 11825
14318
+ },
14319
+ {
14320
+ "epoch": 0.48,
14321
+ "learning_rate": 1.0776494925011846e-06,
14322
+ "loss": 0.6474,
14323
+ "step": 11830
14324
+ },
14325
+ {
14326
+ "epoch": 0.48,
14327
+ "learning_rate": 1.0770069137836e-06,
14328
+ "loss": 0.6745,
14329
+ "step": 11835
14330
+ },
14331
+ {
14332
+ "epoch": 0.48,
14333
+ "learning_rate": 1.0763643030780126e-06,
14334
+ "loss": 0.6773,
14335
+ "step": 11840
14336
+ },
14337
+ {
14338
+ "epoch": 0.48,
14339
+ "learning_rate": 1.075721660651358e-06,
14340
+ "loss": 0.6767,
14341
+ "step": 11845
14342
+ },
14343
+ {
14344
+ "epoch": 0.48,
14345
+ "learning_rate": 1.0750789867705843e-06,
14346
+ "loss": 0.6758,
14347
+ "step": 11850
14348
+ },
14349
+ {
14350
+ "epoch": 0.48,
14351
+ "learning_rate": 1.0744362817026524e-06,
14352
+ "loss": 0.686,
14353
+ "step": 11855
14354
+ },
14355
+ {
14356
+ "epoch": 0.48,
14357
+ "learning_rate": 1.0737935457145364e-06,
14358
+ "loss": 0.6736,
14359
+ "step": 11860
14360
+ },
14361
+ {
14362
+ "epoch": 0.48,
14363
+ "learning_rate": 1.073150779073223e-06,
14364
+ "loss": 0.668,
14365
+ "step": 11865
14366
+ },
14367
+ {
14368
+ "epoch": 0.48,
14369
+ "learning_rate": 1.0725079820457123e-06,
14370
+ "loss": 0.7003,
14371
+ "step": 11870
14372
+ },
14373
+ {
14374
+ "epoch": 0.48,
14375
+ "learning_rate": 1.0718651548990163e-06,
14376
+ "loss": 0.682,
14377
+ "step": 11875
14378
+ },
14379
+ {
14380
+ "epoch": 0.48,
14381
+ "learning_rate": 1.0712222979001602e-06,
14382
+ "loss": 0.6445,
14383
+ "step": 11880
14384
+ },
14385
+ {
14386
+ "epoch": 0.48,
14387
+ "learning_rate": 1.0705794113161808e-06,
14388
+ "loss": 0.6872,
14389
+ "step": 11885
14390
+ },
14391
+ {
14392
+ "epoch": 0.48,
14393
+ "learning_rate": 1.0699364954141276e-06,
14394
+ "loss": 0.6936,
14395
+ "step": 11890
14396
+ },
14397
+ {
14398
+ "epoch": 0.48,
14399
+ "learning_rate": 1.0692935504610625e-06,
14400
+ "loss": 0.6195,
14401
+ "step": 11895
14402
+ },
14403
+ {
14404
+ "epoch": 0.48,
14405
+ "learning_rate": 1.068650576724059e-06,
14406
+ "loss": 0.6658,
14407
+ "step": 11900
14408
+ },
14409
+ {
14410
+ "epoch": 0.48,
14411
+ "learning_rate": 1.0680075744702034e-06,
14412
+ "loss": 0.6799,
14413
+ "step": 11905
14414
+ },
14415
+ {
14416
+ "epoch": 0.48,
14417
+ "learning_rate": 1.0673645439665925e-06,
14418
+ "loss": 0.6823,
14419
+ "step": 11910
14420
+ },
14421
+ {
14422
+ "epoch": 0.48,
14423
+ "learning_rate": 1.0667214854803357e-06,
14424
+ "loss": 0.6677,
14425
+ "step": 11915
14426
+ },
14427
+ {
14428
+ "epoch": 0.48,
14429
+ "learning_rate": 1.0660783992785541e-06,
14430
+ "loss": 0.6648,
14431
+ "step": 11920
14432
+ },
14433
+ {
14434
+ "epoch": 0.48,
14435
+ "learning_rate": 1.06543528562838e-06,
14436
+ "loss": 0.6313,
14437
+ "step": 11925
14438
+ },
14439
+ {
14440
+ "epoch": 0.48,
14441
+ "learning_rate": 1.0647921447969577e-06,
14442
+ "loss": 0.667,
14443
+ "step": 11930
14444
+ },
14445
+ {
14446
+ "epoch": 0.48,
14447
+ "learning_rate": 1.0641489770514418e-06,
14448
+ "loss": 0.6567,
14449
+ "step": 11935
14450
+ },
14451
+ {
14452
+ "epoch": 0.48,
14453
+ "learning_rate": 1.0635057826589987e-06,
14454
+ "loss": 0.6727,
14455
+ "step": 11940
14456
+ },
14457
+ {
14458
+ "epoch": 0.49,
14459
+ "learning_rate": 1.0628625618868056e-06,
14460
+ "loss": 0.6835,
14461
+ "step": 11945
14462
+ },
14463
+ {
14464
+ "epoch": 0.49,
14465
+ "learning_rate": 1.062219315002051e-06,
14466
+ "loss": 0.6329,
14467
+ "step": 11950
14468
+ },
14469
+ {
14470
+ "epoch": 0.49,
14471
+ "learning_rate": 1.061576042271934e-06,
14472
+ "loss": 0.6823,
14473
+ "step": 11955
14474
+ },
14475
+ {
14476
+ "epoch": 0.49,
14477
+ "learning_rate": 1.0609327439636647e-06,
14478
+ "loss": 0.6514,
14479
+ "step": 11960
14480
+ },
14481
+ {
14482
+ "epoch": 0.49,
14483
+ "learning_rate": 1.0602894203444633e-06,
14484
+ "loss": 0.716,
14485
+ "step": 11965
14486
+ },
14487
+ {
14488
+ "epoch": 0.49,
14489
+ "learning_rate": 1.0596460716815612e-06,
14490
+ "loss": 0.6778,
14491
+ "step": 11970
14492
+ },
14493
+ {
14494
+ "epoch": 0.49,
14495
+ "learning_rate": 1.059002698242199e-06,
14496
+ "loss": 0.6671,
14497
+ "step": 11975
14498
+ },
14499
+ {
14500
+ "epoch": 0.49,
14501
+ "learning_rate": 1.0583593002936298e-06,
14502
+ "loss": 0.6936,
14503
+ "step": 11980
14504
+ },
14505
+ {
14506
+ "epoch": 0.49,
14507
+ "learning_rate": 1.0577158781031147e-06,
14508
+ "loss": 0.7211,
14509
+ "step": 11985
14510
+ },
14511
+ {
14512
+ "epoch": 0.49,
14513
+ "learning_rate": 1.0570724319379254e-06,
14514
+ "loss": 0.6667,
14515
+ "step": 11990
14516
+ },
14517
+ {
14518
+ "epoch": 0.49,
14519
+ "learning_rate": 1.0564289620653446e-06,
14520
+ "loss": 0.7181,
14521
+ "step": 11995
14522
+ },
14523
+ {
14524
+ "epoch": 0.49,
14525
+ "learning_rate": 1.0557854687526632e-06,
14526
+ "loss": 0.6431,
14527
+ "step": 12000
14528
+ },
14529
+ {
14530
+ "epoch": 0.49,
14531
+ "eval_loss": 0.6379530429840088,
14532
+ "eval_runtime": 144.8913,
14533
+ "eval_samples_per_second": 16.329,
14534
+ "eval_steps_per_second": 2.726,
14535
+ "step": 12000
  }
  ],
  "logging_steps": 5,
 
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 400,
+ "total_flos": 1676879253282816.0,
  "trial_name": null,
  "trial_params": null
  }
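Almost all of the `trainer_state.json` diff is new entries appended to its `log_history` array: a training loss and learning rate every 5 steps from step 11205 to 12000, plus the evaluation record at step 12000. A small sketch for reading those metrics back out of a downloaded checkpoint is below; it assumes `trainer_state.json` sits in the current directory.

```python
# Sketch: inspect the updated trainer_state.json from this checkpoint.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])  # 12000 after this commit
print("epoch:", state["epoch"])              # ~0.487

# Split the periodic training logs from the evaluation entries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print("last train loss:", last["loss"], "at step", last["step"])
if eval_logs:
    print("last eval loss:", eval_logs[-1]["eval_loss"], "at step", eval_logs[-1]["step"])
```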