amazingvince commited on
Commit
b5c9cfb
1 Parent(s): b990fda

Upload folder using huggingface_hub

Browse files
latest CHANGED
@@ -1 +1 @@
1
- global_step21200
 
1
+ global_step22800
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39675a6badfcbf633893a8e465dd0480528653bb628a5f6efac95d1fb463d4ba
3
  size 4944210912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d91441d23aa02635dee209f09f6e93410b26201584e9f1bce97ab412c82b1fc3
3
  size 4944210912
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50241da4a5b7cb43212ee046c7aa46bb6d9c6cb934d67ed9c9fa21520578cb93
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc539e11e01c3090c81c8f8c3950abb87a747cdcf9f383afbc11d521b923257
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bc0aacd7f563371618d8cfe6b96d634ebbe7c7e8c431aeb70e2cd29c2545cac
3
  size 4541564920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ec6cd21adbfddc4a5169e1fd7db882972c50ca419d233852ce3302a3496cf6
3
  size 4541564920
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8611104059302886,
5
  "eval_steps": 800,
6
- "global_step": 21200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -25661,6 +25661,1942 @@
25661
  "learning_rate": 9.555310253116467e-08,
25662
  "loss": 0.6784,
25663
  "step": 21200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25664
  }
25665
  ],
25666
  "logging_steps": 5,
@@ -25668,7 +27604,7 @@
25668
  "num_input_tokens_seen": 0,
25669
  "num_train_epochs": 1,
25670
  "save_steps": 400,
25671
- "total_flos": 2963556718419968.0,
25672
  "trial_name": null,
25673
  "trial_params": null
25674
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.926099870528801,
5
  "eval_steps": 800,
6
+ "global_step": 22800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
25661
  "learning_rate": 9.555310253116467e-08,
25662
  "loss": 0.6784,
25663
  "step": 21200
25664
+ },
25665
+ {
25666
+ "epoch": 0.86,
25667
+ "learning_rate": 9.527835191329392e-08,
25668
+ "loss": 0.6565,
25669
+ "step": 21205
25670
+ },
25671
+ {
25672
+ "epoch": 0.86,
25673
+ "learning_rate": 9.500397710892816e-08,
25674
+ "loss": 0.6305,
25675
+ "step": 21210
25676
+ },
25677
+ {
25678
+ "epoch": 0.86,
25679
+ "learning_rate": 9.472997823203999e-08,
25680
+ "loss": 0.6524,
25681
+ "step": 21215
25682
+ },
25683
+ {
25684
+ "epoch": 0.86,
25685
+ "learning_rate": 9.445635539644615e-08,
25686
+ "loss": 0.6717,
25687
+ "step": 21220
25688
+ },
25689
+ {
25690
+ "epoch": 0.86,
25691
+ "learning_rate": 9.418310871580737e-08,
25692
+ "loss": 0.6429,
25693
+ "step": 21225
25694
+ },
25695
+ {
25696
+ "epoch": 0.86,
25697
+ "learning_rate": 9.391023830362799e-08,
25698
+ "loss": 0.6434,
25699
+ "step": 21230
25700
+ },
25701
+ {
25702
+ "epoch": 0.86,
25703
+ "learning_rate": 9.363774427325577e-08,
25704
+ "loss": 0.6648,
25705
+ "step": 21235
25706
+ },
25707
+ {
25708
+ "epoch": 0.86,
25709
+ "learning_rate": 9.336562673788228e-08,
25710
+ "loss": 0.636,
25711
+ "step": 21240
25712
+ },
25713
+ {
25714
+ "epoch": 0.86,
25715
+ "learning_rate": 9.309388581054322e-08,
25716
+ "loss": 0.6771,
25717
+ "step": 21245
25718
+ },
25719
+ {
25720
+ "epoch": 0.86,
25721
+ "learning_rate": 9.282252160411719e-08,
25722
+ "loss": 0.6502,
25723
+ "step": 21250
25724
+ },
25725
+ {
25726
+ "epoch": 0.86,
25727
+ "learning_rate": 9.255153423132622e-08,
25728
+ "loss": 0.6437,
25729
+ "step": 21255
25730
+ },
25731
+ {
25732
+ "epoch": 0.86,
25733
+ "learning_rate": 9.22809238047365e-08,
25734
+ "loss": 0.6704,
25735
+ "step": 21260
25736
+ },
25737
+ {
25738
+ "epoch": 0.86,
25739
+ "learning_rate": 9.201069043675724e-08,
25740
+ "loss": 0.6404,
25741
+ "step": 21265
25742
+ },
25743
+ {
25744
+ "epoch": 0.86,
25745
+ "learning_rate": 9.174083423964062e-08,
25746
+ "loss": 0.6834,
25747
+ "step": 21270
25748
+ },
25749
+ {
25750
+ "epoch": 0.86,
25751
+ "learning_rate": 9.147135532548311e-08,
25752
+ "loss": 0.6516,
25753
+ "step": 21275
25754
+ },
25755
+ {
25756
+ "epoch": 0.86,
25757
+ "learning_rate": 9.120225380622371e-08,
25758
+ "loss": 0.671,
25759
+ "step": 21280
25760
+ },
25761
+ {
25762
+ "epoch": 0.86,
25763
+ "learning_rate": 9.093352979364466e-08,
25764
+ "loss": 0.6583,
25765
+ "step": 21285
25766
+ },
25767
+ {
25768
+ "epoch": 0.86,
25769
+ "learning_rate": 9.066518339937157e-08,
25770
+ "loss": 0.6467,
25771
+ "step": 21290
25772
+ },
25773
+ {
25774
+ "epoch": 0.86,
25775
+ "learning_rate": 9.03972147348735e-08,
25776
+ "loss": 0.5999,
25777
+ "step": 21295
25778
+ },
25779
+ {
25780
+ "epoch": 0.87,
25781
+ "learning_rate": 9.012962391146217e-08,
25782
+ "loss": 0.6589,
25783
+ "step": 21300
25784
+ },
25785
+ {
25786
+ "epoch": 0.87,
25787
+ "learning_rate": 8.986241104029224e-08,
25788
+ "loss": 0.647,
25789
+ "step": 21305
25790
+ },
25791
+ {
25792
+ "epoch": 0.87,
25793
+ "learning_rate": 8.959557623236202e-08,
25794
+ "loss": 0.6199,
25795
+ "step": 21310
25796
+ },
25797
+ {
25798
+ "epoch": 0.87,
25799
+ "learning_rate": 8.93291195985122e-08,
25800
+ "loss": 0.6762,
25801
+ "step": 21315
25802
+ },
25803
+ {
25804
+ "epoch": 0.87,
25805
+ "learning_rate": 8.906304124942632e-08,
25806
+ "loss": 0.6446,
25807
+ "step": 21320
25808
+ },
25809
+ {
25810
+ "epoch": 0.87,
25811
+ "learning_rate": 8.879734129563132e-08,
25812
+ "loss": 0.6504,
25813
+ "step": 21325
25814
+ },
25815
+ {
25816
+ "epoch": 0.87,
25817
+ "learning_rate": 8.853201984749658e-08,
25818
+ "loss": 0.6898,
25819
+ "step": 21330
25820
+ },
25821
+ {
25822
+ "epoch": 0.87,
25823
+ "learning_rate": 8.826707701523428e-08,
25824
+ "loss": 0.6575,
25825
+ "step": 21335
25826
+ },
25827
+ {
25828
+ "epoch": 0.87,
25829
+ "learning_rate": 8.800251290889927e-08,
25830
+ "loss": 0.6208,
25831
+ "step": 21340
25832
+ },
25833
+ {
25834
+ "epoch": 0.87,
25835
+ "learning_rate": 8.773832763838939e-08,
25836
+ "loss": 0.6662,
25837
+ "step": 21345
25838
+ },
25839
+ {
25840
+ "epoch": 0.87,
25841
+ "learning_rate": 8.74745213134448e-08,
25842
+ "loss": 0.6218,
25843
+ "step": 21350
25844
+ },
25845
+ {
25846
+ "epoch": 0.87,
25847
+ "learning_rate": 8.721109404364812e-08,
25848
+ "loss": 0.6747,
25849
+ "step": 21355
25850
+ },
25851
+ {
25852
+ "epoch": 0.87,
25853
+ "learning_rate": 8.694804593842519e-08,
25854
+ "loss": 0.693,
25855
+ "step": 21360
25856
+ },
25857
+ {
25858
+ "epoch": 0.87,
25859
+ "learning_rate": 8.668537710704371e-08,
25860
+ "loss": 0.6482,
25861
+ "step": 21365
25862
+ },
25863
+ {
25864
+ "epoch": 0.87,
25865
+ "learning_rate": 8.642308765861406e-08,
25866
+ "loss": 0.6946,
25867
+ "step": 21370
25868
+ },
25869
+ {
25870
+ "epoch": 0.87,
25871
+ "learning_rate": 8.616117770208864e-08,
25872
+ "loss": 0.655,
25873
+ "step": 21375
25874
+ },
25875
+ {
25876
+ "epoch": 0.87,
25877
+ "learning_rate": 8.58996473462631e-08,
25878
+ "loss": 0.6549,
25879
+ "step": 21380
25880
+ },
25881
+ {
25882
+ "epoch": 0.87,
25883
+ "learning_rate": 8.563849669977463e-08,
25884
+ "loss": 0.6444,
25885
+ "step": 21385
25886
+ },
25887
+ {
25888
+ "epoch": 0.87,
25889
+ "learning_rate": 8.537772587110281e-08,
25890
+ "loss": 0.646,
25891
+ "step": 21390
25892
+ },
25893
+ {
25894
+ "epoch": 0.87,
25895
+ "learning_rate": 8.511733496856999e-08,
25896
+ "loss": 0.6792,
25897
+ "step": 21395
25898
+ },
25899
+ {
25900
+ "epoch": 0.87,
25901
+ "learning_rate": 8.485732410033985e-08,
25902
+ "loss": 0.6037,
25903
+ "step": 21400
25904
+ },
25905
+ {
25906
+ "epoch": 0.87,
25907
+ "learning_rate": 8.459769337441868e-08,
25908
+ "loss": 0.6055,
25909
+ "step": 21405
25910
+ },
25911
+ {
25912
+ "epoch": 0.87,
25913
+ "learning_rate": 8.433844289865521e-08,
25914
+ "loss": 0.6427,
25915
+ "step": 21410
25916
+ },
25917
+ {
25918
+ "epoch": 0.87,
25919
+ "learning_rate": 8.407957278073952e-08,
25920
+ "loss": 0.6628,
25921
+ "step": 21415
25922
+ },
25923
+ {
25924
+ "epoch": 0.87,
25925
+ "learning_rate": 8.382108312820401e-08,
25926
+ "loss": 0.6569,
25927
+ "step": 21420
25928
+ },
25929
+ {
25930
+ "epoch": 0.87,
25931
+ "learning_rate": 8.356297404842305e-08,
25932
+ "loss": 0.659,
25933
+ "step": 21425
25934
+ },
25935
+ {
25936
+ "epoch": 0.87,
25937
+ "learning_rate": 8.330524564861297e-08,
25938
+ "loss": 0.6279,
25939
+ "step": 21430
25940
+ },
25941
+ {
25942
+ "epoch": 0.87,
25943
+ "learning_rate": 8.304789803583201e-08,
25944
+ "loss": 0.6281,
25945
+ "step": 21435
25946
+ },
25947
+ {
25948
+ "epoch": 0.87,
25949
+ "learning_rate": 8.279093131697968e-08,
25950
+ "loss": 0.6327,
25951
+ "step": 21440
25952
+ },
25953
+ {
25954
+ "epoch": 0.87,
25955
+ "learning_rate": 8.253434559879835e-08,
25956
+ "loss": 0.6402,
25957
+ "step": 21445
25958
+ },
25959
+ {
25960
+ "epoch": 0.87,
25961
+ "learning_rate": 8.227814098787111e-08,
25962
+ "loss": 0.6601,
25963
+ "step": 21450
25964
+ },
25965
+ {
25966
+ "epoch": 0.87,
25967
+ "learning_rate": 8.202231759062305e-08,
25968
+ "loss": 0.6355,
25969
+ "step": 21455
25970
+ },
25971
+ {
25972
+ "epoch": 0.87,
25973
+ "learning_rate": 8.17668755133214e-08,
25974
+ "loss": 0.663,
25975
+ "step": 21460
25976
+ },
25977
+ {
25978
+ "epoch": 0.87,
25979
+ "learning_rate": 8.151181486207414e-08,
25980
+ "loss": 0.6715,
25981
+ "step": 21465
25982
+ },
25983
+ {
25984
+ "epoch": 0.87,
25985
+ "learning_rate": 8.125713574283155e-08,
25986
+ "loss": 0.6456,
25987
+ "step": 21470
25988
+ },
25989
+ {
25990
+ "epoch": 0.87,
25991
+ "learning_rate": 8.100283826138477e-08,
25992
+ "loss": 0.6243,
25993
+ "step": 21475
25994
+ },
25995
+ {
25996
+ "epoch": 0.87,
25997
+ "learning_rate": 8.074892252336718e-08,
25998
+ "loss": 0.6273,
25999
+ "step": 21480
26000
+ },
26001
+ {
26002
+ "epoch": 0.87,
26003
+ "learning_rate": 8.049538863425298e-08,
26004
+ "loss": 0.6379,
26005
+ "step": 21485
26006
+ },
26007
+ {
26008
+ "epoch": 0.87,
26009
+ "learning_rate": 8.024223669935782e-08,
26010
+ "loss": 0.6303,
26011
+ "step": 21490
26012
+ },
26013
+ {
26014
+ "epoch": 0.87,
26015
+ "learning_rate": 7.9989466823839e-08,
26016
+ "loss": 0.6826,
26017
+ "step": 21495
26018
+ },
26019
+ {
26020
+ "epoch": 0.87,
26021
+ "learning_rate": 7.973707911269489e-08,
26022
+ "loss": 0.6236,
26023
+ "step": 21500
26024
+ },
26025
+ {
26026
+ "epoch": 0.87,
26027
+ "learning_rate": 7.948507367076518e-08,
26028
+ "loss": 0.6341,
26029
+ "step": 21505
26030
+ },
26031
+ {
26032
+ "epoch": 0.87,
26033
+ "learning_rate": 7.923345060273046e-08,
26034
+ "loss": 0.6677,
26035
+ "step": 21510
26036
+ },
26037
+ {
26038
+ "epoch": 0.87,
26039
+ "learning_rate": 7.898221001311312e-08,
26040
+ "loss": 0.6299,
26041
+ "step": 21515
26042
+ },
26043
+ {
26044
+ "epoch": 0.87,
26045
+ "learning_rate": 7.873135200627623e-08,
26046
+ "loss": 0.6272,
26047
+ "step": 21520
26048
+ },
26049
+ {
26050
+ "epoch": 0.87,
26051
+ "learning_rate": 7.848087668642377e-08,
26052
+ "loss": 0.6455,
26053
+ "step": 21525
26054
+ },
26055
+ {
26056
+ "epoch": 0.87,
26057
+ "learning_rate": 7.823078415760143e-08,
26058
+ "loss": 0.6406,
26059
+ "step": 21530
26060
+ },
26061
+ {
26062
+ "epoch": 0.87,
26063
+ "learning_rate": 7.798107452369517e-08,
26064
+ "loss": 0.7099,
26065
+ "step": 21535
26066
+ },
26067
+ {
26068
+ "epoch": 0.87,
26069
+ "learning_rate": 7.773174788843218e-08,
26070
+ "loss": 0.6831,
26071
+ "step": 21540
26072
+ },
26073
+ {
26074
+ "epoch": 0.88,
26075
+ "learning_rate": 7.74828043553808e-08,
26076
+ "loss": 0.6205,
26077
+ "step": 21545
26078
+ },
26079
+ {
26080
+ "epoch": 0.88,
26081
+ "learning_rate": 7.723424402794998e-08,
26082
+ "loss": 0.649,
26083
+ "step": 21550
26084
+ },
26085
+ {
26086
+ "epoch": 0.88,
26087
+ "learning_rate": 7.698606700938936e-08,
26088
+ "loss": 0.6636,
26089
+ "step": 21555
26090
+ },
26091
+ {
26092
+ "epoch": 0.88,
26093
+ "learning_rate": 7.673827340278937e-08,
26094
+ "loss": 0.6314,
26095
+ "step": 21560
26096
+ },
26097
+ {
26098
+ "epoch": 0.88,
26099
+ "learning_rate": 7.649086331108178e-08,
26100
+ "loss": 0.6969,
26101
+ "step": 21565
26102
+ },
26103
+ {
26104
+ "epoch": 0.88,
26105
+ "learning_rate": 7.624383683703839e-08,
26106
+ "loss": 0.6516,
26107
+ "step": 21570
26108
+ },
26109
+ {
26110
+ "epoch": 0.88,
26111
+ "learning_rate": 7.599719408327155e-08,
26112
+ "loss": 0.7174,
26113
+ "step": 21575
26114
+ },
26115
+ {
26116
+ "epoch": 0.88,
26117
+ "learning_rate": 7.575093515223496e-08,
26118
+ "loss": 0.6436,
26119
+ "step": 21580
26120
+ },
26121
+ {
26122
+ "epoch": 0.88,
26123
+ "learning_rate": 7.550506014622215e-08,
26124
+ "loss": 0.6571,
26125
+ "step": 21585
26126
+ },
26127
+ {
26128
+ "epoch": 0.88,
26129
+ "learning_rate": 7.525956916736753e-08,
26130
+ "loss": 0.6919,
26131
+ "step": 21590
26132
+ },
26133
+ {
26134
+ "epoch": 0.88,
26135
+ "learning_rate": 7.501446231764607e-08,
26136
+ "loss": 0.6461,
26137
+ "step": 21595
26138
+ },
26139
+ {
26140
+ "epoch": 0.88,
26141
+ "learning_rate": 7.47697396988729e-08,
26142
+ "loss": 0.6169,
26143
+ "step": 21600
26144
+ },
26145
+ {
26146
+ "epoch": 0.88,
26147
+ "eval_loss": 0.6164625883102417,
26148
+ "eval_runtime": 140.1585,
26149
+ "eval_samples_per_second": 16.881,
26150
+ "eval_steps_per_second": 2.818,
26151
+ "step": 21600
26152
+ },
26153
+ {
26154
+ "epoch": 0.88,
26155
+ "learning_rate": 7.452540141270358e-08,
26156
+ "loss": 0.643,
26157
+ "step": 21605
26158
+ },
26159
+ {
26160
+ "epoch": 0.88,
26161
+ "learning_rate": 7.428144756063415e-08,
26162
+ "loss": 0.6571,
26163
+ "step": 21610
26164
+ },
26165
+ {
26166
+ "epoch": 0.88,
26167
+ "learning_rate": 7.403787824400098e-08,
26168
+ "loss": 0.6473,
26169
+ "step": 21615
26170
+ },
26171
+ {
26172
+ "epoch": 0.88,
26173
+ "learning_rate": 7.379469356398072e-08,
26174
+ "loss": 0.6089,
26175
+ "step": 21620
26176
+ },
26177
+ {
26178
+ "epoch": 0.88,
26179
+ "learning_rate": 7.355189362158997e-08,
26180
+ "loss": 0.6356,
26181
+ "step": 21625
26182
+ },
26183
+ {
26184
+ "epoch": 0.88,
26185
+ "learning_rate": 7.330947851768588e-08,
26186
+ "loss": 0.6338,
26187
+ "step": 21630
26188
+ },
26189
+ {
26190
+ "epoch": 0.88,
26191
+ "learning_rate": 7.306744835296563e-08,
26192
+ "loss": 0.6296,
26193
+ "step": 21635
26194
+ },
26195
+ {
26196
+ "epoch": 0.88,
26197
+ "learning_rate": 7.282580322796606e-08,
26198
+ "loss": 0.6403,
26199
+ "step": 21640
26200
+ },
26201
+ {
26202
+ "epoch": 0.88,
26203
+ "learning_rate": 7.258454324306495e-08,
26204
+ "loss": 0.649,
26205
+ "step": 21645
26206
+ },
26207
+ {
26208
+ "epoch": 0.88,
26209
+ "learning_rate": 7.23436684984794e-08,
26210
+ "loss": 0.6433,
26211
+ "step": 21650
26212
+ },
26213
+ {
26214
+ "epoch": 0.88,
26215
+ "learning_rate": 7.210317909426656e-08,
26216
+ "loss": 0.6741,
26217
+ "step": 21655
26218
+ },
26219
+ {
26220
+ "epoch": 0.88,
26221
+ "learning_rate": 7.186307513032364e-08,
26222
+ "loss": 0.6607,
26223
+ "step": 21660
26224
+ },
26225
+ {
26226
+ "epoch": 0.88,
26227
+ "learning_rate": 7.162335670638797e-08,
26228
+ "loss": 0.6845,
26229
+ "step": 21665
26230
+ },
26231
+ {
26232
+ "epoch": 0.88,
26233
+ "learning_rate": 7.138402392203646e-08,
26234
+ "loss": 0.5908,
26235
+ "step": 21670
26236
+ },
26237
+ {
26238
+ "epoch": 0.88,
26239
+ "learning_rate": 7.114507687668559e-08,
26240
+ "loss": 0.6756,
26241
+ "step": 21675
26242
+ },
26243
+ {
26244
+ "epoch": 0.88,
26245
+ "learning_rate": 7.090651566959216e-08,
26246
+ "loss": 0.6435,
26247
+ "step": 21680
26248
+ },
26249
+ {
26250
+ "epoch": 0.88,
26251
+ "learning_rate": 7.066834039985237e-08,
26252
+ "loss": 0.6275,
26253
+ "step": 21685
26254
+ },
26255
+ {
26256
+ "epoch": 0.88,
26257
+ "learning_rate": 7.043055116640206e-08,
26258
+ "loss": 0.6286,
26259
+ "step": 21690
26260
+ },
26261
+ {
26262
+ "epoch": 0.88,
26263
+ "learning_rate": 7.019314806801679e-08,
26264
+ "loss": 0.6561,
26265
+ "step": 21695
26266
+ },
26267
+ {
26268
+ "epoch": 0.88,
26269
+ "learning_rate": 6.99561312033119e-08,
26270
+ "loss": 0.6653,
26271
+ "step": 21700
26272
+ },
26273
+ {
26274
+ "epoch": 0.88,
26275
+ "learning_rate": 6.971950067074206e-08,
26276
+ "loss": 0.6333,
26277
+ "step": 21705
26278
+ },
26279
+ {
26280
+ "epoch": 0.88,
26281
+ "learning_rate": 6.948325656860143e-08,
26282
+ "loss": 0.6574,
26283
+ "step": 21710
26284
+ },
26285
+ {
26286
+ "epoch": 0.88,
26287
+ "learning_rate": 6.924739899502396e-08,
26288
+ "loss": 0.6581,
26289
+ "step": 21715
26290
+ },
26291
+ {
26292
+ "epoch": 0.88,
26293
+ "learning_rate": 6.901192804798272e-08,
26294
+ "loss": 0.6574,
26295
+ "step": 21720
26296
+ },
26297
+ {
26298
+ "epoch": 0.88,
26299
+ "learning_rate": 6.877684382529025e-08,
26300
+ "loss": 0.6292,
26301
+ "step": 21725
26302
+ },
26303
+ {
26304
+ "epoch": 0.88,
26305
+ "learning_rate": 6.854214642459855e-08,
26306
+ "loss": 0.6288,
26307
+ "step": 21730
26308
+ },
26309
+ {
26310
+ "epoch": 0.88,
26311
+ "learning_rate": 6.830783594339895e-08,
26312
+ "loss": 0.6242,
26313
+ "step": 21735
26314
+ },
26315
+ {
26316
+ "epoch": 0.88,
26317
+ "learning_rate": 6.807391247902195e-08,
26318
+ "loss": 0.6551,
26319
+ "step": 21740
26320
+ },
26321
+ {
26322
+ "epoch": 0.88,
26323
+ "learning_rate": 6.784037612863702e-08,
26324
+ "loss": 0.6485,
26325
+ "step": 21745
26326
+ },
26327
+ {
26328
+ "epoch": 0.88,
26329
+ "learning_rate": 6.760722698925358e-08,
26330
+ "loss": 0.6398,
26331
+ "step": 21750
26332
+ },
26333
+ {
26334
+ "epoch": 0.88,
26335
+ "learning_rate": 6.737446515771961e-08,
26336
+ "loss": 0.6063,
26337
+ "step": 21755
26338
+ },
26339
+ {
26340
+ "epoch": 0.88,
26341
+ "learning_rate": 6.714209073072218e-08,
26342
+ "loss": 0.6095,
26343
+ "step": 21760
26344
+ },
26345
+ {
26346
+ "epoch": 0.88,
26347
+ "learning_rate": 6.691010380478779e-08,
26348
+ "loss": 0.6306,
26349
+ "step": 21765
26350
+ },
26351
+ {
26352
+ "epoch": 0.88,
26353
+ "learning_rate": 6.667850447628175e-08,
26354
+ "loss": 0.5991,
26355
+ "step": 21770
26356
+ },
26357
+ {
26358
+ "epoch": 0.88,
26359
+ "learning_rate": 6.644729284140826e-08,
26360
+ "loss": 0.6475,
26361
+ "step": 21775
26362
+ },
26363
+ {
26364
+ "epoch": 0.88,
26365
+ "learning_rate": 6.621646899621091e-08,
26366
+ "loss": 0.6737,
26367
+ "step": 21780
26368
+ },
26369
+ {
26370
+ "epoch": 0.88,
26371
+ "learning_rate": 6.598603303657179e-08,
26372
+ "loss": 0.6395,
26373
+ "step": 21785
26374
+ },
26375
+ {
26376
+ "epoch": 0.89,
26377
+ "learning_rate": 6.5755985058212e-08,
26378
+ "loss": 0.6428,
26379
+ "step": 21790
26380
+ },
26381
+ {
26382
+ "epoch": 0.89,
26383
+ "learning_rate": 6.552632515669121e-08,
26384
+ "loss": 0.6312,
26385
+ "step": 21795
26386
+ },
26387
+ {
26388
+ "epoch": 0.89,
26389
+ "learning_rate": 6.529705342740843e-08,
26390
+ "loss": 0.6315,
26391
+ "step": 21800
26392
+ },
26393
+ {
26394
+ "epoch": 0.89,
26395
+ "learning_rate": 6.506816996560127e-08,
26396
+ "loss": 0.6268,
26397
+ "step": 21805
26398
+ },
26399
+ {
26400
+ "epoch": 0.89,
26401
+ "learning_rate": 6.483967486634546e-08,
26402
+ "loss": 0.664,
26403
+ "step": 21810
26404
+ },
26405
+ {
26406
+ "epoch": 0.89,
26407
+ "learning_rate": 6.461156822455638e-08,
26408
+ "loss": 0.6397,
26409
+ "step": 21815
26410
+ },
26411
+ {
26412
+ "epoch": 0.89,
26413
+ "learning_rate": 6.438385013498726e-08,
26414
+ "loss": 0.6273,
26415
+ "step": 21820
26416
+ },
26417
+ {
26418
+ "epoch": 0.89,
26419
+ "learning_rate": 6.415652069223032e-08,
26420
+ "loss": 0.6245,
26421
+ "step": 21825
26422
+ },
26423
+ {
26424
+ "epoch": 0.89,
26425
+ "learning_rate": 6.392957999071602e-08,
26426
+ "loss": 0.6921,
26427
+ "step": 21830
26428
+ },
26429
+ {
26430
+ "epoch": 0.89,
26431
+ "learning_rate": 6.370302812471384e-08,
26432
+ "loss": 0.6685,
26433
+ "step": 21835
26434
+ },
26435
+ {
26436
+ "epoch": 0.89,
26437
+ "learning_rate": 6.34768651883314e-08,
26438
+ "loss": 0.6563,
26439
+ "step": 21840
26440
+ },
26441
+ {
26442
+ "epoch": 0.89,
26443
+ "learning_rate": 6.325109127551465e-08,
26444
+ "loss": 0.6676,
26445
+ "step": 21845
26446
+ },
26447
+ {
26448
+ "epoch": 0.89,
26449
+ "learning_rate": 6.302570648004834e-08,
26450
+ "loss": 0.682,
26451
+ "step": 21850
26452
+ },
26453
+ {
26454
+ "epoch": 0.89,
26455
+ "learning_rate": 6.280071089555516e-08,
26456
+ "loss": 0.63,
26457
+ "step": 21855
26458
+ },
26459
+ {
26460
+ "epoch": 0.89,
26461
+ "learning_rate": 6.257610461549634e-08,
26462
+ "loss": 0.6781,
26463
+ "step": 21860
26464
+ },
26465
+ {
26466
+ "epoch": 0.89,
26467
+ "learning_rate": 6.235188773317146e-08,
26468
+ "loss": 0.6647,
26469
+ "step": 21865
26470
+ },
26471
+ {
26472
+ "epoch": 0.89,
26473
+ "learning_rate": 6.212806034171836e-08,
26474
+ "loss": 0.6611,
26475
+ "step": 21870
26476
+ },
26477
+ {
26478
+ "epoch": 0.89,
26479
+ "learning_rate": 6.190462253411277e-08,
26480
+ "loss": 0.658,
26481
+ "step": 21875
26482
+ },
26483
+ {
26484
+ "epoch": 0.89,
26485
+ "learning_rate": 6.16815744031688e-08,
26486
+ "loss": 0.6362,
26487
+ "step": 21880
26488
+ },
26489
+ {
26490
+ "epoch": 0.89,
26491
+ "learning_rate": 6.145891604153886e-08,
26492
+ "loss": 0.64,
26493
+ "step": 21885
26494
+ },
26495
+ {
26496
+ "epoch": 0.89,
26497
+ "learning_rate": 6.123664754171331e-08,
26498
+ "loss": 0.6428,
26499
+ "step": 21890
26500
+ },
26501
+ {
26502
+ "epoch": 0.89,
26503
+ "learning_rate": 6.101476899602043e-08,
26504
+ "loss": 0.6626,
26505
+ "step": 21895
26506
+ },
26507
+ {
26508
+ "epoch": 0.89,
26509
+ "learning_rate": 6.079328049662668e-08,
26510
+ "loss": 0.6502,
26511
+ "step": 21900
26512
+ },
26513
+ {
26514
+ "epoch": 0.89,
26515
+ "learning_rate": 6.057218213553661e-08,
26516
+ "loss": 0.6694,
26517
+ "step": 21905
26518
+ },
26519
+ {
26520
+ "epoch": 0.89,
26521
+ "learning_rate": 6.035147400459217e-08,
26522
+ "loss": 0.642,
26523
+ "step": 21910
26524
+ },
26525
+ {
26526
+ "epoch": 0.89,
26527
+ "learning_rate": 6.013115619547404e-08,
26528
+ "loss": 0.6864,
26529
+ "step": 21915
26530
+ },
26531
+ {
26532
+ "epoch": 0.89,
26533
+ "learning_rate": 5.991122879970012e-08,
26534
+ "loss": 0.636,
26535
+ "step": 21920
26536
+ },
26537
+ {
26538
+ "epoch": 0.89,
26539
+ "learning_rate": 5.969169190862644e-08,
26540
+ "loss": 0.6338,
26541
+ "step": 21925
26542
+ },
26543
+ {
26544
+ "epoch": 0.89,
26545
+ "learning_rate": 5.947254561344628e-08,
26546
+ "loss": 0.6647,
26547
+ "step": 21930
26548
+ },
26549
+ {
26550
+ "epoch": 0.89,
26551
+ "learning_rate": 5.9253790005191705e-08,
26552
+ "loss": 0.65,
26553
+ "step": 21935
26554
+ },
26555
+ {
26556
+ "epoch": 0.89,
26557
+ "learning_rate": 5.90354251747317e-08,
26558
+ "loss": 0.6274,
26559
+ "step": 21940
26560
+ },
26561
+ {
26562
+ "epoch": 0.89,
26563
+ "learning_rate": 5.8817451212772815e-08,
26564
+ "loss": 0.6559,
26565
+ "step": 21945
26566
+ },
26567
+ {
26568
+ "epoch": 0.89,
26569
+ "learning_rate": 5.859986820985985e-08,
26570
+ "loss": 0.6318,
26571
+ "step": 21950
26572
+ },
26573
+ {
26574
+ "epoch": 0.89,
26575
+ "learning_rate": 5.838267625637494e-08,
26576
+ "loss": 0.6755,
26577
+ "step": 21955
26578
+ },
26579
+ {
26580
+ "epoch": 0.89,
26581
+ "learning_rate": 5.8165875442537594e-08,
26582
+ "loss": 0.6342,
26583
+ "step": 21960
26584
+ },
26585
+ {
26586
+ "epoch": 0.89,
26587
+ "learning_rate": 5.7949465858404766e-08,
26588
+ "loss": 0.6707,
26589
+ "step": 21965
26590
+ },
26591
+ {
26592
+ "epoch": 0.89,
26593
+ "learning_rate": 5.773344759387155e-08,
26594
+ "loss": 0.6339,
26595
+ "step": 21970
26596
+ },
26597
+ {
26598
+ "epoch": 0.89,
26599
+ "learning_rate": 5.751782073866984e-08,
26600
+ "loss": 0.6389,
26601
+ "step": 21975
26602
+ },
26603
+ {
26604
+ "epoch": 0.89,
26605
+ "learning_rate": 5.730258538236909e-08,
26606
+ "loss": 0.6287,
26607
+ "step": 21980
26608
+ },
26609
+ {
26610
+ "epoch": 0.89,
26611
+ "learning_rate": 5.708774161437635e-08,
26612
+ "loss": 0.6844,
26613
+ "step": 21985
26614
+ },
26615
+ {
26616
+ "epoch": 0.89,
26617
+ "learning_rate": 5.6873289523935775e-08,
26618
+ "loss": 0.6497,
26619
+ "step": 21990
26620
+ },
26621
+ {
26622
+ "epoch": 0.89,
26623
+ "learning_rate": 5.665922920012878e-08,
26624
+ "loss": 0.7022,
26625
+ "step": 21995
26626
+ },
26627
+ {
26628
+ "epoch": 0.89,
26629
+ "learning_rate": 5.644556073187445e-08,
26630
+ "loss": 0.6279,
26631
+ "step": 22000
26632
+ },
26633
+ {
26634
+ "epoch": 0.89,
26635
+ "learning_rate": 5.6232284207928584e-08,
26636
+ "loss": 0.6533,
26637
+ "step": 22005
26638
+ },
26639
+ {
26640
+ "epoch": 0.89,
26641
+ "learning_rate": 5.601939971688452e-08,
26642
+ "loss": 0.635,
26643
+ "step": 22010
26644
+ },
26645
+ {
26646
+ "epoch": 0.89,
26647
+ "learning_rate": 5.580690734717241e-08,
26648
+ "loss": 0.6474,
26649
+ "step": 22015
26650
+ },
26651
+ {
26652
+ "epoch": 0.89,
26653
+ "learning_rate": 5.559480718706e-08,
26654
+ "loss": 0.6732,
26655
+ "step": 22020
26656
+ },
26657
+ {
26658
+ "epoch": 0.89,
26659
+ "learning_rate": 5.5383099324651684e-08,
26660
+ "loss": 0.6351,
26661
+ "step": 22025
26662
+ },
26663
+ {
26664
+ "epoch": 0.89,
26665
+ "learning_rate": 5.5171783847889006e-08,
26666
+ "loss": 0.6832,
26667
+ "step": 22030
26668
+ },
26669
+ {
26670
+ "epoch": 0.9,
26671
+ "learning_rate": 5.496086084455087e-08,
26672
+ "loss": 0.6403,
26673
+ "step": 22035
26674
+ },
26675
+ {
26676
+ "epoch": 0.9,
26677
+ "learning_rate": 5.475033040225274e-08,
26678
+ "loss": 0.6231,
26679
+ "step": 22040
26680
+ },
26681
+ {
26682
+ "epoch": 0.9,
26683
+ "learning_rate": 5.454019260844678e-08,
26684
+ "loss": 0.6585,
26685
+ "step": 22045
26686
+ },
26687
+ {
26688
+ "epoch": 0.9,
26689
+ "learning_rate": 5.433044755042293e-08,
26690
+ "loss": 0.6478,
26691
+ "step": 22050
26692
+ },
26693
+ {
26694
+ "epoch": 0.9,
26695
+ "learning_rate": 5.4121095315307173e-08,
26696
+ "loss": 0.627,
26697
+ "step": 22055
26698
+ },
26699
+ {
26700
+ "epoch": 0.9,
26701
+ "learning_rate": 5.3912135990062726e-08,
26702
+ "loss": 0.6431,
26703
+ "step": 22060
26704
+ },
26705
+ {
26706
+ "epoch": 0.9,
26707
+ "learning_rate": 5.370356966148914e-08,
26708
+ "loss": 0.6224,
26709
+ "step": 22065
26710
+ },
26711
+ {
26712
+ "epoch": 0.9,
26713
+ "learning_rate": 5.3495396416223584e-08,
26714
+ "loss": 0.6851,
26715
+ "step": 22070
26716
+ },
26717
+ {
26718
+ "epoch": 0.9,
26719
+ "learning_rate": 5.3287616340739084e-08,
26720
+ "loss": 0.6471,
26721
+ "step": 22075
26722
+ },
26723
+ {
26724
+ "epoch": 0.9,
26725
+ "learning_rate": 5.308022952134561e-08,
26726
+ "loss": 0.6231,
26727
+ "step": 22080
26728
+ },
26729
+ {
26730
+ "epoch": 0.9,
26731
+ "learning_rate": 5.287323604419014e-08,
26732
+ "loss": 0.6358,
26733
+ "step": 22085
26734
+ },
26735
+ {
26736
+ "epoch": 0.9,
26737
+ "learning_rate": 5.266663599525578e-08,
26738
+ "loss": 0.6723,
26739
+ "step": 22090
26740
+ },
26741
+ {
26742
+ "epoch": 0.9,
26743
+ "learning_rate": 5.246042946036244e-08,
26744
+ "loss": 0.674,
26745
+ "step": 22095
26746
+ },
26747
+ {
26748
+ "epoch": 0.9,
26749
+ "learning_rate": 5.225461652516639e-08,
26750
+ "loss": 0.6099,
26751
+ "step": 22100
26752
+ },
26753
+ {
26754
+ "epoch": 0.9,
26755
+ "learning_rate": 5.204919727516066e-08,
26756
+ "loss": 0.6175,
26757
+ "step": 22105
26758
+ },
26759
+ {
26760
+ "epoch": 0.9,
26761
+ "learning_rate": 5.184417179567468e-08,
26762
+ "loss": 0.6383,
26763
+ "step": 22110
26764
+ },
26765
+ {
26766
+ "epoch": 0.9,
26767
+ "learning_rate": 5.163954017187399e-08,
26768
+ "loss": 0.6796,
26769
+ "step": 22115
26770
+ },
26771
+ {
26772
+ "epoch": 0.9,
26773
+ "learning_rate": 5.143530248876116e-08,
26774
+ "loss": 0.6428,
26775
+ "step": 22120
26776
+ },
26777
+ {
26778
+ "epoch": 0.9,
26779
+ "learning_rate": 5.123145883117452e-08,
26780
+ "loss": 0.7068,
26781
+ "step": 22125
26782
+ },
26783
+ {
26784
+ "epoch": 0.9,
26785
+ "learning_rate": 5.102800928378881e-08,
26786
+ "loss": 0.6485,
26787
+ "step": 22130
26788
+ },
26789
+ {
26790
+ "epoch": 0.9,
26791
+ "learning_rate": 5.082495393111563e-08,
26792
+ "loss": 0.6488,
26793
+ "step": 22135
26794
+ },
26795
+ {
26796
+ "epoch": 0.9,
26797
+ "learning_rate": 5.062229285750208e-08,
26798
+ "loss": 0.639,
26799
+ "step": 22140
26800
+ },
26801
+ {
26802
+ "epoch": 0.9,
26803
+ "learning_rate": 5.0420026147131925e-08,
26804
+ "loss": 0.6629,
26805
+ "step": 22145
26806
+ },
26807
+ {
26808
+ "epoch": 0.9,
26809
+ "learning_rate": 5.021815388402473e-08,
26810
+ "loss": 0.6624,
26811
+ "step": 22150
26812
+ },
26813
+ {
26814
+ "epoch": 0.9,
26815
+ "learning_rate": 5.0016676152036974e-08,
26816
+ "loss": 0.6282,
26817
+ "step": 22155
26818
+ },
26819
+ {
26820
+ "epoch": 0.9,
26821
+ "learning_rate": 4.981559303486038e-08,
26822
+ "loss": 0.6319,
26823
+ "step": 22160
26824
+ },
26825
+ {
26826
+ "epoch": 0.9,
26827
+ "learning_rate": 4.9614904616023134e-08,
26828
+ "loss": 0.6655,
26829
+ "step": 22165
26830
+ },
26831
+ {
26832
+ "epoch": 0.9,
26833
+ "learning_rate": 4.941461097888966e-08,
26834
+ "loss": 0.6915,
26835
+ "step": 22170
26836
+ },
26837
+ {
26838
+ "epoch": 0.9,
26839
+ "learning_rate": 4.921471220666018e-08,
26840
+ "loss": 0.5924,
26841
+ "step": 22175
26842
+ },
26843
+ {
26844
+ "epoch": 0.9,
26845
+ "learning_rate": 4.901520838237061e-08,
26846
+ "loss": 0.6699,
26847
+ "step": 22180
26848
+ },
26849
+ {
26850
+ "epoch": 0.9,
26851
+ "learning_rate": 4.8816099588893436e-08,
26852
+ "loss": 0.635,
26853
+ "step": 22185
26854
+ },
26855
+ {
26856
+ "epoch": 0.9,
26857
+ "learning_rate": 4.86173859089366e-08,
26858
+ "loss": 0.6572,
26859
+ "step": 22190
26860
+ },
26861
+ {
26862
+ "epoch": 0.9,
26863
+ "learning_rate": 4.8419067425044094e-08,
26864
+ "loss": 0.6296,
26865
+ "step": 22195
26866
+ },
26867
+ {
26868
+ "epoch": 0.9,
26869
+ "learning_rate": 4.822114421959545e-08,
26870
+ "loss": 0.6598,
26871
+ "step": 22200
26872
+ },
26873
+ {
26874
+ "epoch": 0.9,
26875
+ "learning_rate": 4.8023616374806564e-08,
26876
+ "loss": 0.6451,
26877
+ "step": 22205
26878
+ },
26879
+ {
26880
+ "epoch": 0.9,
26881
+ "learning_rate": 4.782648397272859e-08,
26882
+ "loss": 0.6616,
26883
+ "step": 22210
26884
+ },
26885
+ {
26886
+ "epoch": 0.9,
26887
+ "learning_rate": 4.762974709524858e-08,
26888
+ "loss": 0.6583,
26889
+ "step": 22215
26890
+ },
26891
+ {
26892
+ "epoch": 0.9,
26893
+ "learning_rate": 4.743340582408961e-08,
26894
+ "loss": 0.66,
26895
+ "step": 22220
26896
+ },
26897
+ {
26898
+ "epoch": 0.9,
26899
+ "learning_rate": 4.723746024080988e-08,
26900
+ "loss": 0.6213,
26901
+ "step": 22225
26902
+ },
26903
+ {
26904
+ "epoch": 0.9,
26905
+ "learning_rate": 4.70419104268035e-08,
26906
+ "loss": 0.6316,
26907
+ "step": 22230
26908
+ },
26909
+ {
26910
+ "epoch": 0.9,
26911
+ "learning_rate": 4.6846756463300054e-08,
26912
+ "loss": 0.6505,
26913
+ "step": 22235
26914
+ },
26915
+ {
26916
+ "epoch": 0.9,
26917
+ "learning_rate": 4.665199843136513e-08,
26918
+ "loss": 0.6645,
26919
+ "step": 22240
26920
+ },
26921
+ {
26922
+ "epoch": 0.9,
26923
+ "learning_rate": 4.645763641189937e-08,
26924
+ "loss": 0.6523,
26925
+ "step": 22245
26926
+ },
26927
+ {
26928
+ "epoch": 0.9,
26929
+ "learning_rate": 4.626367048563884e-08,
26930
+ "loss": 0.6516,
26931
+ "step": 22250
26932
+ },
26933
+ {
26934
+ "epoch": 0.9,
26935
+ "learning_rate": 4.607010073315565e-08,
26936
+ "loss": 0.638,
26937
+ "step": 22255
26938
+ },
26939
+ {
26940
+ "epoch": 0.9,
26941
+ "learning_rate": 4.587692723485681e-08,
26942
+ "loss": 0.5924,
26943
+ "step": 22260
26944
+ },
26945
+ {
26946
+ "epoch": 0.9,
26947
+ "learning_rate": 4.5684150070984804e-08,
26948
+ "loss": 0.6316,
26949
+ "step": 22265
26950
+ },
26951
+ {
26952
+ "epoch": 0.9,
26953
+ "learning_rate": 4.549176932161791e-08,
26954
+ "loss": 0.6585,
26955
+ "step": 22270
26956
+ },
26957
+ {
26958
+ "epoch": 0.9,
26959
+ "learning_rate": 4.5299785066669205e-08,
26960
+ "loss": 0.6547,
26961
+ "step": 22275
26962
+ },
26963
+ {
26964
+ "epoch": 0.9,
26965
+ "learning_rate": 4.5108197385887335e-08,
26966
+ "loss": 0.6432,
26967
+ "step": 22280
26968
+ },
26969
+ {
26970
+ "epoch": 0.91,
26971
+ "learning_rate": 4.491700635885598e-08,
26972
+ "loss": 0.6075,
26973
+ "step": 22285
26974
+ },
26975
+ {
26976
+ "epoch": 0.91,
26977
+ "learning_rate": 4.4726212064994493e-08,
26978
+ "loss": 0.5892,
26979
+ "step": 22290
26980
+ },
26981
+ {
26982
+ "epoch": 0.91,
26983
+ "learning_rate": 4.453581458355704e-08,
26984
+ "loss": 0.6202,
26985
+ "step": 22295
26986
+ },
26987
+ {
26988
+ "epoch": 0.91,
26989
+ "learning_rate": 4.4345813993632905e-08,
26990
+ "loss": 0.6405,
26991
+ "step": 22300
26992
+ },
26993
+ {
26994
+ "epoch": 0.91,
26995
+ "learning_rate": 4.4156210374147075e-08,
26996
+ "loss": 0.6393,
26997
+ "step": 22305
26998
+ },
26999
+ {
27000
+ "epoch": 0.91,
27001
+ "learning_rate": 4.396700380385898e-08,
27002
+ "loss": 0.6548,
27003
+ "step": 22310
27004
+ },
27005
+ {
27006
+ "epoch": 0.91,
27007
+ "learning_rate": 4.377819436136332e-08,
27008
+ "loss": 0.6813,
27009
+ "step": 22315
27010
+ },
27011
+ {
27012
+ "epoch": 0.91,
27013
+ "learning_rate": 4.358978212509012e-08,
27014
+ "loss": 0.6689,
27015
+ "step": 22320
27016
+ },
27017
+ {
27018
+ "epoch": 0.91,
27019
+ "learning_rate": 4.340176717330413e-08,
27020
+ "loss": 0.6631,
27021
+ "step": 22325
27022
+ },
27023
+ {
27024
+ "epoch": 0.91,
27025
+ "learning_rate": 4.3214149584105076e-08,
27026
+ "loss": 0.6586,
27027
+ "step": 22330
27028
+ },
27029
+ {
27030
+ "epoch": 0.91,
27031
+ "learning_rate": 4.3026929435427516e-08,
27032
+ "loss": 0.6643,
27033
+ "step": 22335
27034
+ },
27035
+ {
27036
+ "epoch": 0.91,
27037
+ "learning_rate": 4.2840106805041354e-08,
27038
+ "loss": 0.654,
27039
+ "step": 22340
27040
+ },
27041
+ {
27042
+ "epoch": 0.91,
27043
+ "learning_rate": 4.2653681770550955e-08,
27044
+ "loss": 0.6688,
27045
+ "step": 22345
27046
+ },
27047
+ {
27048
+ "epoch": 0.91,
27049
+ "learning_rate": 4.2467654409395484e-08,
27050
+ "loss": 0.6761,
27051
+ "step": 22350
27052
+ },
27053
+ {
27054
+ "epoch": 0.91,
27055
+ "learning_rate": 4.228202479884946e-08,
27056
+ "loss": 0.6667,
27057
+ "step": 22355
27058
+ },
27059
+ {
27060
+ "epoch": 0.91,
27061
+ "learning_rate": 4.209679301602165e-08,
27062
+ "loss": 0.6749,
27063
+ "step": 22360
27064
+ },
27065
+ {
27066
+ "epoch": 0.91,
27067
+ "learning_rate": 4.191195913785561e-08,
27068
+ "loss": 0.6396,
27069
+ "step": 22365
27070
+ },
27071
+ {
27072
+ "epoch": 0.91,
27073
+ "learning_rate": 4.1727523241129606e-08,
27074
+ "loss": 0.6696,
27075
+ "step": 22370
27076
+ },
27077
+ {
27078
+ "epoch": 0.91,
27079
+ "learning_rate": 4.154348540245711e-08,
27080
+ "loss": 0.6364,
27081
+ "step": 22375
27082
+ },
27083
+ {
27084
+ "epoch": 0.91,
27085
+ "learning_rate": 4.135984569828566e-08,
27086
+ "loss": 0.6495,
27087
+ "step": 22380
27088
+ },
27089
+ {
27090
+ "epoch": 0.91,
27091
+ "learning_rate": 4.1176604204897434e-08,
27092
+ "loss": 0.6496,
27093
+ "step": 22385
27094
+ },
27095
+ {
27096
+ "epoch": 0.91,
27097
+ "learning_rate": 4.099376099840968e-08,
27098
+ "loss": 0.6268,
27099
+ "step": 22390
27100
+ },
27101
+ {
27102
+ "epoch": 0.91,
27103
+ "learning_rate": 4.0811316154773515e-08,
27104
+ "loss": 0.6527,
27105
+ "step": 22395
27106
+ },
27107
+ {
27108
+ "epoch": 0.91,
27109
+ "learning_rate": 4.06292697497751e-08,
27110
+ "loss": 0.651,
27111
+ "step": 22400
27112
+ },
27113
+ {
27114
+ "epoch": 0.91,
27115
+ "eval_loss": 0.6161190867424011,
27116
+ "eval_runtime": 139.4449,
27117
+ "eval_samples_per_second": 16.967,
27118
+ "eval_steps_per_second": 2.833,
27119
+ "step": 22400
27120
+ },
27121
+ {
27122
+ "epoch": 0.91,
27123
+ "learning_rate": 4.044762185903494e-08,
27124
+ "loss": 0.6551,
27125
+ "step": 22405
27126
+ },
27127
+ {
27128
+ "epoch": 0.91,
27129
+ "learning_rate": 4.026637255800813e-08,
27130
+ "loss": 0.6677,
27131
+ "step": 22410
27132
+ },
27133
+ {
27134
+ "epoch": 0.91,
27135
+ "learning_rate": 4.008552192198378e-08,
27136
+ "loss": 0.6738,
27137
+ "step": 22415
27138
+ },
27139
+ {
27140
+ "epoch": 0.91,
27141
+ "learning_rate": 3.9905070026085784e-08,
27142
+ "loss": 0.6642,
27143
+ "step": 22420
27144
+ },
27145
+ {
27146
+ "epoch": 0.91,
27147
+ "learning_rate": 3.9725016945272416e-08,
27148
+ "loss": 0.6399,
27149
+ "step": 22425
27150
+ },
27151
+ {
27152
+ "epoch": 0.91,
27153
+ "learning_rate": 3.9545362754335955e-08,
27154
+ "loss": 0.6706,
27155
+ "step": 22430
27156
+ },
27157
+ {
27158
+ "epoch": 0.91,
27159
+ "learning_rate": 3.936610752790326e-08,
27160
+ "loss": 0.686,
27161
+ "step": 22435
27162
+ },
27163
+ {
27164
+ "epoch": 0.91,
27165
+ "learning_rate": 3.9187251340435653e-08,
27166
+ "loss": 0.5905,
27167
+ "step": 22440
27168
+ },
27169
+ {
27170
+ "epoch": 0.91,
27171
+ "learning_rate": 3.900879426622794e-08,
27172
+ "loss": 0.5991,
27173
+ "step": 22445
27174
+ },
27175
+ {
27176
+ "epoch": 0.91,
27177
+ "learning_rate": 3.8830736379409814e-08,
27178
+ "loss": 0.6265,
27179
+ "step": 22450
27180
+ },
27181
+ {
27182
+ "epoch": 0.91,
27183
+ "learning_rate": 3.865307775394533e-08,
27184
+ "loss": 0.6525,
27185
+ "step": 22455
27186
+ },
27187
+ {
27188
+ "epoch": 0.91,
27189
+ "learning_rate": 3.84758184636319e-08,
27190
+ "loss": 0.6228,
27191
+ "step": 22460
27192
+ },
27193
+ {
27194
+ "epoch": 0.91,
27195
+ "learning_rate": 3.829895858210186e-08,
27196
+ "loss": 0.6736,
27197
+ "step": 22465
27198
+ },
27199
+ {
27200
+ "epoch": 0.91,
27201
+ "learning_rate": 3.812249818282076e-08,
27202
+ "loss": 0.6303,
27203
+ "step": 22470
27204
+ },
27205
+ {
27206
+ "epoch": 0.91,
27207
+ "learning_rate": 3.79464373390892e-08,
27208
+ "loss": 0.6367,
27209
+ "step": 22475
27210
+ },
27211
+ {
27212
+ "epoch": 0.91,
27213
+ "learning_rate": 3.777077612404123e-08,
27214
+ "loss": 0.6334,
27215
+ "step": 22480
27216
+ },
27217
+ {
27218
+ "epoch": 0.91,
27219
+ "learning_rate": 3.75955146106447e-08,
27220
+ "loss": 0.6403,
27221
+ "step": 22485
27222
+ },
27223
+ {
27224
+ "epoch": 0.91,
27225
+ "learning_rate": 3.742065287170215e-08,
27226
+ "loss": 0.6089,
27227
+ "step": 22490
27228
+ },
27229
+ {
27230
+ "epoch": 0.91,
27231
+ "learning_rate": 3.724619097984916e-08,
27232
+ "loss": 0.6471,
27233
+ "step": 22495
27234
+ },
27235
+ {
27236
+ "epoch": 0.91,
27237
+ "learning_rate": 3.707212900755608e-08,
27238
+ "loss": 0.6479,
27239
+ "step": 22500
27240
+ },
27241
+ {
27242
+ "epoch": 0.91,
27243
+ "learning_rate": 3.689846702712651e-08,
27244
+ "loss": 0.6429,
27245
+ "step": 22505
27246
+ },
27247
+ {
27248
+ "epoch": 0.91,
27249
+ "learning_rate": 3.672520511069821e-08,
27250
+ "loss": 0.6175,
27251
+ "step": 22510
27252
+ },
27253
+ {
27254
+ "epoch": 0.91,
27255
+ "learning_rate": 3.655234333024271e-08,
27256
+ "loss": 0.6745,
27257
+ "step": 22515
27258
+ },
27259
+ {
27260
+ "epoch": 0.91,
27261
+ "learning_rate": 3.637988175756512e-08,
27262
+ "loss": 0.6604,
27263
+ "step": 22520
27264
+ },
27265
+ {
27266
+ "epoch": 0.91,
27267
+ "learning_rate": 3.6207820464304814e-08,
27268
+ "loss": 0.6285,
27269
+ "step": 22525
27270
+ },
27271
+ {
27272
+ "epoch": 0.92,
27273
+ "learning_rate": 3.603615952193417e-08,
27274
+ "loss": 0.6314,
27275
+ "step": 22530
27276
+ },
27277
+ {
27278
+ "epoch": 0.92,
27279
+ "learning_rate": 3.5864899001759706e-08,
27280
+ "loss": 0.6703,
27281
+ "step": 22535
27282
+ },
27283
+ {
27284
+ "epoch": 0.92,
27285
+ "learning_rate": 3.569403897492185e-08,
27286
+ "loss": 0.6586,
27287
+ "step": 22540
27288
+ },
27289
+ {
27290
+ "epoch": 0.92,
27291
+ "learning_rate": 3.552357951239427e-08,
27292
+ "loss": 0.6588,
27293
+ "step": 22545
27294
+ },
27295
+ {
27296
+ "epoch": 0.92,
27297
+ "learning_rate": 3.5353520684984096e-08,
27298
+ "loss": 0.6623,
27299
+ "step": 22550
27300
+ },
27301
+ {
27302
+ "epoch": 0.92,
27303
+ "learning_rate": 3.51838625633325e-08,
27304
+ "loss": 0.6619,
27305
+ "step": 22555
27306
+ },
27307
+ {
27308
+ "epoch": 0.92,
27309
+ "learning_rate": 3.501460521791399e-08,
27310
+ "loss": 0.7056,
27311
+ "step": 22560
27312
+ },
27313
+ {
27314
+ "epoch": 0.92,
27315
+ "learning_rate": 3.484574871903656e-08,
27316
+ "loss": 0.6647,
27317
+ "step": 22565
27318
+ },
27319
+ {
27320
+ "epoch": 0.92,
27321
+ "learning_rate": 3.467729313684153e-08,
27322
+ "loss": 0.6277,
27323
+ "step": 22570
27324
+ },
27325
+ {
27326
+ "epoch": 0.92,
27327
+ "learning_rate": 3.4509238541304384e-08,
27328
+ "loss": 0.6705,
27329
+ "step": 22575
27330
+ },
27331
+ {
27332
+ "epoch": 0.92,
27333
+ "learning_rate": 3.4341585002232945e-08,
27334
+ "loss": 0.654,
27335
+ "step": 22580
27336
+ },
27337
+ {
27338
+ "epoch": 0.92,
27339
+ "learning_rate": 3.4174332589269385e-08,
27340
+ "loss": 0.6352,
27341
+ "step": 22585
27342
+ },
27343
+ {
27344
+ "epoch": 0.92,
27345
+ "learning_rate": 3.4007481371888915e-08,
27346
+ "loss": 0.6517,
27347
+ "step": 22590
27348
+ },
27349
+ {
27350
+ "epoch": 0.92,
27351
+ "learning_rate": 3.384103141940009e-08,
27352
+ "loss": 0.6898,
27353
+ "step": 22595
27354
+ },
27355
+ {
27356
+ "epoch": 0.92,
27357
+ "learning_rate": 3.3674982800944604e-08,
27358
+ "loss": 0.6067,
27359
+ "step": 22600
27360
+ },
27361
+ {
27362
+ "epoch": 0.92,
27363
+ "learning_rate": 3.350933558549751e-08,
27364
+ "loss": 0.6752,
27365
+ "step": 22605
27366
+ },
27367
+ {
27368
+ "epoch": 0.92,
27369
+ "learning_rate": 3.334408984186765e-08,
27370
+ "loss": 0.664,
27371
+ "step": 22610
27372
+ },
27373
+ {
27374
+ "epoch": 0.92,
27375
+ "learning_rate": 3.317924563869634e-08,
27376
+ "loss": 0.6458,
27377
+ "step": 22615
27378
+ },
27379
+ {
27380
+ "epoch": 0.92,
27381
+ "learning_rate": 3.301480304445836e-08,
27382
+ "loss": 0.7108,
27383
+ "step": 22620
27384
+ },
27385
+ {
27386
+ "epoch": 0.92,
27387
+ "learning_rate": 3.2850762127462184e-08,
27388
+ "loss": 0.6561,
27389
+ "step": 22625
27390
+ },
27391
+ {
27392
+ "epoch": 0.92,
27393
+ "learning_rate": 3.268712295584841e-08,
27394
+ "loss": 0.7372,
27395
+ "step": 22630
27396
+ },
27397
+ {
27398
+ "epoch": 0.92,
27399
+ "learning_rate": 3.252388559759156e-08,
27400
+ "loss": 0.6515,
27401
+ "step": 22635
27402
+ },
27403
+ {
27404
+ "epoch": 0.92,
27405
+ "learning_rate": 3.2361050120499275e-08,
27406
+ "loss": 0.6417,
27407
+ "step": 22640
27408
+ },
27409
+ {
27410
+ "epoch": 0.92,
27411
+ "learning_rate": 3.219861659221168e-08,
27412
+ "loss": 0.6537,
27413
+ "step": 22645
27414
+ },
27415
+ {
27416
+ "epoch": 0.92,
27417
+ "learning_rate": 3.203658508020235e-08,
27418
+ "loss": 0.6385,
27419
+ "step": 22650
27420
+ },
27421
+ {
27422
+ "epoch": 0.92,
27423
+ "learning_rate": 3.1874955651777667e-08,
27424
+ "loss": 0.6112,
27425
+ "step": 22655
27426
+ },
27427
+ {
27428
+ "epoch": 0.92,
27429
+ "learning_rate": 3.171372837407738e-08,
27430
+ "loss": 0.6437,
27431
+ "step": 22660
27432
+ },
27433
+ {
27434
+ "epoch": 0.92,
27435
+ "learning_rate": 3.155290331407357e-08,
27436
+ "loss": 0.6222,
27437
+ "step": 22665
27438
+ },
27439
+ {
27440
+ "epoch": 0.92,
27441
+ "learning_rate": 3.1392480538571574e-08,
27442
+ "loss": 0.6439,
27443
+ "step": 22670
27444
+ },
27445
+ {
27446
+ "epoch": 0.92,
27447
+ "learning_rate": 3.123246011420999e-08,
27448
+ "loss": 0.6528,
27449
+ "step": 22675
27450
+ },
27451
+ {
27452
+ "epoch": 0.92,
27453
+ "learning_rate": 3.107284210745953e-08,
27454
+ "loss": 0.6654,
27455
+ "step": 22680
27456
+ },
27457
+ {
27458
+ "epoch": 0.92,
27459
+ "learning_rate": 3.0913626584624266e-08,
27460
+ "loss": 0.6474,
27461
+ "step": 22685
27462
+ },
27463
+ {
27464
+ "epoch": 0.92,
27465
+ "learning_rate": 3.0754813611840846e-08,
27466
+ "loss": 0.6528,
27467
+ "step": 22690
27468
+ },
27469
+ {
27470
+ "epoch": 0.92,
27471
+ "learning_rate": 3.0596403255078954e-08,
27472
+ "loss": 0.6708,
27473
+ "step": 22695
27474
+ },
27475
+ {
27476
+ "epoch": 0.92,
27477
+ "learning_rate": 3.043839558014083e-08,
27478
+ "loss": 0.6365,
27479
+ "step": 22700
27480
+ },
27481
+ {
27482
+ "epoch": 0.92,
27483
+ "learning_rate": 3.028079065266142e-08,
27484
+ "loss": 0.6687,
27485
+ "step": 22705
27486
+ },
27487
+ {
27488
+ "epoch": 0.92,
27489
+ "learning_rate": 3.012358853810859e-08,
27490
+ "loss": 0.6645,
27491
+ "step": 22710
27492
+ },
27493
+ {
27494
+ "epoch": 0.92,
27495
+ "learning_rate": 2.9966789301782535e-08,
27496
+ "loss": 0.6366,
27497
+ "step": 22715
27498
+ },
27499
+ {
27500
+ "epoch": 0.92,
27501
+ "learning_rate": 2.981039300881627e-08,
27502
+ "loss": 0.6575,
27503
+ "step": 22720
27504
+ },
27505
+ {
27506
+ "epoch": 0.92,
27507
+ "learning_rate": 2.9654399724175828e-08,
27508
+ "loss": 0.6966,
27509
+ "step": 22725
27510
+ },
27511
+ {
27512
+ "epoch": 0.92,
27513
+ "learning_rate": 2.949880951265904e-08,
27514
+ "loss": 0.6842,
27515
+ "step": 22730
27516
+ },
27517
+ {
27518
+ "epoch": 0.92,
27519
+ "learning_rate": 2.9343622438896875e-08,
27520
+ "loss": 0.6586,
27521
+ "step": 22735
27522
+ },
27523
+ {
27524
+ "epoch": 0.92,
27525
+ "learning_rate": 2.918883856735277e-08,
27526
+ "loss": 0.6629,
27527
+ "step": 22740
27528
+ },
27529
+ {
27530
+ "epoch": 0.92,
27531
+ "learning_rate": 2.9034457962322513e-08,
27532
+ "loss": 0.6283,
27533
+ "step": 22745
27534
+ },
27535
+ {
27536
+ "epoch": 0.92,
27537
+ "learning_rate": 2.8880480687934473e-08,
27538
+ "loss": 0.6272,
27539
+ "step": 22750
27540
+ },
27541
+ {
27542
+ "epoch": 0.92,
27543
+ "learning_rate": 2.8726906808149486e-08,
27544
+ "loss": 0.616,
27545
+ "step": 22755
27546
+ },
27547
+ {
27548
+ "epoch": 0.92,
27549
+ "learning_rate": 2.857373638676097e-08,
27550
+ "loss": 0.6605,
27551
+ "step": 22760
27552
+ },
27553
+ {
27554
+ "epoch": 0.92,
27555
+ "learning_rate": 2.8420969487394143e-08,
27556
+ "loss": 0.6212,
27557
+ "step": 22765
27558
+ },
27559
+ {
27560
+ "epoch": 0.92,
27561
+ "learning_rate": 2.826860617350746e-08,
27562
+ "loss": 0.6844,
27563
+ "step": 22770
27564
+ },
27565
+ {
27566
+ "epoch": 0.93,
27567
+ "learning_rate": 2.8116646508391183e-08,
27568
+ "loss": 0.645,
27569
+ "step": 22775
27570
+ },
27571
+ {
27572
+ "epoch": 0.93,
27573
+ "learning_rate": 2.7965090555168047e-08,
27574
+ "loss": 0.6497,
27575
+ "step": 22780
27576
+ },
27577
+ {
27578
+ "epoch": 0.93,
27579
+ "learning_rate": 2.7813938376793134e-08,
27580
+ "loss": 0.6361,
27581
+ "step": 22785
27582
+ },
27583
+ {
27584
+ "epoch": 0.93,
27585
+ "learning_rate": 2.7663190036053552e-08,
27586
+ "loss": 0.5859,
27587
+ "step": 22790
27588
+ },
27589
+ {
27590
+ "epoch": 0.93,
27591
+ "learning_rate": 2.75128455955691e-08,
27592
+ "loss": 0.6026,
27593
+ "step": 22795
27594
+ },
27595
+ {
27596
+ "epoch": 0.93,
27597
+ "learning_rate": 2.7362905117791268e-08,
27598
+ "loss": 0.6694,
27599
+ "step": 22800
27600
  }
27601
  ],
27602
  "logging_steps": 5,
 
27604
  "num_input_tokens_seen": 0,
27605
  "num_train_epochs": 1,
27606
  "save_steps": 400,
27607
+ "total_flos": 3187222391169024.0,
27608
  "trial_name": null,
27609
  "trial_params": null
27610
  }