ManyingZ commited on
Commit
7776ece
·
verified ·
1 Parent(s): d9b0206

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +1753 -3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e14f41c277b4c40791c7614c45f430870b4e6ff0de72ed591e7697769f954db
3
  size 3208536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc3080bc8c9d5423bd93b737dad7a79888723a50b70ebcf4c6c61f113338df6
3
  size 3208536
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6abc07af13aef57d723217ab8296653fafcbd7efdf2fa42fe37cf6d282d567ed
3
  size 1720890
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c3614839da41671462f92a3b76cf7997814e78fb2974ee91022d1eb91c3f32
3
  size 1720890
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b47f665b7594fb8f1cbf7365548078416d01f3370869f8b02e9c417375d31831
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456a5ee388d6bddca5fe294594c1eefd9c1493fdfe252053684e890bf8c56752
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac27f19033af4f59d0bd685d83ef77383f2454d3ac3822066f3bdf1bae4aaa9b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c26800e6aad79542c71a265990c35470e0f0c258d88e99aef73f16b83c77e16
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 122.02562538133007,
5
  "eval_steps": 500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8757,6 +8757,1756 @@
8757
  "learning_rate": 3.672131147540984e-05,
8758
  "loss": 0.0179,
8759
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8760
  }
8761
  ],
8762
  "logging_steps": 20,
@@ -8776,7 +10526,7 @@
8776
  "attributes": {}
8777
  }
8778
  },
8779
- "total_flos": 3.286196662311844e+17,
8780
  "train_batch_size": 1,
8781
  "trial_name": null,
8782
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 146.4307504575961,
5
  "eval_steps": 500,
6
+ "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8757
  "learning_rate": 3.672131147540984e-05,
8758
  "loss": 0.0179,
8759
  "step": 25000
8760
+ },
8761
+ {
8762
+ "epoch": 122.12324588163514,
8763
+ "grad_norm": 0.5783191323280334,
8764
+ "learning_rate": 3.659016393442623e-05,
8765
+ "loss": 0.0159,
8766
+ "step": 25020
8767
+ },
8768
+ {
8769
+ "epoch": 122.22086638194021,
8770
+ "grad_norm": 0.6851247549057007,
8771
+ "learning_rate": 3.6459016393442625e-05,
8772
+ "loss": 0.0167,
8773
+ "step": 25040
8774
+ },
8775
+ {
8776
+ "epoch": 122.31848688224527,
8777
+ "grad_norm": 0.5190562009811401,
8778
+ "learning_rate": 3.632786885245902e-05,
8779
+ "loss": 0.0168,
8780
+ "step": 25060
8781
+ },
8782
+ {
8783
+ "epoch": 122.41610738255034,
8784
+ "grad_norm": 0.8032932281494141,
8785
+ "learning_rate": 3.619672131147541e-05,
8786
+ "loss": 0.0173,
8787
+ "step": 25080
8788
+ },
8789
+ {
8790
+ "epoch": 122.5137278828554,
8791
+ "grad_norm": 1.162681221961975,
8792
+ "learning_rate": 3.6065573770491806e-05,
8793
+ "loss": 0.0175,
8794
+ "step": 25100
8795
+ },
8796
+ {
8797
+ "epoch": 122.61134838316046,
8798
+ "grad_norm": 0.9898841977119446,
8799
+ "learning_rate": 3.59344262295082e-05,
8800
+ "loss": 0.0154,
8801
+ "step": 25120
8802
+ },
8803
+ {
8804
+ "epoch": 122.70896888346553,
8805
+ "grad_norm": 0.7703188061714172,
8806
+ "learning_rate": 3.580327868852459e-05,
8807
+ "loss": 0.0177,
8808
+ "step": 25140
8809
+ },
8810
+ {
8811
+ "epoch": 122.80658938377059,
8812
+ "grad_norm": 0.6557360291481018,
8813
+ "learning_rate": 3.567213114754099e-05,
8814
+ "loss": 0.0187,
8815
+ "step": 25160
8816
+ },
8817
+ {
8818
+ "epoch": 122.90420988407566,
8819
+ "grad_norm": 0.6278268694877625,
8820
+ "learning_rate": 3.554098360655738e-05,
8821
+ "loss": 0.0173,
8822
+ "step": 25180
8823
+ },
8824
+ {
8825
+ "epoch": 123.00183038438072,
8826
+ "grad_norm": 0.5595793128013611,
8827
+ "learning_rate": 3.5409836065573773e-05,
8828
+ "loss": 0.02,
8829
+ "step": 25200
8830
+ },
8831
+ {
8832
+ "epoch": 123.09945088468578,
8833
+ "grad_norm": 0.8069674968719482,
8834
+ "learning_rate": 3.527868852459016e-05,
8835
+ "loss": 0.0157,
8836
+ "step": 25220
8837
+ },
8838
+ {
8839
+ "epoch": 123.19707138499085,
8840
+ "grad_norm": 0.5641182661056519,
8841
+ "learning_rate": 3.514754098360656e-05,
8842
+ "loss": 0.0162,
8843
+ "step": 25240
8844
+ },
8845
+ {
8846
+ "epoch": 123.29469188529592,
8847
+ "grad_norm": 1.641262412071228,
8848
+ "learning_rate": 3.5016393442622955e-05,
8849
+ "loss": 0.0153,
8850
+ "step": 25260
8851
+ },
8852
+ {
8853
+ "epoch": 123.39231238560097,
8854
+ "grad_norm": 0.828906238079071,
8855
+ "learning_rate": 3.4885245901639344e-05,
8856
+ "loss": 0.0165,
8857
+ "step": 25280
8858
+ },
8859
+ {
8860
+ "epoch": 123.48993288590604,
8861
+ "grad_norm": 0.4439915418624878,
8862
+ "learning_rate": 3.475409836065574e-05,
8863
+ "loss": 0.0175,
8864
+ "step": 25300
8865
+ },
8866
+ {
8867
+ "epoch": 123.5875533862111,
8868
+ "grad_norm": 0.5250588059425354,
8869
+ "learning_rate": 3.462295081967214e-05,
8870
+ "loss": 0.016,
8871
+ "step": 25320
8872
+ },
8873
+ {
8874
+ "epoch": 123.68517388651617,
8875
+ "grad_norm": 1.8672527074813843,
8876
+ "learning_rate": 3.4491803278688526e-05,
8877
+ "loss": 0.0168,
8878
+ "step": 25340
8879
+ },
8880
+ {
8881
+ "epoch": 123.78279438682124,
8882
+ "grad_norm": 0.905852735042572,
8883
+ "learning_rate": 3.436065573770492e-05,
8884
+ "loss": 0.0166,
8885
+ "step": 25360
8886
+ },
8887
+ {
8888
+ "epoch": 123.88041488712629,
8889
+ "grad_norm": 1.0820852518081665,
8890
+ "learning_rate": 3.422950819672131e-05,
8891
+ "loss": 0.018,
8892
+ "step": 25380
8893
+ },
8894
+ {
8895
+ "epoch": 123.97803538743136,
8896
+ "grad_norm": 0.901567816734314,
8897
+ "learning_rate": 3.409836065573771e-05,
8898
+ "loss": 0.0175,
8899
+ "step": 25400
8900
+ },
8901
+ {
8902
+ "epoch": 124.07565588773643,
8903
+ "grad_norm": 0.5788626074790955,
8904
+ "learning_rate": 3.39672131147541e-05,
8905
+ "loss": 0.0153,
8906
+ "step": 25420
8907
+ },
8908
+ {
8909
+ "epoch": 124.17327638804149,
8910
+ "grad_norm": 2.4590864181518555,
8911
+ "learning_rate": 3.383606557377049e-05,
8912
+ "loss": 0.0156,
8913
+ "step": 25440
8914
+ },
8915
+ {
8916
+ "epoch": 124.27089688834656,
8917
+ "grad_norm": 0.5568335056304932,
8918
+ "learning_rate": 3.370491803278689e-05,
8919
+ "loss": 0.0145,
8920
+ "step": 25460
8921
+ },
8922
+ {
8923
+ "epoch": 124.36851738865161,
8924
+ "grad_norm": 0.9648571014404297,
8925
+ "learning_rate": 3.357377049180328e-05,
8926
+ "loss": 0.0139,
8927
+ "step": 25480
8928
+ },
8929
+ {
8930
+ "epoch": 124.46613788895668,
8931
+ "grad_norm": 1.7256869077682495,
8932
+ "learning_rate": 3.3442622950819675e-05,
8933
+ "loss": 0.0161,
8934
+ "step": 25500
8935
+ },
8936
+ {
8937
+ "epoch": 124.56375838926175,
8938
+ "grad_norm": 0.7551959753036499,
8939
+ "learning_rate": 3.331147540983607e-05,
8940
+ "loss": 0.0165,
8941
+ "step": 25520
8942
+ },
8943
+ {
8944
+ "epoch": 124.6613788895668,
8945
+ "grad_norm": 0.6856973767280579,
8946
+ "learning_rate": 3.318032786885246e-05,
8947
+ "loss": 0.0167,
8948
+ "step": 25540
8949
+ },
8950
+ {
8951
+ "epoch": 124.75899938987187,
8952
+ "grad_norm": 0.6650362610816956,
8953
+ "learning_rate": 3.3049180327868857e-05,
8954
+ "loss": 0.0164,
8955
+ "step": 25560
8956
+ },
8957
+ {
8958
+ "epoch": 124.85661989017694,
8959
+ "grad_norm": 1.0952746868133545,
8960
+ "learning_rate": 3.291803278688525e-05,
8961
+ "loss": 0.0181,
8962
+ "step": 25580
8963
+ },
8964
+ {
8965
+ "epoch": 124.954240390482,
8966
+ "grad_norm": 0.8695099353790283,
8967
+ "learning_rate": 3.2786885245901635e-05,
8968
+ "loss": 0.0167,
8969
+ "step": 25600
8970
+ },
8971
+ {
8972
+ "epoch": 125.05186089078707,
8973
+ "grad_norm": 0.5697212219238281,
8974
+ "learning_rate": 3.265573770491803e-05,
8975
+ "loss": 0.0166,
8976
+ "step": 25620
8977
+ },
8978
+ {
8979
+ "epoch": 125.14948139109212,
8980
+ "grad_norm": 0.6281394958496094,
8981
+ "learning_rate": 3.252459016393443e-05,
8982
+ "loss": 0.0138,
8983
+ "step": 25640
8984
+ },
8985
+ {
8986
+ "epoch": 125.2471018913972,
8987
+ "grad_norm": 0.7632110118865967,
8988
+ "learning_rate": 3.2393442622950824e-05,
8989
+ "loss": 0.0163,
8990
+ "step": 25660
8991
+ },
8992
+ {
8993
+ "epoch": 125.34472239170226,
8994
+ "grad_norm": 0.587164580821991,
8995
+ "learning_rate": 3.226229508196721e-05,
8996
+ "loss": 0.0169,
8997
+ "step": 25680
8998
+ },
8999
+ {
9000
+ "epoch": 125.44234289200732,
9001
+ "grad_norm": 0.8123992681503296,
9002
+ "learning_rate": 3.213114754098361e-05,
9003
+ "loss": 0.0156,
9004
+ "step": 25700
9005
+ },
9006
+ {
9007
+ "epoch": 125.53996339231239,
9008
+ "grad_norm": 0.7210849523544312,
9009
+ "learning_rate": 3.2000000000000005e-05,
9010
+ "loss": 0.0161,
9011
+ "step": 25720
9012
+ },
9013
+ {
9014
+ "epoch": 125.63758389261746,
9015
+ "grad_norm": 0.6011385917663574,
9016
+ "learning_rate": 3.1868852459016395e-05,
9017
+ "loss": 0.0178,
9018
+ "step": 25740
9019
+ },
9020
+ {
9021
+ "epoch": 125.73520439292251,
9022
+ "grad_norm": 0.8048945665359497,
9023
+ "learning_rate": 3.173770491803279e-05,
9024
+ "loss": 0.0172,
9025
+ "step": 25760
9026
+ },
9027
+ {
9028
+ "epoch": 125.83282489322758,
9029
+ "grad_norm": 0.5456706285476685,
9030
+ "learning_rate": 3.160655737704919e-05,
9031
+ "loss": 0.0188,
9032
+ "step": 25780
9033
+ },
9034
+ {
9035
+ "epoch": 125.93044539353264,
9036
+ "grad_norm": 1.419385313987732,
9037
+ "learning_rate": 3.1475409836065576e-05,
9038
+ "loss": 0.0187,
9039
+ "step": 25800
9040
+ },
9041
+ {
9042
+ "epoch": 126.0280658938377,
9043
+ "grad_norm": 0.8208538293838501,
9044
+ "learning_rate": 3.1344262295081966e-05,
9045
+ "loss": 0.0149,
9046
+ "step": 25820
9047
+ },
9048
+ {
9049
+ "epoch": 126.12568639414278,
9050
+ "grad_norm": 0.45135247707366943,
9051
+ "learning_rate": 3.121311475409836e-05,
9052
+ "loss": 0.0139,
9053
+ "step": 25840
9054
+ },
9055
+ {
9056
+ "epoch": 126.22330689444783,
9057
+ "grad_norm": 0.565280556678772,
9058
+ "learning_rate": 3.108196721311475e-05,
9059
+ "loss": 0.0123,
9060
+ "step": 25860
9061
+ },
9062
+ {
9063
+ "epoch": 126.3209273947529,
9064
+ "grad_norm": 0.742659866809845,
9065
+ "learning_rate": 3.095081967213115e-05,
9066
+ "loss": 0.0151,
9067
+ "step": 25880
9068
+ },
9069
+ {
9070
+ "epoch": 126.41854789505797,
9071
+ "grad_norm": 1.5381386280059814,
9072
+ "learning_rate": 3.0819672131147544e-05,
9073
+ "loss": 0.0157,
9074
+ "step": 25900
9075
+ },
9076
+ {
9077
+ "epoch": 126.51616839536302,
9078
+ "grad_norm": 0.626524031162262,
9079
+ "learning_rate": 3.068852459016393e-05,
9080
+ "loss": 0.0151,
9081
+ "step": 25920
9082
+ },
9083
+ {
9084
+ "epoch": 126.6137888956681,
9085
+ "grad_norm": 0.6463727355003357,
9086
+ "learning_rate": 3.055737704918033e-05,
9087
+ "loss": 0.0174,
9088
+ "step": 25940
9089
+ },
9090
+ {
9091
+ "epoch": 126.71140939597315,
9092
+ "grad_norm": 0.48679399490356445,
9093
+ "learning_rate": 3.0426229508196725e-05,
9094
+ "loss": 0.0154,
9095
+ "step": 25960
9096
+ },
9097
+ {
9098
+ "epoch": 126.80902989627822,
9099
+ "grad_norm": 0.9534430503845215,
9100
+ "learning_rate": 3.0295081967213118e-05,
9101
+ "loss": 0.0185,
9102
+ "step": 25980
9103
+ },
9104
+ {
9105
+ "epoch": 126.90665039658329,
9106
+ "grad_norm": 0.571997344493866,
9107
+ "learning_rate": 3.016393442622951e-05,
9108
+ "loss": 0.0174,
9109
+ "step": 26000
9110
+ },
9111
+ {
9112
+ "epoch": 127.00427089688834,
9113
+ "grad_norm": 0.8983253836631775,
9114
+ "learning_rate": 3.00327868852459e-05,
9115
+ "loss": 0.0183,
9116
+ "step": 26020
9117
+ },
9118
+ {
9119
+ "epoch": 127.10189139719341,
9120
+ "grad_norm": 0.37496012449264526,
9121
+ "learning_rate": 2.9901639344262293e-05,
9122
+ "loss": 0.0135,
9123
+ "step": 26040
9124
+ },
9125
+ {
9126
+ "epoch": 127.19951189749847,
9127
+ "grad_norm": 0.7320930361747742,
9128
+ "learning_rate": 2.977049180327869e-05,
9129
+ "loss": 0.0138,
9130
+ "step": 26060
9131
+ },
9132
+ {
9133
+ "epoch": 127.29713239780354,
9134
+ "grad_norm": 1.5510950088500977,
9135
+ "learning_rate": 2.963934426229508e-05,
9136
+ "loss": 0.0181,
9137
+ "step": 26080
9138
+ },
9139
+ {
9140
+ "epoch": 127.3947528981086,
9141
+ "grad_norm": 0.25900664925575256,
9142
+ "learning_rate": 2.9508196721311478e-05,
9143
+ "loss": 0.0145,
9144
+ "step": 26100
9145
+ },
9146
+ {
9147
+ "epoch": 127.49237339841366,
9148
+ "grad_norm": 0.7860931754112244,
9149
+ "learning_rate": 2.937704918032787e-05,
9150
+ "loss": 0.0149,
9151
+ "step": 26120
9152
+ },
9153
+ {
9154
+ "epoch": 127.58999389871873,
9155
+ "grad_norm": 1.5779728889465332,
9156
+ "learning_rate": 2.9245901639344263e-05,
9157
+ "loss": 0.0168,
9158
+ "step": 26140
9159
+ },
9160
+ {
9161
+ "epoch": 127.6876143990238,
9162
+ "grad_norm": 0.8237743377685547,
9163
+ "learning_rate": 2.911475409836066e-05,
9164
+ "loss": 0.0173,
9165
+ "step": 26160
9166
+ },
9167
+ {
9168
+ "epoch": 127.78523489932886,
9169
+ "grad_norm": 0.5260995626449585,
9170
+ "learning_rate": 2.8983606557377052e-05,
9171
+ "loss": 0.0148,
9172
+ "step": 26180
9173
+ },
9174
+ {
9175
+ "epoch": 127.88285539963393,
9176
+ "grad_norm": 1.158836007118225,
9177
+ "learning_rate": 2.8852459016393445e-05,
9178
+ "loss": 0.0179,
9179
+ "step": 26200
9180
+ },
9181
+ {
9182
+ "epoch": 127.98047589993898,
9183
+ "grad_norm": 1.7822519540786743,
9184
+ "learning_rate": 2.872131147540984e-05,
9185
+ "loss": 0.0153,
9186
+ "step": 26220
9187
+ },
9188
+ {
9189
+ "epoch": 128.07809640024405,
9190
+ "grad_norm": 0.6409000158309937,
9191
+ "learning_rate": 2.8590163934426227e-05,
9192
+ "loss": 0.0178,
9193
+ "step": 26240
9194
+ },
9195
+ {
9196
+ "epoch": 128.1757169005491,
9197
+ "grad_norm": 0.7198218107223511,
9198
+ "learning_rate": 2.8459016393442623e-05,
9199
+ "loss": 0.0143,
9200
+ "step": 26260
9201
+ },
9202
+ {
9203
+ "epoch": 128.2733374008542,
9204
+ "grad_norm": 0.9570964574813843,
9205
+ "learning_rate": 2.8327868852459016e-05,
9206
+ "loss": 0.0132,
9207
+ "step": 26280
9208
+ },
9209
+ {
9210
+ "epoch": 128.37095790115924,
9211
+ "grad_norm": 0.40788573026657104,
9212
+ "learning_rate": 2.819672131147541e-05,
9213
+ "loss": 0.0151,
9214
+ "step": 26300
9215
+ },
9216
+ {
9217
+ "epoch": 128.4685784014643,
9218
+ "grad_norm": 1.0642712116241455,
9219
+ "learning_rate": 2.8065573770491805e-05,
9220
+ "loss": 0.0154,
9221
+ "step": 26320
9222
+ },
9223
+ {
9224
+ "epoch": 128.56619890176938,
9225
+ "grad_norm": 0.5972766280174255,
9226
+ "learning_rate": 2.7934426229508198e-05,
9227
+ "loss": 0.015,
9228
+ "step": 26340
9229
+ },
9230
+ {
9231
+ "epoch": 128.66381940207444,
9232
+ "grad_norm": 0.5974799990653992,
9233
+ "learning_rate": 2.7803278688524594e-05,
9234
+ "loss": 0.0144,
9235
+ "step": 26360
9236
+ },
9237
+ {
9238
+ "epoch": 128.7614399023795,
9239
+ "grad_norm": 0.8131697773933411,
9240
+ "learning_rate": 2.7672131147540987e-05,
9241
+ "loss": 0.0166,
9242
+ "step": 26380
9243
+ },
9244
+ {
9245
+ "epoch": 128.85906040268458,
9246
+ "grad_norm": 0.8219912648200989,
9247
+ "learning_rate": 2.754098360655738e-05,
9248
+ "loss": 0.0186,
9249
+ "step": 26400
9250
+ },
9251
+ {
9252
+ "epoch": 128.95668090298963,
9253
+ "grad_norm": 0.7310410737991333,
9254
+ "learning_rate": 2.7409836065573775e-05,
9255
+ "loss": 0.0158,
9256
+ "step": 26420
9257
+ },
9258
+ {
9259
+ "epoch": 129.0543014032947,
9260
+ "grad_norm": 0.7448714375495911,
9261
+ "learning_rate": 2.7278688524590168e-05,
9262
+ "loss": 0.0161,
9263
+ "step": 26440
9264
+ },
9265
+ {
9266
+ "epoch": 129.15192190359974,
9267
+ "grad_norm": 1.0379338264465332,
9268
+ "learning_rate": 2.7147540983606558e-05,
9269
+ "loss": 0.0129,
9270
+ "step": 26460
9271
+ },
9272
+ {
9273
+ "epoch": 129.24954240390483,
9274
+ "grad_norm": 0.4505363404750824,
9275
+ "learning_rate": 2.701639344262295e-05,
9276
+ "loss": 0.0139,
9277
+ "step": 26480
9278
+ },
9279
+ {
9280
+ "epoch": 129.34716290420988,
9281
+ "grad_norm": 0.49264198541641235,
9282
+ "learning_rate": 2.6885245901639343e-05,
9283
+ "loss": 0.0141,
9284
+ "step": 26500
9285
+ },
9286
+ {
9287
+ "epoch": 129.44478340451494,
9288
+ "grad_norm": 0.38399410247802734,
9289
+ "learning_rate": 2.675409836065574e-05,
9290
+ "loss": 0.0148,
9291
+ "step": 26520
9292
+ },
9293
+ {
9294
+ "epoch": 129.54240390482002,
9295
+ "grad_norm": 0.8914321660995483,
9296
+ "learning_rate": 2.6622950819672132e-05,
9297
+ "loss": 0.0159,
9298
+ "step": 26540
9299
+ },
9300
+ {
9301
+ "epoch": 129.64002440512508,
9302
+ "grad_norm": 0.8293542265892029,
9303
+ "learning_rate": 2.6491803278688525e-05,
9304
+ "loss": 0.0157,
9305
+ "step": 26560
9306
+ },
9307
+ {
9308
+ "epoch": 129.73764490543013,
9309
+ "grad_norm": 0.5534564256668091,
9310
+ "learning_rate": 2.636065573770492e-05,
9311
+ "loss": 0.0158,
9312
+ "step": 26580
9313
+ },
9314
+ {
9315
+ "epoch": 129.8352654057352,
9316
+ "grad_norm": 0.7157993912696838,
9317
+ "learning_rate": 2.6229508196721314e-05,
9318
+ "loss": 0.016,
9319
+ "step": 26600
9320
+ },
9321
+ {
9322
+ "epoch": 129.93288590604027,
9323
+ "grad_norm": 0.7746397256851196,
9324
+ "learning_rate": 2.6098360655737706e-05,
9325
+ "loss": 0.0192,
9326
+ "step": 26620
9327
+ },
9328
+ {
9329
+ "epoch": 130.03050640634532,
9330
+ "grad_norm": 0.7727970480918884,
9331
+ "learning_rate": 2.5967213114754103e-05,
9332
+ "loss": 0.0151,
9333
+ "step": 26640
9334
+ },
9335
+ {
9336
+ "epoch": 130.1281269066504,
9337
+ "grad_norm": 0.514680802822113,
9338
+ "learning_rate": 2.5836065573770492e-05,
9339
+ "loss": 0.0159,
9340
+ "step": 26660
9341
+ },
9342
+ {
9343
+ "epoch": 130.22574740695546,
9344
+ "grad_norm": 0.87467360496521,
9345
+ "learning_rate": 2.5704918032786885e-05,
9346
+ "loss": 0.0137,
9347
+ "step": 26680
9348
+ },
9349
+ {
9350
+ "epoch": 130.32336790726052,
9351
+ "grad_norm": 0.7342318296432495,
9352
+ "learning_rate": 2.5573770491803277e-05,
9353
+ "loss": 0.0164,
9354
+ "step": 26700
9355
+ },
9356
+ {
9357
+ "epoch": 130.4209884075656,
9358
+ "grad_norm": 0.46169203519821167,
9359
+ "learning_rate": 2.5442622950819674e-05,
9360
+ "loss": 0.0148,
9361
+ "step": 26720
9362
+ },
9363
+ {
9364
+ "epoch": 130.51860890787066,
9365
+ "grad_norm": 0.5552070140838623,
9366
+ "learning_rate": 2.5311475409836066e-05,
9367
+ "loss": 0.0146,
9368
+ "step": 26740
9369
+ },
9370
+ {
9371
+ "epoch": 130.6162294081757,
9372
+ "grad_norm": 2.3732874393463135,
9373
+ "learning_rate": 2.518032786885246e-05,
9374
+ "loss": 0.0151,
9375
+ "step": 26760
9376
+ },
9377
+ {
9378
+ "epoch": 130.71384990848077,
9379
+ "grad_norm": 0.7399420142173767,
9380
+ "learning_rate": 2.5049180327868855e-05,
9381
+ "loss": 0.0136,
9382
+ "step": 26780
9383
+ },
9384
+ {
9385
+ "epoch": 130.81147040878585,
9386
+ "grad_norm": 0.7631209492683411,
9387
+ "learning_rate": 2.4918032786885248e-05,
9388
+ "loss": 0.0168,
9389
+ "step": 26800
9390
+ },
9391
+ {
9392
+ "epoch": 130.9090909090909,
9393
+ "grad_norm": 0.4778473675251007,
9394
+ "learning_rate": 2.478688524590164e-05,
9395
+ "loss": 0.0144,
9396
+ "step": 26820
9397
+ },
9398
+ {
9399
+ "epoch": 131.00671140939596,
9400
+ "grad_norm": 0.48981741070747375,
9401
+ "learning_rate": 2.4655737704918033e-05,
9402
+ "loss": 0.0174,
9403
+ "step": 26840
9404
+ },
9405
+ {
9406
+ "epoch": 131.10433190970105,
9407
+ "grad_norm": 0.550786018371582,
9408
+ "learning_rate": 2.4524590163934426e-05,
9409
+ "loss": 0.0144,
9410
+ "step": 26860
9411
+ },
9412
+ {
9413
+ "epoch": 131.2019524100061,
9414
+ "grad_norm": 1.1115200519561768,
9415
+ "learning_rate": 2.4393442622950822e-05,
9416
+ "loss": 0.0137,
9417
+ "step": 26880
9418
+ },
9419
+ {
9420
+ "epoch": 131.29957291031116,
9421
+ "grad_norm": 0.7832316160202026,
9422
+ "learning_rate": 2.4262295081967215e-05,
9423
+ "loss": 0.0138,
9424
+ "step": 26900
9425
+ },
9426
+ {
9427
+ "epoch": 131.39719341061624,
9428
+ "grad_norm": 0.7918095588684082,
9429
+ "learning_rate": 2.4131147540983608e-05,
9430
+ "loss": 0.0153,
9431
+ "step": 26920
9432
+ },
9433
+ {
9434
+ "epoch": 131.4948139109213,
9435
+ "grad_norm": 0.5915355682373047,
9436
+ "learning_rate": 2.4e-05,
9437
+ "loss": 0.0159,
9438
+ "step": 26940
9439
+ },
9440
+ {
9441
+ "epoch": 131.59243441122635,
9442
+ "grad_norm": 0.6909199357032776,
9443
+ "learning_rate": 2.3868852459016393e-05,
9444
+ "loss": 0.0159,
9445
+ "step": 26960
9446
+ },
9447
+ {
9448
+ "epoch": 131.69005491153143,
9449
+ "grad_norm": 1.0566037893295288,
9450
+ "learning_rate": 2.373770491803279e-05,
9451
+ "loss": 0.0147,
9452
+ "step": 26980
9453
+ },
9454
+ {
9455
+ "epoch": 131.7876754118365,
9456
+ "grad_norm": 1.6122446060180664,
9457
+ "learning_rate": 2.3606557377049182e-05,
9458
+ "loss": 0.0141,
9459
+ "step": 27000
9460
+ },
9461
+ {
9462
+ "epoch": 131.88529591214154,
9463
+ "grad_norm": 0.8080132007598877,
9464
+ "learning_rate": 2.3475409836065575e-05,
9465
+ "loss": 0.0155,
9466
+ "step": 27020
9467
+ },
9468
+ {
9469
+ "epoch": 131.98291641244663,
9470
+ "grad_norm": 0.45939984917640686,
9471
+ "learning_rate": 2.3344262295081968e-05,
9472
+ "loss": 0.0166,
9473
+ "step": 27040
9474
+ },
9475
+ {
9476
+ "epoch": 132.08053691275168,
9477
+ "grad_norm": 0.8284308314323425,
9478
+ "learning_rate": 2.321311475409836e-05,
9479
+ "loss": 0.015,
9480
+ "step": 27060
9481
+ },
9482
+ {
9483
+ "epoch": 132.17815741305674,
9484
+ "grad_norm": 0.6223374605178833,
9485
+ "learning_rate": 2.3081967213114757e-05,
9486
+ "loss": 0.0155,
9487
+ "step": 27080
9488
+ },
9489
+ {
9490
+ "epoch": 132.2757779133618,
9491
+ "grad_norm": 1.6535650491714478,
9492
+ "learning_rate": 2.295081967213115e-05,
9493
+ "loss": 0.015,
9494
+ "step": 27100
9495
+ },
9496
+ {
9497
+ "epoch": 132.37339841366688,
9498
+ "grad_norm": 0.6285653710365295,
9499
+ "learning_rate": 2.2819672131147542e-05,
9500
+ "loss": 0.0158,
9501
+ "step": 27120
9502
+ },
9503
+ {
9504
+ "epoch": 132.47101891397193,
9505
+ "grad_norm": 0.6470975279808044,
9506
+ "learning_rate": 2.2688524590163935e-05,
9507
+ "loss": 0.0131,
9508
+ "step": 27140
9509
+ },
9510
+ {
9511
+ "epoch": 132.568639414277,
9512
+ "grad_norm": 0.6603531241416931,
9513
+ "learning_rate": 2.2557377049180328e-05,
9514
+ "loss": 0.0155,
9515
+ "step": 27160
9516
+ },
9517
+ {
9518
+ "epoch": 132.66625991458207,
9519
+ "grad_norm": 0.9789283275604248,
9520
+ "learning_rate": 2.2426229508196724e-05,
9521
+ "loss": 0.014,
9522
+ "step": 27180
9523
+ },
9524
+ {
9525
+ "epoch": 132.76388041488713,
9526
+ "grad_norm": 0.7158600687980652,
9527
+ "learning_rate": 2.2295081967213117e-05,
9528
+ "loss": 0.0149,
9529
+ "step": 27200
9530
+ },
9531
+ {
9532
+ "epoch": 132.86150091519218,
9533
+ "grad_norm": 0.4593288004398346,
9534
+ "learning_rate": 2.216393442622951e-05,
9535
+ "loss": 0.0149,
9536
+ "step": 27220
9537
+ },
9538
+ {
9539
+ "epoch": 132.95912141549726,
9540
+ "grad_norm": 0.7383930087089539,
9541
+ "learning_rate": 2.2032786885245905e-05,
9542
+ "loss": 0.0158,
9543
+ "step": 27240
9544
+ },
9545
+ {
9546
+ "epoch": 133.05674191580232,
9547
+ "grad_norm": 0.8438706398010254,
9548
+ "learning_rate": 2.1901639344262295e-05,
9549
+ "loss": 0.0152,
9550
+ "step": 27260
9551
+ },
9552
+ {
9553
+ "epoch": 133.15436241610738,
9554
+ "grad_norm": 0.3977959156036377,
9555
+ "learning_rate": 2.1770491803278688e-05,
9556
+ "loss": 0.0135,
9557
+ "step": 27280
9558
+ },
9559
+ {
9560
+ "epoch": 133.25198291641246,
9561
+ "grad_norm": 0.5032092332839966,
9562
+ "learning_rate": 2.1639344262295084e-05,
9563
+ "loss": 0.0144,
9564
+ "step": 27300
9565
+ },
9566
+ {
9567
+ "epoch": 133.3496034167175,
9568
+ "grad_norm": 0.8900758028030396,
9569
+ "learning_rate": 2.1508196721311476e-05,
9570
+ "loss": 0.0142,
9571
+ "step": 27320
9572
+ },
9573
+ {
9574
+ "epoch": 133.44722391702257,
9575
+ "grad_norm": 0.6694475412368774,
9576
+ "learning_rate": 2.1377049180327873e-05,
9577
+ "loss": 0.0148,
9578
+ "step": 27340
9579
+ },
9580
+ {
9581
+ "epoch": 133.54484441732765,
9582
+ "grad_norm": 0.6150327920913696,
9583
+ "learning_rate": 2.1245901639344262e-05,
9584
+ "loss": 0.0137,
9585
+ "step": 27360
9586
+ },
9587
+ {
9588
+ "epoch": 133.6424649176327,
9589
+ "grad_norm": 0.3980708718299866,
9590
+ "learning_rate": 2.1114754098360655e-05,
9591
+ "loss": 0.0131,
9592
+ "step": 27380
9593
+ },
9594
+ {
9595
+ "epoch": 133.74008541793776,
9596
+ "grad_norm": 0.556053876876831,
9597
+ "learning_rate": 2.098360655737705e-05,
9598
+ "loss": 0.0173,
9599
+ "step": 27400
9600
+ },
9601
+ {
9602
+ "epoch": 133.83770591824282,
9603
+ "grad_norm": 0.7154746055603027,
9604
+ "learning_rate": 2.0852459016393444e-05,
9605
+ "loss": 0.0142,
9606
+ "step": 27420
9607
+ },
9608
+ {
9609
+ "epoch": 133.9353264185479,
9610
+ "grad_norm": 0.585117757320404,
9611
+ "learning_rate": 2.0721311475409836e-05,
9612
+ "loss": 0.0148,
9613
+ "step": 27440
9614
+ },
9615
+ {
9616
+ "epoch": 134.03294691885296,
9617
+ "grad_norm": 0.4688512682914734,
9618
+ "learning_rate": 2.059016393442623e-05,
9619
+ "loss": 0.0139,
9620
+ "step": 27460
9621
+ },
9622
+ {
9623
+ "epoch": 134.130567419158,
9624
+ "grad_norm": 0.3597017824649811,
9625
+ "learning_rate": 2.0459016393442622e-05,
9626
+ "loss": 0.0125,
9627
+ "step": 27480
9628
+ },
9629
+ {
9630
+ "epoch": 134.2281879194631,
9631
+ "grad_norm": 0.6201938986778259,
9632
+ "learning_rate": 2.0327868852459018e-05,
9633
+ "loss": 0.014,
9634
+ "step": 27500
9635
+ },
9636
+ {
9637
+ "epoch": 134.32580841976815,
9638
+ "grad_norm": 0.6969265341758728,
9639
+ "learning_rate": 2.019672131147541e-05,
9640
+ "loss": 0.0133,
9641
+ "step": 27520
9642
+ },
9643
+ {
9644
+ "epoch": 134.4234289200732,
9645
+ "grad_norm": 0.6457026600837708,
9646
+ "learning_rate": 2.0065573770491804e-05,
9647
+ "loss": 0.0152,
9648
+ "step": 27540
9649
+ },
9650
+ {
9651
+ "epoch": 134.5210494203783,
9652
+ "grad_norm": 0.7583892941474915,
9653
+ "learning_rate": 1.99344262295082e-05,
9654
+ "loss": 0.0148,
9655
+ "step": 27560
9656
+ },
9657
+ {
9658
+ "epoch": 134.61866992068335,
9659
+ "grad_norm": 0.41781967878341675,
9660
+ "learning_rate": 1.980327868852459e-05,
9661
+ "loss": 0.0145,
9662
+ "step": 27580
9663
+ },
9664
+ {
9665
+ "epoch": 134.7162904209884,
9666
+ "grad_norm": 1.2802424430847168,
9667
+ "learning_rate": 1.9672131147540985e-05,
9668
+ "loss": 0.0158,
9669
+ "step": 27600
9670
+ },
9671
+ {
9672
+ "epoch": 134.81391092129348,
9673
+ "grad_norm": 0.3811515271663666,
9674
+ "learning_rate": 1.9540983606557378e-05,
9675
+ "loss": 0.0136,
9676
+ "step": 27620
9677
+ },
9678
+ {
9679
+ "epoch": 134.91153142159854,
9680
+ "grad_norm": 0.41068577766418457,
9681
+ "learning_rate": 1.940983606557377e-05,
9682
+ "loss": 0.0166,
9683
+ "step": 27640
9684
+ },
9685
+ {
9686
+ "epoch": 135.0091519219036,
9687
+ "grad_norm": 0.690075695514679,
9688
+ "learning_rate": 1.9278688524590167e-05,
9689
+ "loss": 0.0152,
9690
+ "step": 27660
9691
+ },
9692
+ {
9693
+ "epoch": 135.10677242220865,
9694
+ "grad_norm": 0.6945540308952332,
9695
+ "learning_rate": 1.9147540983606556e-05,
9696
+ "loss": 0.0125,
9697
+ "step": 27680
9698
+ },
9699
+ {
9700
+ "epoch": 135.20439292251373,
9701
+ "grad_norm": 0.9262276291847229,
9702
+ "learning_rate": 1.9016393442622952e-05,
9703
+ "loss": 0.014,
9704
+ "step": 27700
9705
+ },
9706
+ {
9707
+ "epoch": 135.3020134228188,
9708
+ "grad_norm": 0.5992072224617004,
9709
+ "learning_rate": 1.8885245901639345e-05,
9710
+ "loss": 0.0132,
9711
+ "step": 27720
9712
+ },
9713
+ {
9714
+ "epoch": 135.39963392312384,
9715
+ "grad_norm": 0.6684610247612,
9716
+ "learning_rate": 1.8754098360655738e-05,
9717
+ "loss": 0.0147,
9718
+ "step": 27740
9719
+ },
9720
+ {
9721
+ "epoch": 135.49725442342893,
9722
+ "grad_norm": 0.647719144821167,
9723
+ "learning_rate": 1.8622950819672134e-05,
9724
+ "loss": 0.0168,
9725
+ "step": 27760
9726
+ },
9727
+ {
9728
+ "epoch": 135.59487492373398,
9729
+ "grad_norm": 1.5291879177093506,
9730
+ "learning_rate": 1.8491803278688523e-05,
9731
+ "loss": 0.0159,
9732
+ "step": 27780
9733
+ },
9734
+ {
9735
+ "epoch": 135.69249542403904,
9736
+ "grad_norm": 0.7436932325363159,
9737
+ "learning_rate": 1.836065573770492e-05,
9738
+ "loss": 0.0136,
9739
+ "step": 27800
9740
+ },
9741
+ {
9742
+ "epoch": 135.79011592434412,
9743
+ "grad_norm": 0.38243773579597473,
9744
+ "learning_rate": 1.8229508196721312e-05,
9745
+ "loss": 0.0145,
9746
+ "step": 27820
9747
+ },
9748
+ {
9749
+ "epoch": 135.88773642464918,
9750
+ "grad_norm": 0.6765353679656982,
9751
+ "learning_rate": 1.8098360655737705e-05,
9752
+ "loss": 0.0139,
9753
+ "step": 27840
9754
+ },
9755
+ {
9756
+ "epoch": 135.98535692495423,
9757
+ "grad_norm": 0.3190823495388031,
9758
+ "learning_rate": 1.79672131147541e-05,
9759
+ "loss": 0.0152,
9760
+ "step": 27860
9761
+ },
9762
+ {
9763
+ "epoch": 136.08297742525932,
9764
+ "grad_norm": 2.0219767093658447,
9765
+ "learning_rate": 1.7836065573770494e-05,
9766
+ "loss": 0.0143,
9767
+ "step": 27880
9768
+ },
9769
+ {
9770
+ "epoch": 136.18059792556437,
9771
+ "grad_norm": 0.776849627494812,
9772
+ "learning_rate": 1.7704918032786887e-05,
9773
+ "loss": 0.0135,
9774
+ "step": 27900
9775
+ },
9776
+ {
9777
+ "epoch": 136.27821842586943,
9778
+ "grad_norm": 0.5274736285209656,
9779
+ "learning_rate": 1.757377049180328e-05,
9780
+ "loss": 0.0123,
9781
+ "step": 27920
9782
+ },
9783
+ {
9784
+ "epoch": 136.3758389261745,
9785
+ "grad_norm": 0.886225700378418,
9786
+ "learning_rate": 1.7442622950819672e-05,
9787
+ "loss": 0.0146,
9788
+ "step": 27940
9789
+ },
9790
+ {
9791
+ "epoch": 136.47345942647956,
9792
+ "grad_norm": 0.5282070636749268,
9793
+ "learning_rate": 1.731147540983607e-05,
9794
+ "loss": 0.0137,
9795
+ "step": 27960
9796
+ },
9797
+ {
9798
+ "epoch": 136.57107992678462,
9799
+ "grad_norm": 0.6784070730209351,
9800
+ "learning_rate": 1.718032786885246e-05,
9801
+ "loss": 0.0143,
9802
+ "step": 27980
9803
+ },
9804
+ {
9805
+ "epoch": 136.66870042708968,
9806
+ "grad_norm": 1.7534900903701782,
9807
+ "learning_rate": 1.7049180327868854e-05,
9808
+ "loss": 0.0137,
9809
+ "step": 28000
9810
+ },
9811
+ {
9812
+ "epoch": 136.76632092739476,
9813
+ "grad_norm": 0.40347975492477417,
9814
+ "learning_rate": 1.6918032786885247e-05,
9815
+ "loss": 0.0157,
9816
+ "step": 28020
9817
+ },
9818
+ {
9819
+ "epoch": 136.8639414276998,
9820
+ "grad_norm": 1.0218480825424194,
9821
+ "learning_rate": 1.678688524590164e-05,
9822
+ "loss": 0.0145,
9823
+ "step": 28040
9824
+ },
9825
+ {
9826
+ "epoch": 136.96156192800487,
9827
+ "grad_norm": 0.2875036597251892,
9828
+ "learning_rate": 1.6655737704918036e-05,
9829
+ "loss": 0.014,
9830
+ "step": 28060
9831
+ },
9832
+ {
9833
+ "epoch": 137.05918242830995,
9834
+ "grad_norm": 1.5968719720840454,
9835
+ "learning_rate": 1.6524590163934428e-05,
9836
+ "loss": 0.0132,
9837
+ "step": 28080
9838
+ },
9839
+ {
9840
+ "epoch": 137.156802928615,
9841
+ "grad_norm": 0.39140036702156067,
9842
+ "learning_rate": 1.6393442622950818e-05,
9843
+ "loss": 0.0138,
9844
+ "step": 28100
9845
+ },
9846
+ {
9847
+ "epoch": 137.25442342892006,
9848
+ "grad_norm": 0.36571571230888367,
9849
+ "learning_rate": 1.6262295081967214e-05,
9850
+ "loss": 0.0134,
9851
+ "step": 28120
9852
+ },
9853
+ {
9854
+ "epoch": 137.35204392922515,
9855
+ "grad_norm": 0.6531932950019836,
9856
+ "learning_rate": 1.6131147540983607e-05,
9857
+ "loss": 0.0146,
9858
+ "step": 28140
9859
+ },
9860
+ {
9861
+ "epoch": 137.4496644295302,
9862
+ "grad_norm": 0.46148520708084106,
9863
+ "learning_rate": 1.6000000000000003e-05,
9864
+ "loss": 0.0136,
9865
+ "step": 28160
9866
+ },
9867
+ {
9868
+ "epoch": 137.54728492983526,
9869
+ "grad_norm": 0.5359562635421753,
9870
+ "learning_rate": 1.5868852459016395e-05,
9871
+ "loss": 0.0128,
9872
+ "step": 28180
9873
+ },
9874
+ {
9875
+ "epoch": 137.64490543014034,
9876
+ "grad_norm": 0.5632950663566589,
9877
+ "learning_rate": 1.5737704918032788e-05,
9878
+ "loss": 0.0148,
9879
+ "step": 28200
9880
+ },
9881
+ {
9882
+ "epoch": 137.7425259304454,
9883
+ "grad_norm": 0.7229663729667664,
9884
+ "learning_rate": 1.560655737704918e-05,
9885
+ "loss": 0.0147,
9886
+ "step": 28220
9887
+ },
9888
+ {
9889
+ "epoch": 137.84014643075045,
9890
+ "grad_norm": 0.5531187653541565,
9891
+ "learning_rate": 1.5475409836065574e-05,
9892
+ "loss": 0.014,
9893
+ "step": 28240
9894
+ },
9895
+ {
9896
+ "epoch": 137.93776693105553,
9897
+ "grad_norm": 0.6305696964263916,
9898
+ "learning_rate": 1.5344262295081966e-05,
9899
+ "loss": 0.0157,
9900
+ "step": 28260
9901
+ },
9902
+ {
9903
+ "epoch": 138.0353874313606,
9904
+ "grad_norm": 0.8933548331260681,
9905
+ "learning_rate": 1.5213114754098363e-05,
9906
+ "loss": 0.0156,
9907
+ "step": 28280
9908
+ },
9909
+ {
9910
+ "epoch": 138.13300793166565,
9911
+ "grad_norm": 0.39126649498939514,
9912
+ "learning_rate": 1.5081967213114755e-05,
9913
+ "loss": 0.011,
9914
+ "step": 28300
9915
+ },
9916
+ {
9917
+ "epoch": 138.2306284319707,
9918
+ "grad_norm": 0.6234102249145508,
9919
+ "learning_rate": 1.4950819672131146e-05,
9920
+ "loss": 0.0133,
9921
+ "step": 28320
9922
+ },
9923
+ {
9924
+ "epoch": 138.32824893227578,
9925
+ "grad_norm": 0.5867244005203247,
9926
+ "learning_rate": 1.481967213114754e-05,
9927
+ "loss": 0.0138,
9928
+ "step": 28340
9929
+ },
9930
+ {
9931
+ "epoch": 138.42586943258084,
9932
+ "grad_norm": 0.6564351916313171,
9933
+ "learning_rate": 1.4688524590163935e-05,
9934
+ "loss": 0.014,
9935
+ "step": 28360
9936
+ },
9937
+ {
9938
+ "epoch": 138.5234899328859,
9939
+ "grad_norm": 1.0982993841171265,
9940
+ "learning_rate": 1.455737704918033e-05,
9941
+ "loss": 0.0133,
9942
+ "step": 28380
9943
+ },
9944
+ {
9945
+ "epoch": 138.62111043319098,
9946
+ "grad_norm": 0.8140943646430969,
9947
+ "learning_rate": 1.4426229508196722e-05,
9948
+ "loss": 0.0149,
9949
+ "step": 28400
9950
+ },
9951
+ {
9952
+ "epoch": 138.71873093349603,
9953
+ "grad_norm": 0.7273306846618652,
9954
+ "learning_rate": 1.4295081967213114e-05,
9955
+ "loss": 0.0149,
9956
+ "step": 28420
9957
+ },
9958
+ {
9959
+ "epoch": 138.8163514338011,
9960
+ "grad_norm": 0.46543437242507935,
9961
+ "learning_rate": 1.4163934426229508e-05,
9962
+ "loss": 0.0143,
9963
+ "step": 28440
9964
+ },
9965
+ {
9966
+ "epoch": 138.91397193410617,
9967
+ "grad_norm": 1.823746681213379,
9968
+ "learning_rate": 1.4032786885245902e-05,
9969
+ "loss": 0.0141,
9970
+ "step": 28460
9971
+ },
9972
+ {
9973
+ "epoch": 139.01159243441123,
9974
+ "grad_norm": 0.602825939655304,
9975
+ "learning_rate": 1.3901639344262297e-05,
9976
+ "loss": 0.0129,
9977
+ "step": 28480
9978
+ },
9979
+ {
9980
+ "epoch": 139.10921293471628,
9981
+ "grad_norm": 0.30030643939971924,
9982
+ "learning_rate": 1.377049180327869e-05,
9983
+ "loss": 0.0108,
9984
+ "step": 28500
9985
+ },
9986
+ {
9987
+ "epoch": 139.20683343502137,
9988
+ "grad_norm": 0.7023382186889648,
9989
+ "learning_rate": 1.3639344262295084e-05,
9990
+ "loss": 0.013,
9991
+ "step": 28520
9992
+ },
9993
+ {
9994
+ "epoch": 139.30445393532642,
9995
+ "grad_norm": 0.8771500587463379,
9996
+ "learning_rate": 1.3508196721311475e-05,
9997
+ "loss": 0.0144,
9998
+ "step": 28540
9999
+ },
10000
+ {
10001
+ "epoch": 139.40207443563148,
10002
+ "grad_norm": 0.6988272666931152,
10003
+ "learning_rate": 1.337704918032787e-05,
10004
+ "loss": 0.0125,
10005
+ "step": 28560
10006
+ },
10007
+ {
10008
+ "epoch": 139.49969493593656,
10009
+ "grad_norm": 0.8657557368278503,
10010
+ "learning_rate": 1.3245901639344262e-05,
10011
+ "loss": 0.0138,
10012
+ "step": 28580
10013
+ },
10014
+ {
10015
+ "epoch": 139.59731543624162,
10016
+ "grad_norm": 0.6832662224769592,
10017
+ "learning_rate": 1.3114754098360657e-05,
10018
+ "loss": 0.0127,
10019
+ "step": 28600
10020
+ },
10021
+ {
10022
+ "epoch": 139.69493593654667,
10023
+ "grad_norm": 0.9065951108932495,
10024
+ "learning_rate": 1.2983606557377051e-05,
10025
+ "loss": 0.015,
10026
+ "step": 28620
10027
+ },
10028
+ {
10029
+ "epoch": 139.79255643685173,
10030
+ "grad_norm": 0.9211568236351013,
10031
+ "learning_rate": 1.2852459016393442e-05,
10032
+ "loss": 0.0131,
10033
+ "step": 28640
10034
+ },
10035
+ {
10036
+ "epoch": 139.8901769371568,
10037
+ "grad_norm": 0.6160862445831299,
10038
+ "learning_rate": 1.2721311475409837e-05,
10039
+ "loss": 0.0163,
10040
+ "step": 28660
10041
+ },
10042
+ {
10043
+ "epoch": 139.98779743746186,
10044
+ "grad_norm": 0.8593130111694336,
10045
+ "learning_rate": 1.259016393442623e-05,
10046
+ "loss": 0.0135,
10047
+ "step": 28680
10048
+ },
10049
+ {
10050
+ "epoch": 140.08541793776692,
10051
+ "grad_norm": 0.7746515274047852,
10052
+ "learning_rate": 1.2459016393442624e-05,
10053
+ "loss": 0.0141,
10054
+ "step": 28700
10055
+ },
10056
+ {
10057
+ "epoch": 140.183038438072,
10058
+ "grad_norm": 0.7830790877342224,
10059
+ "learning_rate": 1.2327868852459017e-05,
10060
+ "loss": 0.0126,
10061
+ "step": 28720
10062
+ },
10063
+ {
10064
+ "epoch": 140.28065893837706,
10065
+ "grad_norm": 0.49005040526390076,
10066
+ "learning_rate": 1.2196721311475411e-05,
10067
+ "loss": 0.0127,
10068
+ "step": 28740
10069
+ },
10070
+ {
10071
+ "epoch": 140.3782794386821,
10072
+ "grad_norm": 0.9640679359436035,
10073
+ "learning_rate": 1.2065573770491804e-05,
10074
+ "loss": 0.0125,
10075
+ "step": 28760
10076
+ },
10077
+ {
10078
+ "epoch": 140.4758999389872,
10079
+ "grad_norm": 0.8114829659461975,
10080
+ "learning_rate": 1.1934426229508197e-05,
10081
+ "loss": 0.0138,
10082
+ "step": 28780
10083
+ },
10084
+ {
10085
+ "epoch": 140.57352043929225,
10086
+ "grad_norm": 0.8460706472396851,
10087
+ "learning_rate": 1.1803278688524591e-05,
10088
+ "loss": 0.0148,
10089
+ "step": 28800
10090
+ },
10091
+ {
10092
+ "epoch": 140.6711409395973,
10093
+ "grad_norm": 0.4882986843585968,
10094
+ "learning_rate": 1.1672131147540984e-05,
10095
+ "loss": 0.0141,
10096
+ "step": 28820
10097
+ },
10098
+ {
10099
+ "epoch": 140.7687614399024,
10100
+ "grad_norm": 1.0322729349136353,
10101
+ "learning_rate": 1.1540983606557378e-05,
10102
+ "loss": 0.0148,
10103
+ "step": 28840
10104
+ },
10105
+ {
10106
+ "epoch": 140.86638194020745,
10107
+ "grad_norm": 1.2970582246780396,
10108
+ "learning_rate": 1.1409836065573771e-05,
10109
+ "loss": 0.0144,
10110
+ "step": 28860
10111
+ },
10112
+ {
10113
+ "epoch": 140.9640024405125,
10114
+ "grad_norm": 0.9063767790794373,
10115
+ "learning_rate": 1.1278688524590164e-05,
10116
+ "loss": 0.0123,
10117
+ "step": 28880
10118
+ },
10119
+ {
10120
+ "epoch": 141.06162294081759,
10121
+ "grad_norm": 0.60384202003479,
10122
+ "learning_rate": 1.1147540983606558e-05,
10123
+ "loss": 0.0122,
10124
+ "step": 28900
10125
+ },
10126
+ {
10127
+ "epoch": 141.15924344112264,
10128
+ "grad_norm": 0.5142499804496765,
10129
+ "learning_rate": 1.1016393442622953e-05,
10130
+ "loss": 0.0125,
10131
+ "step": 28920
10132
+ },
10133
+ {
10134
+ "epoch": 141.2568639414277,
10135
+ "grad_norm": 0.6854032874107361,
10136
+ "learning_rate": 1.0885245901639344e-05,
10137
+ "loss": 0.0132,
10138
+ "step": 28940
10139
+ },
10140
+ {
10141
+ "epoch": 141.35448444173275,
10142
+ "grad_norm": 1.138895034790039,
10143
+ "learning_rate": 1.0754098360655738e-05,
10144
+ "loss": 0.0138,
10145
+ "step": 28960
10146
+ },
10147
+ {
10148
+ "epoch": 141.45210494203783,
10149
+ "grad_norm": 0.5815340280532837,
10150
+ "learning_rate": 1.0622950819672131e-05,
10151
+ "loss": 0.0115,
10152
+ "step": 28980
10153
+ },
10154
+ {
10155
+ "epoch": 141.5497254423429,
10156
+ "grad_norm": 0.6024242639541626,
10157
+ "learning_rate": 1.0491803278688525e-05,
10158
+ "loss": 0.0127,
10159
+ "step": 29000
10160
+ },
10161
+ {
10162
+ "epoch": 141.64734594264795,
10163
+ "grad_norm": 0.44016191363334656,
10164
+ "learning_rate": 1.0360655737704918e-05,
10165
+ "loss": 0.0146,
10166
+ "step": 29020
10167
+ },
10168
+ {
10169
+ "epoch": 141.74496644295303,
10170
+ "grad_norm": 2.051720142364502,
10171
+ "learning_rate": 1.0229508196721311e-05,
10172
+ "loss": 0.0151,
10173
+ "step": 29040
10174
+ },
10175
+ {
10176
+ "epoch": 141.84258694325808,
10177
+ "grad_norm": 0.6961409449577332,
10178
+ "learning_rate": 1.0098360655737705e-05,
10179
+ "loss": 0.013,
10180
+ "step": 29060
10181
+ },
10182
+ {
10183
+ "epoch": 141.94020744356314,
10184
+ "grad_norm": 1.1912919282913208,
10185
+ "learning_rate": 9.9672131147541e-06,
10186
+ "loss": 0.0131,
10187
+ "step": 29080
10188
+ },
10189
+ {
10190
+ "epoch": 142.03782794386822,
10191
+ "grad_norm": 0.6203546524047852,
10192
+ "learning_rate": 9.836065573770493e-06,
10193
+ "loss": 0.013,
10194
+ "step": 29100
10195
+ },
10196
+ {
10197
+ "epoch": 142.13544844417328,
10198
+ "grad_norm": 0.5386860966682434,
10199
+ "learning_rate": 9.704918032786885e-06,
10200
+ "loss": 0.0123,
10201
+ "step": 29120
10202
+ },
10203
+ {
10204
+ "epoch": 142.23306894447833,
10205
+ "grad_norm": 0.5639663934707642,
10206
+ "learning_rate": 9.573770491803278e-06,
10207
+ "loss": 0.0123,
10208
+ "step": 29140
10209
+ },
10210
+ {
10211
+ "epoch": 142.33068944478342,
10212
+ "grad_norm": 0.577315628528595,
10213
+ "learning_rate": 9.442622950819673e-06,
10214
+ "loss": 0.0125,
10215
+ "step": 29160
10216
+ },
10217
+ {
10218
+ "epoch": 142.42830994508847,
10219
+ "grad_norm": 0.5142390727996826,
10220
+ "learning_rate": 9.311475409836067e-06,
10221
+ "loss": 0.0133,
10222
+ "step": 29180
10223
+ },
10224
+ {
10225
+ "epoch": 142.52593044539353,
10226
+ "grad_norm": 0.7933589816093445,
10227
+ "learning_rate": 9.18032786885246e-06,
10228
+ "loss": 0.0151,
10229
+ "step": 29200
10230
+ },
10231
+ {
10232
+ "epoch": 142.6235509456986,
10233
+ "grad_norm": 0.8499199151992798,
10234
+ "learning_rate": 9.049180327868853e-06,
10235
+ "loss": 0.0136,
10236
+ "step": 29220
10237
+ },
10238
+ {
10239
+ "epoch": 142.72117144600367,
10240
+ "grad_norm": 0.6795129179954529,
10241
+ "learning_rate": 8.918032786885247e-06,
10242
+ "loss": 0.0136,
10243
+ "step": 29240
10244
+ },
10245
+ {
10246
+ "epoch": 142.81879194630872,
10247
+ "grad_norm": 0.3827701210975647,
10248
+ "learning_rate": 8.78688524590164e-06,
10249
+ "loss": 0.0122,
10250
+ "step": 29260
10251
+ },
10252
+ {
10253
+ "epoch": 142.91641244661378,
10254
+ "grad_norm": 0.6248555779457092,
10255
+ "learning_rate": 8.655737704918034e-06,
10256
+ "loss": 0.0113,
10257
+ "step": 29280
10258
+ },
10259
+ {
10260
+ "epoch": 143.01403294691886,
10261
+ "grad_norm": 0.9943171739578247,
10262
+ "learning_rate": 8.524590163934427e-06,
10263
+ "loss": 0.0145,
10264
+ "step": 29300
10265
+ },
10266
+ {
10267
+ "epoch": 143.11165344722392,
10268
+ "grad_norm": 0.3848264217376709,
10269
+ "learning_rate": 8.39344262295082e-06,
10270
+ "loss": 0.0119,
10271
+ "step": 29320
10272
+ },
10273
+ {
10274
+ "epoch": 143.20927394752897,
10275
+ "grad_norm": 1.02989661693573,
10276
+ "learning_rate": 8.262295081967214e-06,
10277
+ "loss": 0.0123,
10278
+ "step": 29340
10279
+ },
10280
+ {
10281
+ "epoch": 143.30689444783405,
10282
+ "grad_norm": 0.5843254923820496,
10283
+ "learning_rate": 8.131147540983607e-06,
10284
+ "loss": 0.0124,
10285
+ "step": 29360
10286
+ },
10287
+ {
10288
+ "epoch": 143.4045149481391,
10289
+ "grad_norm": 0.5134753584861755,
10290
+ "learning_rate": 8.000000000000001e-06,
10291
+ "loss": 0.0122,
10292
+ "step": 29380
10293
+ },
10294
+ {
10295
+ "epoch": 143.50213544844416,
10296
+ "grad_norm": 0.4464253783226013,
10297
+ "learning_rate": 7.868852459016394e-06,
10298
+ "loss": 0.0116,
10299
+ "step": 29400
10300
+ },
10301
+ {
10302
+ "epoch": 143.59975594874925,
10303
+ "grad_norm": 0.445730060338974,
10304
+ "learning_rate": 7.737704918032787e-06,
10305
+ "loss": 0.0116,
10306
+ "step": 29420
10307
+ },
10308
+ {
10309
+ "epoch": 143.6973764490543,
10310
+ "grad_norm": 0.7831693887710571,
10311
+ "learning_rate": 7.606557377049181e-06,
10312
+ "loss": 0.0122,
10313
+ "step": 29440
10314
+ },
10315
+ {
10316
+ "epoch": 143.79499694935936,
10317
+ "grad_norm": 0.33939194679260254,
10318
+ "learning_rate": 7.475409836065573e-06,
10319
+ "loss": 0.0131,
10320
+ "step": 29460
10321
+ },
10322
+ {
10323
+ "epoch": 143.89261744966444,
10324
+ "grad_norm": 0.36323612928390503,
10325
+ "learning_rate": 7.344262295081968e-06,
10326
+ "loss": 0.0157,
10327
+ "step": 29480
10328
+ },
10329
+ {
10330
+ "epoch": 143.9902379499695,
10331
+ "grad_norm": 0.6487870216369629,
10332
+ "learning_rate": 7.213114754098361e-06,
10333
+ "loss": 0.0157,
10334
+ "step": 29500
10335
+ },
10336
+ {
10337
+ "epoch": 144.08785845027455,
10338
+ "grad_norm": 0.3841145932674408,
10339
+ "learning_rate": 7.081967213114754e-06,
10340
+ "loss": 0.0106,
10341
+ "step": 29520
10342
+ },
10343
+ {
10344
+ "epoch": 144.1854789505796,
10345
+ "grad_norm": 1.0142998695373535,
10346
+ "learning_rate": 6.9508196721311484e-06,
10347
+ "loss": 0.0129,
10348
+ "step": 29540
10349
+ },
10350
+ {
10351
+ "epoch": 144.2830994508847,
10352
+ "grad_norm": 1.5330740213394165,
10353
+ "learning_rate": 6.819672131147542e-06,
10354
+ "loss": 0.0142,
10355
+ "step": 29560
10356
+ },
10357
+ {
10358
+ "epoch": 144.38071995118975,
10359
+ "grad_norm": 2.0231475830078125,
10360
+ "learning_rate": 6.688524590163935e-06,
10361
+ "loss": 0.0115,
10362
+ "step": 29580
10363
+ },
10364
+ {
10365
+ "epoch": 144.4783404514948,
10366
+ "grad_norm": 0.542549192905426,
10367
+ "learning_rate": 6.557377049180328e-06,
10368
+ "loss": 0.0131,
10369
+ "step": 29600
10370
+ },
10371
+ {
10372
+ "epoch": 144.57596095179989,
10373
+ "grad_norm": 0.6942082047462463,
10374
+ "learning_rate": 6.426229508196721e-06,
10375
+ "loss": 0.013,
10376
+ "step": 29620
10377
+ },
10378
+ {
10379
+ "epoch": 144.67358145210494,
10380
+ "grad_norm": 0.4934479296207428,
10381
+ "learning_rate": 6.295081967213115e-06,
10382
+ "loss": 0.0124,
10383
+ "step": 29640
10384
+ },
10385
+ {
10386
+ "epoch": 144.77120195241,
10387
+ "grad_norm": 0.9981206655502319,
10388
+ "learning_rate": 6.163934426229508e-06,
10389
+ "loss": 0.013,
10390
+ "step": 29660
10391
+ },
10392
+ {
10393
+ "epoch": 144.86882245271508,
10394
+ "grad_norm": 0.5263285636901855,
10395
+ "learning_rate": 6.032786885245902e-06,
10396
+ "loss": 0.013,
10397
+ "step": 29680
10398
+ },
10399
+ {
10400
+ "epoch": 144.96644295302013,
10401
+ "grad_norm": 0.4131539762020111,
10402
+ "learning_rate": 5.9016393442622956e-06,
10403
+ "loss": 0.0132,
10404
+ "step": 29700
10405
+ },
10406
+ {
10407
+ "epoch": 145.0640634533252,
10408
+ "grad_norm": 0.9396491646766663,
10409
+ "learning_rate": 5.770491803278689e-06,
10410
+ "loss": 0.012,
10411
+ "step": 29720
10412
+ },
10413
+ {
10414
+ "epoch": 145.16168395363027,
10415
+ "grad_norm": 0.37081795930862427,
10416
+ "learning_rate": 5.639344262295082e-06,
10417
+ "loss": 0.0118,
10418
+ "step": 29740
10419
+ },
10420
+ {
10421
+ "epoch": 145.25930445393533,
10422
+ "grad_norm": 0.5653529167175293,
10423
+ "learning_rate": 5.508196721311476e-06,
10424
+ "loss": 0.0122,
10425
+ "step": 29760
10426
+ },
10427
+ {
10428
+ "epoch": 145.35692495424038,
10429
+ "grad_norm": 0.49712416529655457,
10430
+ "learning_rate": 5.377049180327869e-06,
10431
+ "loss": 0.012,
10432
+ "step": 29780
10433
+ },
10434
+ {
10435
+ "epoch": 145.45454545454547,
10436
+ "grad_norm": 0.6723568439483643,
10437
+ "learning_rate": 5.245901639344263e-06,
10438
+ "loss": 0.0132,
10439
+ "step": 29800
10440
+ },
10441
+ {
10442
+ "epoch": 145.55216595485052,
10443
+ "grad_norm": 0.6191849708557129,
10444
+ "learning_rate": 5.1147540983606555e-06,
10445
+ "loss": 0.0142,
10446
+ "step": 29820
10447
+ },
10448
+ {
10449
+ "epoch": 145.64978645515558,
10450
+ "grad_norm": 0.8201606273651123,
10451
+ "learning_rate": 4.98360655737705e-06,
10452
+ "loss": 0.014,
10453
+ "step": 29840
10454
+ },
10455
+ {
10456
+ "epoch": 145.74740695546063,
10457
+ "grad_norm": 0.4357975423336029,
10458
+ "learning_rate": 4.852459016393443e-06,
10459
+ "loss": 0.0119,
10460
+ "step": 29860
10461
+ },
10462
+ {
10463
+ "epoch": 145.84502745576572,
10464
+ "grad_norm": 0.5062920451164246,
10465
+ "learning_rate": 4.721311475409836e-06,
10466
+ "loss": 0.0112,
10467
+ "step": 29880
10468
+ },
10469
+ {
10470
+ "epoch": 145.94264795607077,
10471
+ "grad_norm": 0.6272954940795898,
10472
+ "learning_rate": 4.59016393442623e-06,
10473
+ "loss": 0.0121,
10474
+ "step": 29900
10475
+ },
10476
+ {
10477
+ "epoch": 146.04026845637583,
10478
+ "grad_norm": 0.3578208088874817,
10479
+ "learning_rate": 4.4590163934426235e-06,
10480
+ "loss": 0.0137,
10481
+ "step": 29920
10482
+ },
10483
+ {
10484
+ "epoch": 146.1378889566809,
10485
+ "grad_norm": 0.4044102132320404,
10486
+ "learning_rate": 4.327868852459017e-06,
10487
+ "loss": 0.0133,
10488
+ "step": 29940
10489
+ },
10490
+ {
10491
+ "epoch": 146.23550945698597,
10492
+ "grad_norm": 0.4162692725658417,
10493
+ "learning_rate": 4.19672131147541e-06,
10494
+ "loss": 0.013,
10495
+ "step": 29960
10496
+ },
10497
+ {
10498
+ "epoch": 146.33312995729102,
10499
+ "grad_norm": 0.6349827647209167,
10500
+ "learning_rate": 4.0655737704918034e-06,
10501
+ "loss": 0.0138,
10502
+ "step": 29980
10503
+ },
10504
+ {
10505
+ "epoch": 146.4307504575961,
10506
+ "grad_norm": 0.6992813348770142,
10507
+ "learning_rate": 3.934426229508197e-06,
10508
+ "loss": 0.0142,
10509
+ "step": 30000
10510
  }
10511
  ],
10512
  "logging_steps": 20,
 
10526
  "attributes": {}
10527
  }
10528
  },
10529
+ "total_flos": 3.9434336130018816e+17,
10530
  "train_batch_size": 1,
10531
  "trial_name": null,
10532
  "trial_params": null