mikhail-panzo commited on
Commit
b752154
1 Parent(s): 4e63d6d

Training in progress, step 6500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d095a0b3074fe5a8626cfa4acc87c71891375b0a5807fe6b082815254ce690a8
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be130cdb6d58ca576d555e5ea8567af6f6d33afe2411bfe3f11f6564eaa152d0
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70930de6e09d970682f047c2f2015d79bef17aec7309243de6cba5bc51d6c42f
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062d698adb1b00a404b09b72c2a558bae0161b9bae78ab82d6699e09598840ca
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23e913b1b09af4704f298f5d732037471d49897007ac37250e2071bb06273799
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf76ded8fdcf7a4a436a3933944f6bd69f05be25204a8e34a3f160837caddf2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea4cdeda39ba2136e592d70ee5cca1961eef743b3307c81d32a1dfebf7fdaaa7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:048c5f28ad1e36826a9d8ed6d7ba190c323a6c73bfb07a0753770149cba430be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.3915382921695709,
3
  "best_model_checkpoint": "mikhail-panzo/fil-ceb_b64_le5_s8000/checkpoint-3000",
4
- "epoch": 235.2941176470588,
5
  "eval_steps": 500,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -943,6 +943,84 @@
943
  "eval_samples_per_second": 29.584,
944
  "eval_steps_per_second": 3.78,
945
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946
  }
947
  ],
948
  "logging_steps": 50,
@@ -962,7 +1040,7 @@
962
  "attributes": {}
963
  }
964
  },
965
- "total_flos": 6.5283263362154376e+16,
966
  "train_batch_size": 32,
967
  "trial_name": null,
968
  "trial_params": null
 
1
  {
2
  "best_metric": 0.3915382921695709,
3
  "best_model_checkpoint": "mikhail-panzo/fil-ceb_b64_le5_s8000/checkpoint-3000",
4
+ "epoch": 254.90196078431373,
5
  "eval_steps": 500,
6
+ "global_step": 6500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
943
  "eval_samples_per_second": 29.584,
944
  "eval_steps_per_second": 3.78,
945
  "step": 6000
946
+ },
947
+ {
948
+ "epoch": 237.2549019607843,
949
+ "grad_norm": 0.8828366994857788,
950
+ "learning_rate": 3.2533333333333332e-06,
951
+ "loss": 0.3881,
952
+ "step": 6050
953
+ },
954
+ {
955
+ "epoch": 239.2156862745098,
956
+ "grad_norm": 1.034189224243164,
957
+ "learning_rate": 3.17e-06,
958
+ "loss": 0.3905,
959
+ "step": 6100
960
+ },
961
+ {
962
+ "epoch": 241.1764705882353,
963
+ "grad_norm": 0.7035565972328186,
964
+ "learning_rate": 3.0866666666666666e-06,
965
+ "loss": 0.3889,
966
+ "step": 6150
967
+ },
968
+ {
969
+ "epoch": 243.13725490196077,
970
+ "grad_norm": 0.8675793409347534,
971
+ "learning_rate": 3.0033333333333335e-06,
972
+ "loss": 0.3895,
973
+ "step": 6200
974
+ },
975
+ {
976
+ "epoch": 245.09803921568627,
977
+ "grad_norm": 0.731158435344696,
978
+ "learning_rate": 2.92e-06,
979
+ "loss": 0.3835,
980
+ "step": 6250
981
+ },
982
+ {
983
+ "epoch": 247.05882352941177,
984
+ "grad_norm": 0.8929085731506348,
985
+ "learning_rate": 2.836666666666667e-06,
986
+ "loss": 0.3906,
987
+ "step": 6300
988
+ },
989
+ {
990
+ "epoch": 249.01960784313727,
991
+ "grad_norm": 1.1059510707855225,
992
+ "learning_rate": 2.7533333333333334e-06,
993
+ "loss": 0.3905,
994
+ "step": 6350
995
+ },
996
+ {
997
+ "epoch": 250.98039215686273,
998
+ "grad_norm": 0.9429101347923279,
999
+ "learning_rate": 2.6700000000000003e-06,
1000
+ "loss": 0.3894,
1001
+ "step": 6400
1002
+ },
1003
+ {
1004
+ "epoch": 252.94117647058823,
1005
+ "grad_norm": 1.0544432401657104,
1006
+ "learning_rate": 2.5866666666666667e-06,
1007
+ "loss": 0.3937,
1008
+ "step": 6450
1009
+ },
1010
+ {
1011
+ "epoch": 254.90196078431373,
1012
+ "grad_norm": 0.8494179844856262,
1013
+ "learning_rate": 2.5033333333333336e-06,
1014
+ "loss": 0.3921,
1015
+ "step": 6500
1016
+ },
1017
+ {
1018
+ "epoch": 254.90196078431373,
1019
+ "eval_loss": 0.3925850987434387,
1020
+ "eval_runtime": 6.2722,
1021
+ "eval_samples_per_second": 28.698,
1022
+ "eval_steps_per_second": 3.667,
1023
+ "step": 6500
1024
  }
1025
  ],
1026
  "logging_steps": 50,
 
1040
  "attributes": {}
1041
  }
1042
  },
1043
+ "total_flos": 7.072681729994813e+16,
1044
  "train_batch_size": 32,
1045
  "trial_name": null,
1046
  "trial_params": null