NairaRahim committed on
Commit
e802d02
·
verified ·
1 Parent(s): 60cb0e7

Training in progress, epoch 11, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b0578188a7562ebb3f653a9c172c7aab35806f8a64c735c6b610d5e2438e16f
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4b5d9091a6dbab9d2b4be7cf992134ba4a3e0d729e96284bc4512ac0932620
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bd28c218e06e0ddb714109b224f0d2d6ff0943a81c87b19c895d46869fe043e
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9dbc9961f1b825d07e327826af5885ae6801dfe3867c659b03e9c90764c433
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e31bad291fd392e01d851c04b44cf7cac0f5f8b28830534382ca16e10c847e7a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1671761f2a32f97e49b389d83fe64fe54fae391ec682766d59ea01e911801f0d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac3125827e91c83a2b02ffbd5e22748b751677850854e358e914d72d2a70c5e5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a8f171c30ec70e8b7de39e28734b3eb14c402c92c5675eccaa14ecf588e6cff
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 13050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -997,6 +997,105 @@
997
  "eval_samples_per_second": 26.475,
998
  "eval_steps_per_second": 3.327,
999
  "step": 13050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  }
1001
  ],
1002
  "logging_steps": 100,
@@ -1011,7 +1110,7 @@
1011
  "early_stopping_threshold": 0.0
1012
  },
1013
  "attributes": {
1014
- "early_stopping_patience_counter": 0
1015
  }
1016
  },
1017
  "TrainerControl": {
@@ -1025,7 +1124,7 @@
1025
  "attributes": {}
1026
  }
1027
  },
1028
- "total_flos": 1.407310903047168e+16,
1029
  "train_batch_size": 8,
1030
  "trial_name": null,
1031
  "trial_params": null
 
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 14355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
997
  "eval_samples_per_second": 26.475,
998
  "eval_steps_per_second": 3.327,
999
  "step": 13050
1000
+ },
1001
+ {
1002
+ "epoch": 10.03831417624521,
1003
+ "grad_norm": 2.5322816371917725,
1004
+ "learning_rate": 4.3728448275862074e-05,
1005
+ "loss": 33.8873,
1006
+ "step": 13100
1007
+ },
1008
+ {
1009
+ "epoch": 10.114942528735632,
1010
+ "grad_norm": 2.1063241958618164,
1011
+ "learning_rate": 4.368103448275862e-05,
1012
+ "loss": 33.871,
1013
+ "step": 13200
1014
+ },
1015
+ {
1016
+ "epoch": 10.191570881226054,
1017
+ "grad_norm": 3.7001326084136963,
1018
+ "learning_rate": 4.3633141762452106e-05,
1019
+ "loss": 34.5129,
1020
+ "step": 13300
1021
+ },
1022
+ {
1023
+ "epoch": 10.268199233716475,
1024
+ "grad_norm": 1.8534705638885498,
1025
+ "learning_rate": 4.35852490421456e-05,
1026
+ "loss": 33.7739,
1027
+ "step": 13400
1028
+ },
1029
+ {
1030
+ "epoch": 10.344827586206897,
1031
+ "grad_norm": 1.9871069192886353,
1032
+ "learning_rate": 4.3537356321839086e-05,
1033
+ "loss": 33.4124,
1034
+ "step": 13500
1035
+ },
1036
+ {
1037
+ "epoch": 10.421455938697317,
1038
+ "grad_norm": 2.264529228210449,
1039
+ "learning_rate": 4.348946360153257e-05,
1040
+ "loss": 33.24,
1041
+ "step": 13600
1042
+ },
1043
+ {
1044
+ "epoch": 10.49808429118774,
1045
+ "grad_norm": 3.0297787189483643,
1046
+ "learning_rate": 4.344157088122606e-05,
1047
+ "loss": 33.2922,
1048
+ "step": 13700
1049
+ },
1050
+ {
1051
+ "epoch": 10.574712643678161,
1052
+ "grad_norm": 2.7185864448547363,
1053
+ "learning_rate": 4.339367816091954e-05,
1054
+ "loss": 33.4859,
1055
+ "step": 13800
1056
+ },
1057
+ {
1058
+ "epoch": 10.651340996168582,
1059
+ "grad_norm": 3.8887524604797363,
1060
+ "learning_rate": 4.334578544061303e-05,
1061
+ "loss": 33.4322,
1062
+ "step": 13900
1063
+ },
1064
+ {
1065
+ "epoch": 10.727969348659004,
1066
+ "grad_norm": 2.5119857788085938,
1067
+ "learning_rate": 4.3297892720306514e-05,
1068
+ "loss": 33.6234,
1069
+ "step": 14000
1070
+ },
1071
+ {
1072
+ "epoch": 10.804597701149426,
1073
+ "grad_norm": 3.2969565391540527,
1074
+ "learning_rate": 4.325e-05,
1075
+ "loss": 33.4341,
1076
+ "step": 14100
1077
+ },
1078
+ {
1079
+ "epoch": 10.881226053639846,
1080
+ "grad_norm": 3.3629229068756104,
1081
+ "learning_rate": 4.320210727969349e-05,
1082
+ "loss": 32.7636,
1083
+ "step": 14200
1084
+ },
1085
+ {
1086
+ "epoch": 10.957854406130268,
1087
+ "grad_norm": 3.0765013694763184,
1088
+ "learning_rate": 4.3154214559386975e-05,
1089
+ "loss": 33.7066,
1090
+ "step": 14300
1091
+ },
1092
+ {
1093
+ "epoch": 11.0,
1094
+ "eval_loss": 34.70278549194336,
1095
+ "eval_runtime": 49.2928,
1096
+ "eval_samples_per_second": 26.474,
1097
+ "eval_steps_per_second": 3.327,
1098
+ "step": 14355
1099
  }
1100
  ],
1101
  "logging_steps": 100,
 
1110
  "early_stopping_threshold": 0.0
1111
  },
1112
  "attributes": {
1113
+ "early_stopping_patience_counter": 1
1114
  }
1115
  },
1116
  "TrainerControl": {
 
1124
  "attributes": {}
1125
  }
1126
  },
1127
+ "total_flos": 1.5480419933518848e+16,
1128
  "train_batch_size": 8,
1129
  "trial_name": null,
1130
  "trial_params": null