dq158 committed on
Commit
6969681
1 Parent(s): f988d6e

Training in progress, epoch 13, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41341b31785ed0a2fbc8630488a4926f852161ef06ea08cc89540c9c37dc6630
3
  size 2372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:904b02e1e6c26215ea888d3f8f6d7aafb77206ed1937def3937a940637da0f1c
3
  size 2372346
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7137ffdadd3d98f6eb4df58e0d93207f78d2d3a226609b6aa51f825be2c62570
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb5663454a11041d207ecbb686a7490a34d8683fe674b45b29b48acdb58fa931
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81575766e0f892673e3292549b8eae61ee2367f845a1b2b10898b9e83a3ef05f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07f9db347331ffb79c2e2cf5b87f5ee3885eb3ce501d58242d83ec8e3cae758d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fc3d9b89093dc44b3f8ae65848b5c79017341da78ff50d3adcc276738c797e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae3274533ddd7159e50e851656ea6e92a09b25d95e547026de312a25757234d5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
- "epoch": 12.0,
5
  "eval_steps": 500,
6
- "global_step": 75876,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1141,13 +1141,110 @@
1141
  "eval_steps_per_second": 1.038,
1142
  "eval_translation_length": 52457,
1143
  "step": 75876
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1144
  }
1145
  ],
1146
  "logging_steps": 500,
1147
  "max_steps": 126460,
1148
  "num_train_epochs": 20,
1149
  "save_steps": 500,
1150
- "total_flos": 2.078183784091484e+17,
1151
  "trial_name": null,
1152
  "trial_params": null
1153
  }
 
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
+ "epoch": 13.0,
5
  "eval_steps": 500,
6
+ "global_step": 82199,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1141
  "eval_steps_per_second": 1.038,
1142
  "eval_translation_length": 52457,
1143
  "step": 75876
1144
+ },
1145
+ {
1146
+ "epoch": 12.02,
1147
+ "learning_rate": 1.7319657912101309e-06,
1148
+ "loss": 1.7871,
1149
+ "step": 76000
1150
+ },
1151
+ {
1152
+ "epoch": 12.1,
1153
+ "learning_rate": 1.7023574667538268e-06,
1154
+ "loss": 1.7728,
1155
+ "step": 76500
1156
+ },
1157
+ {
1158
+ "epoch": 12.18,
1159
+ "learning_rate": 1.6728731866869999e-06,
1160
+ "loss": 1.792,
1161
+ "step": 77000
1162
+ },
1163
+ {
1164
+ "epoch": 12.26,
1165
+ "learning_rate": 1.6435175362209033e-06,
1166
+ "loss": 1.8009,
1167
+ "step": 77500
1168
+ },
1169
+ {
1170
+ "epoch": 12.34,
1171
+ "learning_rate": 1.6142950805631178e-06,
1172
+ "loss": 1.751,
1173
+ "step": 78000
1174
+ },
1175
+ {
1176
+ "epoch": 12.41,
1177
+ "learning_rate": 1.5852103642075995e-06,
1178
+ "loss": 1.7877,
1179
+ "step": 78500
1180
+ },
1181
+ {
1182
+ "epoch": 12.49,
1183
+ "learning_rate": 1.5562679102279453e-06,
1184
+ "loss": 1.7936,
1185
+ "step": 79000
1186
+ },
1187
+ {
1188
+ "epoch": 12.57,
1189
+ "learning_rate": 1.5274722195740005e-06,
1190
+ "loss": 1.7884,
1191
+ "step": 79500
1192
+ },
1193
+ {
1194
+ "epoch": 12.65,
1195
+ "learning_rate": 1.4988277703718882e-06,
1196
+ "loss": 1.7617,
1197
+ "step": 80000
1198
+ },
1199
+ {
1200
+ "epoch": 12.73,
1201
+ "learning_rate": 1.4703390172276072e-06,
1202
+ "loss": 1.7916,
1203
+ "step": 80500
1204
+ },
1205
+ {
1206
+ "epoch": 12.81,
1207
+ "learning_rate": 1.4420103905342767e-06,
1208
+ "loss": 1.7773,
1209
+ "step": 81000
1210
+ },
1211
+ {
1212
+ "epoch": 12.89,
1213
+ "learning_rate": 1.4138462957831472e-06,
1214
+ "loss": 1.7798,
1215
+ "step": 81500
1216
+ },
1217
+ {
1218
+ "epoch": 12.97,
1219
+ "learning_rate": 1.3858511128784937e-06,
1220
+ "loss": 1.7658,
1221
+ "step": 82000
1222
+ },
1223
+ {
1224
+ "epoch": 13.0,
1225
+ "eval_bleu": 1.0,
1226
+ "eval_brevity_penalty": 1.0,
1227
+ "eval_length_ratio": 1.0,
1228
+ "eval_loss": 1.5740926265716553,
1229
+ "eval_precisions": [
1230
+ 1.0,
1231
+ 1.0,
1232
+ 1.0,
1233
+ 1.0
1234
+ ],
1235
+ "eval_reference_length": 52468,
1236
+ "eval_runtime": 677.6429,
1237
+ "eval_samples_per_second": 4.148,
1238
+ "eval_steps_per_second": 1.037,
1239
+ "eval_translation_length": 52468,
1240
+ "step": 82199
1241
  }
1242
  ],
1243
  "logging_steps": 500,
1244
  "max_steps": 126460,
1245
  "num_train_epochs": 20,
1246
  "save_steps": 500,
1247
+ "total_flos": 2.251365766099108e+17,
1248
  "trial_name": null,
1249
  "trial_params": null
1250
  }