Nekofox commited on
Commit
27f8e75
1 Parent(s): 19765ae

Training in progress, step 104000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d082cb4b72cfdb4254a34e305011b454d31bb30dbd27c42aa24b574147c540e9
3
  size 3871544599
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6b72e44546364141ee724a3720f7fcd3e0187baca4188905097793e58f73cf
3
  size 3871544599
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:112a525385ef4b2920ca2e4ee2218f98e8c63380cb36d3d525690ca7a1c7da52
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e0f8fd34ad9689e90c51a6ce416bb3efe00164a6779e3a86f8096cf6b0c3e6
3
  size 1944201353
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41cdaa8da95e94cc1e97cd08cfc56ef217e59ee59f7687ac8419775c4bc80984
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406220ea58b0ef58e4c479dbacbea0d151a94a8fec8c33f3207fb059c4aaf735
3
  size 14575
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7953899316608ec70beb2202d2f7cbae5152dd2b0e53df4aac9eef077d77e70d
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c168c2a8f00ea281d9279e79fc22bf9baa2663e805bc63f10cd023cb5f2adac6
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12799c4bd0d2d9858c0d7696e12e998a91a87a7a92ec0bf0a40a0e286e746cf8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c4f8cd51d2e8c9949ea38c90ce4eb9bc18b9068fc7636cfea38097eac939ad
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7066538526768048,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1206,11 +1206,59 @@
1206
  "learning_rate": 4.938697889674869e-06,
1207
  "loss": 1.689,
1208
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  }
1210
  ],
1211
  "max_steps": 1415120,
1212
  "num_train_epochs": 10,
1213
- "total_flos": 4.83002072753111e+16,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.734920006783877,
5
+ "global_step": 104000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1206
  "learning_rate": 4.938697889674869e-06,
1207
  "loss": 1.689,
1208
  "step": 100000
1209
+ },
1210
+ {
1211
+ "epoch": 0.71,
1212
+ "learning_rate": 4.938085627465422e-06,
1213
+ "loss": 1.6779,
1214
+ "step": 100500
1215
+ },
1216
+ {
1217
+ "epoch": 0.71,
1218
+ "learning_rate": 4.937470361240773e-06,
1219
+ "loss": 1.694,
1220
+ "step": 101000
1221
+ },
1222
+ {
1223
+ "epoch": 0.72,
1224
+ "learning_rate": 4.936852091759006e-06,
1225
+ "loss": 1.6757,
1226
+ "step": 101500
1227
+ },
1228
+ {
1229
+ "epoch": 0.72,
1230
+ "learning_rate": 4.936230819781902e-06,
1231
+ "loss": 1.7025,
1232
+ "step": 102000
1233
+ },
1234
+ {
1235
+ "epoch": 0.72,
1236
+ "learning_rate": 4.935606546074945e-06,
1237
+ "loss": 1.6897,
1238
+ "step": 102500
1239
+ },
1240
+ {
1241
+ "epoch": 0.73,
1242
+ "learning_rate": 4.934980528951094e-06,
1243
+ "loss": 1.7087,
1244
+ "step": 103000
1245
+ },
1246
+ {
1247
+ "epoch": 0.73,
1248
+ "learning_rate": 4.9343502600952685e-06,
1249
+ "loss": 1.6645,
1250
+ "step": 103500
1251
+ },
1252
+ {
1253
+ "epoch": 0.73,
1254
+ "learning_rate": 4.9337169918266646e-06,
1255
+ "loss": 1.6942,
1256
+ "step": 104000
1257
  }
1258
  ],
1259
  "max_steps": 1415120,
1260
  "num_train_epochs": 10,
1261
+ "total_flos": 4.963728541905715e+16,
1262
  "trial_name": null,
1263
  "trial_params": null
1264
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:112a525385ef4b2920ca2e4ee2218f98e8c63380cb36d3d525690ca7a1c7da52
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e0f8fd34ad9689e90c51a6ce416bb3efe00164a6779e3a86f8096cf6b0c3e6
3
  size 1944201353
runs/May15_17-32-55_19599b9fb4f0/events.out.tfevents.1684173637.19599b9fb4f0.1168.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef30ab0bad199cc307dca07844477268dc4ed31ab93798a295ba8e4bfd265751
3
- size 9583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f04fd7a7e58b8b6bf9158b672f823af500a6cb60bdddafb0354038ce68a02da
3
+ size 10863