Nekofox commited on
Commit
19765ae
1 Parent(s): 2a13511

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:074aae3be36ece720227c7d5704e6c459a837d5e9816c40e062f7d71f8a7ce90
3
  size 3871544599
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d082cb4b72cfdb4254a34e305011b454d31bb30dbd27c42aa24b574147c540e9
3
  size 3871544599
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e19e2e264275220026b221d7bd9bb2485aa9d99a2f9b2280088444e28714ea1
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112a525385ef4b2920ca2e4ee2218f98e8c63380cb36d3d525690ca7a1c7da52
3
  size 1944201353
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0459cecd0f6210ddc54ea326d85e85954cf661a179b72074a382bb5f0d54b3
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41cdaa8da95e94cc1e97cd08cfc56ef217e59ee59f7687ac8419775c4bc80984
3
  size 14575
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:260d8130ef6fe72af25504bb5a639384b37c9ba0958144b30ad87da616600c37
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7953899316608ec70beb2202d2f7cbae5152dd2b0e53df4aac9eef077d77e70d
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea86b41f09818c6bd10502d4bb8bdc8400743097d02b351deb58e5fb424dae99
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12799c4bd0d2d9858c0d7696e12e998a91a87a7a92ec0bf0a40a0e286e746cf8
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6783876985697326,
5
- "global_step": 96000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1158,11 +1158,59 @@
1158
  "learning_rate": 4.943485406239044e-06,
1159
  "loss": 1.5726,
1160
  "step": 96000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1161
  }
1162
  ],
1163
  "max_steps": 1415120,
1164
  "num_train_epochs": 10,
1165
- "total_flos": 4.69878476684329e+16,
1166
  "trial_name": null,
1167
  "trial_params": null
1168
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7066538526768048,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1158
  "learning_rate": 4.943485406239044e-06,
1159
  "loss": 1.5726,
1160
  "step": 96000
1161
+ },
1162
+ {
1163
+ "epoch": 0.68,
1164
+ "learning_rate": 4.942897190956574e-06,
1165
+ "loss": 1.5713,
1166
+ "step": 96500
1167
+ },
1168
+ {
1169
+ "epoch": 0.69,
1170
+ "learning_rate": 4.9423059657304774e-06,
1171
+ "loss": 1.5863,
1172
+ "step": 97000
1173
+ },
1174
+ {
1175
+ "epoch": 0.69,
1176
+ "learning_rate": 4.941711731289216e-06,
1177
+ "loss": 1.6008,
1178
+ "step": 97500
1179
+ },
1180
+ {
1181
+ "epoch": 0.69,
1182
+ "learning_rate": 4.941115685852782e-06,
1183
+ "loss": 1.609,
1184
+ "step": 98000
1185
+ },
1186
+ {
1187
+ "epoch": 0.7,
1188
+ "learning_rate": 4.9405154411961575e-06,
1189
+ "loss": 1.6016,
1190
+ "step": 98500
1191
+ },
1192
+ {
1193
+ "epoch": 0.7,
1194
+ "learning_rate": 4.939913399034342e-06,
1195
+ "loss": 1.5992,
1196
+ "step": 99000
1197
+ },
1198
+ {
1199
+ "epoch": 0.7,
1200
+ "learning_rate": 4.939307147114736e-06,
1201
+ "loss": 1.6742,
1202
+ "step": 99500
1203
+ },
1204
+ {
1205
+ "epoch": 0.71,
1206
+ "learning_rate": 4.938697889674869e-06,
1207
+ "loss": 1.689,
1208
+ "step": 100000
1209
  }
1210
  ],
1211
  "max_steps": 1415120,
1212
  "num_train_epochs": 10,
1213
+ "total_flos": 4.83002072753111e+16,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e19e2e264275220026b221d7bd9bb2485aa9d99a2f9b2280088444e28714ea1
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112a525385ef4b2920ca2e4ee2218f98e8c63380cb36d3d525690ca7a1c7da52
3
  size 1944201353
runs/May15_17-32-55_19599b9fb4f0/events.out.tfevents.1684173637.19599b9fb4f0.1168.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9508222cf629cc8c15c6c6fe7cc63d0f5c27a274c448d029b1102a9154d2351b
3
- size 8303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef30ab0bad199cc307dca07844477268dc4ed31ab93798a295ba8e4bfd265751
3
+ size 9583