stefania-radu commited on
Commit
ae398a9
1 Parent(s): c8065c4

Training in progress, step 310000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e419578f524e1611c3d5902d97ccf8efc92603d3b33f13949516891bb5476e00
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588b078274eaf2af1b331e1cd70a9be61bfe018939162cd4041561fac059b8c0
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ba1aac81f5af9078e69f687c5c373f267f4525e96f7d4ff79814a56c1216cb
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c42cb907bbd858ffd2b5bae4767d672ae3b753bd7ac85cb51990a0992d4df69a
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1483cb709e4146d94296c64449eb045d3f6821657b2815235b2d1e5f2c693e9
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08b25c62270eb67709cff9418808f83a4a7710e7ce508a964ce593dde6417e23
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:746b61609316d06785a028aa9340563cf7c6dcbeb2edf456b62699aedb8c3a07
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3,
5
- "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2226,11 +2226,85 @@
2226
  "eval_samples_per_second": 108.487,
2227
  "eval_steps_per_second": 13.561,
2228
  "step": 300000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2229
  }
2230
  ],
2231
  "max_steps": 1000000,
2232
  "num_train_epochs": 9223372036854775807,
2233
- "total_flos": 2.656383051969331e+21,
2234
  "trial_name": null,
2235
  "trial_params": null
2236
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.31,
5
+ "global_step": 310000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2226
  "eval_samples_per_second": 108.487,
2227
  "eval_steps_per_second": 13.561,
2228
  "step": 300000
2229
+ },
2230
+ {
2231
+ "epoch": 0.3,
2232
+ "learning_rate": 1.732744634202854e-05,
2233
+ "loss": 0.3579,
2234
+ "step": 301000
2235
+ },
2236
+ {
2237
+ "epoch": 0.3,
2238
+ "learning_rate": 1.731675352071429e-05,
2239
+ "loss": 0.3569,
2240
+ "step": 302000
2241
+ },
2242
+ {
2243
+ "epoch": 0.3,
2244
+ "learning_rate": 1.7306028528854846e-05,
2245
+ "loss": 0.3561,
2246
+ "step": 303000
2247
+ },
2248
+ {
2249
+ "epoch": 0.3,
2250
+ "learning_rate": 1.7295271483737004e-05,
2251
+ "loss": 0.3563,
2252
+ "step": 304000
2253
+ },
2254
+ {
2255
+ "epoch": 0.3,
2256
+ "learning_rate": 1.7284482502998086e-05,
2257
+ "loss": 0.3567,
2258
+ "step": 305000
2259
+ },
2260
+ {
2261
+ "epoch": 0.3,
2262
+ "eval_runtime": 3353.5685,
2263
+ "eval_samples_per_second": 101.281,
2264
+ "eval_steps_per_second": 12.66,
2265
+ "step": 305000
2266
+ },
2267
+ {
2268
+ "epoch": 0.31,
2269
+ "learning_rate": 1.7273661704624656e-05,
2270
+ "loss": 0.3559,
2271
+ "step": 306000
2272
+ },
2273
+ {
2274
+ "epoch": 0.31,
2275
+ "learning_rate": 1.7262809206951228e-05,
2276
+ "loss": 0.355,
2277
+ "step": 307000
2278
+ },
2279
+ {
2280
+ "epoch": 0.31,
2281
+ "learning_rate": 1.725192512865898e-05,
2282
+ "loss": 0.3576,
2283
+ "step": 308000
2284
+ },
2285
+ {
2286
+ "epoch": 0.31,
2287
+ "learning_rate": 1.7241009588774453e-05,
2288
+ "loss": 0.3555,
2289
+ "step": 309000
2290
+ },
2291
+ {
2292
+ "epoch": 0.31,
2293
+ "learning_rate": 1.7230062706668237e-05,
2294
+ "loss": 0.3523,
2295
+ "step": 310000
2296
+ },
2297
+ {
2298
+ "epoch": 0.31,
2299
+ "eval_runtime": 3333.1448,
2300
+ "eval_samples_per_second": 101.901,
2301
+ "eval_steps_per_second": 12.738,
2302
+ "step": 310000
2303
  }
2304
  ],
2305
  "max_steps": 1000000,
2306
  "num_train_epochs": 9223372036854775807,
2307
+ "total_flos": 2.744929153701642e+21,
2308
  "trial_name": null,
2309
  "trial_params": null
2310
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ba1aac81f5af9078e69f687c5c373f267f4525e96f7d4ff79814a56c1216cb
3
  size 454197066