plip commited on
Commit
b3ea0f4
1 Parent(s): 9ccd2d1

Training in progress, step 1000000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cbfc1ed883942984be588c84681fbb0b292e529986dcbab5a1fecaa3f6ad447
3
  size 893439185
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58a32262e0ffbdac27815bc97d0e11c733c93ea33c2ca18dc9c3802edf93ed0
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc73418bd52c0694a19af6083331d7a4a133f36616e77cb56fc9fc0bb18ad264
3
  size 449471589
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c385e8e74e4a9e4e29d16e7bcc51330a32ca96f31f3958d42f7f6d81cb5517
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3cbe84c4b275ece4dfc8b045971a9447b9468599c6de1ac7856d818ab7fcce6
3
- size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc3d5753924cf4929822397824e69f8755eca737deedad3eadd6591cc8055f63
3
+ size 14567
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca4b049386ceb25b5284b9754462b13ddabb069762bc1b4ce1a9e94d95e348c
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dcf0c5d64fc7f0b4d4af0e5dd48c408044c47545a256fa9ec3bd6934aa5b874
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733172b1d4a99d1dcac219cdba47537d2e3c42c728e60a468833c7a7eb409d93
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce03c924c5530c6249464a77507fae462c6707b0b26098ea916b622fcbba7a6a
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1a20d0d880982442a49c1adeca0b36b7c4aa9ce9768b58e40b03f2358d78bf3
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac733fdea8118ca8aaba1980be73398e21c242cc2758b7dfd3a00de2a13d686a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2358905887cd0ce80c53b6e8a0174e039c4c5bd62c6c91c86f0312f9b46fcf7
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d806e9f9f09813043b95cbeda18b18cdfb60c100fbde3239bf79ee81c659dc36
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.11750423748225,
5
- "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7332,11 +7332,85 @@
7332
  "eval_samples_per_second": 1296.438,
7333
  "eval_steps_per_second": 20.743,
7334
  "step": 990000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7335
  }
7336
  ],
7337
  "max_steps": 1000000,
7338
  "num_train_epochs": 16,
7339
- "total_flos": 6.939915985982136e+22,
7340
  "trial_name": null,
7341
  "trial_params": null
7342
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.27020630048712,
5
+ "global_step": 1000000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
7332
  "eval_samples_per_second": 1296.438,
7333
  "eval_steps_per_second": 20.743,
7334
  "step": 990000
7335
+ },
7336
+ {
7337
+ "epoch": 15.13,
7338
+ "learning_rate": 1.0031000845556304e-05,
7339
+ "loss": 0.2272,
7340
+ "step": 991000
7341
+ },
7342
+ {
7343
+ "epoch": 15.15,
7344
+ "learning_rate": 1.0024494874742152e-05,
7345
+ "loss": 0.2272,
7346
+ "step": 992000
7347
+ },
7348
+ {
7349
+ "epoch": 15.16,
7350
+ "learning_rate": 1.0018754144840986e-05,
7351
+ "loss": 0.2272,
7352
+ "step": 993000
7353
+ },
7354
+ {
7355
+ "epoch": 15.18,
7356
+ "learning_rate": 1.0013778718632507e-05,
7357
+ "loss": 0.227,
7358
+ "step": 994000
7359
+ },
7360
+ {
7361
+ "epoch": 15.19,
7362
+ "learning_rate": 1.000956865052717e-05,
7363
+ "loss": 0.2269,
7364
+ "step": 995000
7365
+ },
7366
+ {
7367
+ "epoch": 15.19,
7368
+ "eval_runtime": 0.7194,
7369
+ "eval_samples_per_second": 1390.055,
7370
+ "eval_steps_per_second": 22.241,
7371
+ "step": 995000
7372
+ },
7373
+ {
7374
+ "epoch": 15.21,
7375
+ "learning_rate": 1.0006123986565623e-05,
7376
+ "loss": 0.2267,
7377
+ "step": 996000
7378
+ },
7379
+ {
7380
+ "epoch": 15.22,
7381
+ "learning_rate": 1.0003444764418138e-05,
7382
+ "loss": 0.2265,
7383
+ "step": 997000
7384
+ },
7385
+ {
7386
+ "epoch": 15.24,
7387
+ "learning_rate": 1.000153101338428e-05,
7388
+ "loss": 0.2268,
7389
+ "step": 998000
7390
+ },
7391
+ {
7392
+ "epoch": 15.25,
7393
+ "learning_rate": 1.00003827543925e-05,
7394
+ "loss": 0.2269,
7395
+ "step": 999000
7396
+ },
7397
+ {
7398
+ "epoch": 15.27,
7399
+ "learning_rate": 1e-05,
7400
+ "loss": 0.2268,
7401
+ "step": 1000000
7402
+ },
7403
+ {
7404
+ "epoch": 15.27,
7405
+ "eval_runtime": 0.8245,
7406
+ "eval_samples_per_second": 1212.903,
7407
+ "eval_steps_per_second": 19.406,
7408
+ "step": 1000000
7409
  }
7410
  ],
7411
  "max_steps": 1000000,
7412
  "num_train_epochs": 16,
7413
+ "total_flos": 7.010016247012483e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc73418bd52c0694a19af6083331d7a4a133f36616e77cb56fc9fc0bb18ad264
3
  size 449471589
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c385e8e74e4a9e4e29d16e7bcc51330a32ca96f31f3958d42f7f6d81cb5517
3
  size 449471589