augustocsc commited on
Commit
6c0068a
1 Parent(s): ebb92fd

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4112e8e9dc23cb1f7536f3eb76469acface21f23e45de82fb7032dccfefbcc9b
3
  size 995654149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0b06e97f7375d3c463012a0154e6c9d8ce59a64253b7d38b8e7eaf160efcff
3
  size 995654149
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc0011ff74c2a560ea18822f2b33d18d266eb182404ad11e97d8a0929b1762f9
3
  size 497813341
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fef899874d9a26f54345ba20a4e90494ffcc0dd889e0572fa334421094aa7d69
3
  size 497813341
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6c38db4387a36e2ad70be6de6346c8072392db912ef970812aef3c708739e0f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d4f3dfba7c476528350a112afc2eb29ca025fdb99df6a6d058dd017e77721e
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8846bb95f8928ebc17412b6a7f15b0cd04a2aeba69a1f3a91482aec2592920f9
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f42c1b679744a42399dc6c4b3d13d32a0f7023f208a865b3c1cc1bb8f2e1860f
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7860686644175432,
5
  "eval_steps": 200,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -475,13 +475,65 @@
475
  "eval_samples_per_second": 947.096,
476
  "eval_steps_per_second": 3.7,
477
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  }
479
  ],
480
  "logging_steps": 500,
481
  "max_steps": 10078,
482
  "num_train_epochs": 2,
483
  "save_steps": 1000,
484
- "total_flos": 1.50504210432e+17,
485
  "trial_name": null,
486
  "trial_params": null
487
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9845207382417147,
5
  "eval_steps": 200,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
475
  "eval_samples_per_second": 947.096,
476
  "eval_steps_per_second": 3.7,
477
  "step": 9000
478
+ },
479
+ {
480
+ "epoch": 1.83,
481
+ "eval_loss": 0.11026974767446518,
482
+ "eval_runtime": 151.187,
483
+ "eval_samples_per_second": 948.071,
484
+ "eval_steps_per_second": 3.704,
485
+ "step": 9200
486
+ },
487
+ {
488
+ "epoch": 1.87,
489
+ "eval_loss": 0.11021895706653595,
490
+ "eval_runtime": 151.1765,
491
+ "eval_samples_per_second": 948.137,
492
+ "eval_steps_per_second": 3.704,
493
+ "step": 9400
494
+ },
495
+ {
496
+ "epoch": 1.89,
497
+ "learning_rate": 2.867632466759278e-06,
498
+ "loss": 0.111,
499
+ "step": 9500
500
+ },
501
+ {
502
+ "epoch": 1.91,
503
+ "eval_loss": 0.11018586158752441,
504
+ "eval_runtime": 151.3213,
505
+ "eval_samples_per_second": 947.229,
506
+ "eval_steps_per_second": 3.701,
507
+ "step": 9600
508
+ },
509
+ {
510
+ "epoch": 1.94,
511
+ "eval_loss": 0.11020087450742722,
512
+ "eval_runtime": 151.3699,
513
+ "eval_samples_per_second": 946.925,
514
+ "eval_steps_per_second": 3.7,
515
+ "step": 9800
516
+ },
517
+ {
518
+ "epoch": 1.98,
519
+ "learning_rate": 3.869815439571344e-07,
520
+ "loss": 0.1109,
521
+ "step": 10000
522
+ },
523
+ {
524
+ "epoch": 1.98,
525
+ "eval_loss": 0.11018609255552292,
526
+ "eval_runtime": 151.328,
527
+ "eval_samples_per_second": 947.187,
528
+ "eval_steps_per_second": 3.701,
529
+ "step": 10000
530
  }
531
  ],
532
  "logging_steps": 500,
533
  "max_steps": 10078,
534
  "num_train_epochs": 2,
535
  "save_steps": 1000,
536
+ "total_flos": 1.6722690048e+17,
537
  "trial_name": null,
538
  "trial_params": null
539
  }