augustocsc commited on
Commit
3f48ec7
1 Parent(s): 60e8873

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08272eae0fb3d0187698911c3a6fe55947f1c2aa29c2b91fc192c91f0955752c
3
  size 497780352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c999b2c968317cfb9a178e7d7e916ec81e10f2d4b5f677a60a04809df73dc28
3
  size 497780352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:062cbf925132d5481813039e3f2ae2af94ffb7026e1a2ae1af26275cafd1de78
3
  size 995617914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe21c9b3924775aa74156b19fb883f26009e266099be574b1bb86c23ca571c4
3
  size 995617914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1be58c6095331490031ab4484c5ffe4bf36bc8c2ec9130e8e665c8f156e2c90
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5189a6d57ea7f8ba0ac9eec6cad8707c6e4c886fe1a371a733497024fcbb01ba
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1305eff1a05da779c382ab68d240e5dfe192a09923f7945b52f548c78896f445
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c72869181363c7c0dcffb7a71857fd6a7c81a93e70aff2985fc26363295969
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.0,
5
  "eval_steps": 200,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -493,6 +493,60 @@
493
  "eval_samples_per_second": 422.006,
494
  "eval_steps_per_second": 6.594,
495
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  }
497
  ],
498
  "logging_steps": 500,
@@ -500,7 +554,7 @@
500
  "num_input_tokens_seen": 0,
501
  "num_train_epochs": 10,
502
  "save_steps": 1000,
503
- "total_flos": 3.7626052608e+16,
504
  "train_batch_size": 64,
505
  "trial_name": null,
506
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 200,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
493
  "eval_samples_per_second": 422.006,
494
  "eval_steps_per_second": 6.594,
495
  "step": 9000
496
+ },
497
+ {
498
+ "epoch": 9.2,
499
+ "eval_loss": 0.023400841280817986,
500
+ "eval_runtime": 37.7841,
501
+ "eval_samples_per_second": 423.458,
502
+ "eval_steps_per_second": 6.617,
503
+ "step": 9200
504
+ },
505
+ {
506
+ "epoch": 9.4,
507
+ "eval_loss": 0.023405231535434723,
508
+ "eval_runtime": 37.8666,
509
+ "eval_samples_per_second": 422.536,
510
+ "eval_steps_per_second": 6.602,
511
+ "step": 9400
512
+ },
513
+ {
514
+ "epoch": 9.5,
515
+ "grad_norm": 0.16063953936100006,
516
+ "learning_rate": 3.077914851215585e-07,
517
+ "loss": 0.0241,
518
+ "step": 9500
519
+ },
520
+ {
521
+ "epoch": 9.6,
522
+ "eval_loss": 0.023399699479341507,
523
+ "eval_runtime": 37.8663,
524
+ "eval_samples_per_second": 422.539,
525
+ "eval_steps_per_second": 6.602,
526
+ "step": 9600
527
+ },
528
+ {
529
+ "epoch": 9.8,
530
+ "eval_loss": 0.023402543738484383,
531
+ "eval_runtime": 37.6977,
532
+ "eval_samples_per_second": 424.429,
533
+ "eval_steps_per_second": 6.632,
534
+ "step": 9800
535
+ },
536
+ {
537
+ "epoch": 10.0,
538
+ "grad_norm": 0.11751583963632584,
539
+ "learning_rate": 0.0,
540
+ "loss": 0.024,
541
+ "step": 10000
542
+ },
543
+ {
544
+ "epoch": 10.0,
545
+ "eval_loss": 0.023403601720929146,
546
+ "eval_runtime": 37.692,
547
+ "eval_samples_per_second": 424.493,
548
+ "eval_steps_per_second": 6.633,
549
+ "step": 10000
550
  }
551
  ],
552
  "logging_steps": 500,
 
554
  "num_input_tokens_seen": 0,
555
  "num_train_epochs": 10,
556
  "save_steps": 1000,
557
+ "total_flos": 4.180672512e+16,
558
  "train_batch_size": 64,
559
  "trial_name": null,
560
  "trial_params": null