augustocsc commited on
Commit
23b635b
1 Parent(s): a9ba4dd

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ca19d4a9c3a66cc062755613586eb9666cc8033031890aa61853fd9c4cf2041
3
  size 497780352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d72f3fef16d48722b8147d42ef2f19f1c00f6d54bd6e1decd4a87224211203f
3
  size 497780352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfda32c2df088cee5c680d79751e871a539eaf0989bb84c9f148a0e06f01717c
3
  size 995654586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e50964ec2dd79bebad7dd56891d11c03b94cfd39ab6848d9646ef913c456d0a
3
  size 995654586
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1be58c6095331490031ab4484c5ffe4bf36bc8c2ec9130e8e665c8f156e2c90
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5189a6d57ea7f8ba0ac9eec6cad8707c6e4c886fe1a371a733497024fcbb01ba
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ac32590bb4b92b786de3b7715c46f1c43487e1605b133d8603403d8e65638b0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c72869181363c7c0dcffb7a71857fd6a7c81a93e70aff2985fc26363295969
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.0,
5
  "eval_steps": 200,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -493,6 +493,60 @@
493
  "eval_samples_per_second": 422.467,
494
  "eval_steps_per_second": 6.601,
495
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  }
497
  ],
498
  "logging_steps": 500,
@@ -500,7 +554,7 @@
500
  "num_input_tokens_seen": 0,
501
  "num_train_epochs": 10,
502
  "save_steps": 1000,
503
- "total_flos": 3.7626052608e+16,
504
  "train_batch_size": 64,
505
  "trial_name": null,
506
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 200,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
493
  "eval_samples_per_second": 422.467,
494
  "eval_steps_per_second": 6.601,
495
  "step": 9000
496
+ },
497
+ {
498
+ "epoch": 9.2,
499
+ "eval_loss": 0.02422364056110382,
500
+ "eval_runtime": 37.7417,
501
+ "eval_samples_per_second": 423.934,
502
+ "eval_steps_per_second": 6.624,
503
+ "step": 9200
504
+ },
505
+ {
506
+ "epoch": 9.4,
507
+ "eval_loss": 0.024163929745554924,
508
+ "eval_runtime": 37.7865,
509
+ "eval_samples_per_second": 423.432,
510
+ "eval_steps_per_second": 6.616,
511
+ "step": 9400
512
+ },
513
+ {
514
+ "epoch": 9.5,
515
+ "grad_norm": 0.10827407240867615,
516
+ "learning_rate": 2.5e-06,
517
+ "loss": 0.0247,
518
+ "step": 9500
519
+ },
520
+ {
521
+ "epoch": 9.6,
522
+ "eval_loss": 0.024232398718595505,
523
+ "eval_runtime": 37.8161,
524
+ "eval_samples_per_second": 423.1,
525
+ "eval_steps_per_second": 6.611,
526
+ "step": 9600
527
+ },
528
+ {
529
+ "epoch": 9.8,
530
+ "eval_loss": 0.02414529025554657,
531
+ "eval_runtime": 37.6993,
532
+ "eval_samples_per_second": 424.411,
533
+ "eval_steps_per_second": 6.631,
534
+ "step": 9800
535
+ },
536
+ {
537
+ "epoch": 10.0,
538
+ "grad_norm": 0.13070951402187347,
539
+ "learning_rate": 0.0,
540
+ "loss": 0.0245,
541
+ "step": 10000
542
+ },
543
+ {
544
+ "epoch": 10.0,
545
+ "eval_loss": 0.024126721546053886,
546
+ "eval_runtime": 37.7053,
547
+ "eval_samples_per_second": 424.344,
548
+ "eval_steps_per_second": 6.63,
549
+ "step": 10000
550
  }
551
  ],
552
  "logging_steps": 500,
 
554
  "num_input_tokens_seen": 0,
555
  "num_train_epochs": 10,
556
  "save_steps": 1000,
557
+ "total_flos": 4.180672512e+16,
558
  "train_batch_size": 64,
559
  "trial_name": null,
560
  "trial_params": null