abdiharyadi committed
Commit 5c7710a
1 Parent(s): 63d009e

Training in progress, step 199764, checkpoint
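This checkpoint moves the run from global step 177,568 (epoch ≈ 0.80) to 199,764 (epoch ≈ 0.90), i.e. from the eighth to the ninth save interval of 22,196 steps, and appends ten new training-log records to `trainer_state.json` (see the diff at the end of this page). In those records the loss stays around 3.60 while the learning rate drops by roughly 1.0e-07 every 2,220 logged steps, consistent with a linear decay toward zero near step 221,960; that endpoint is inferred from the logged values, not stated in the file. A small sketch, assuming a local copy of the updated checkpoint, for inspecting the appended records:

```python
import json

# Assumes last-checkpoint/ from this commit has been downloaded locally.
with open("last-checkpoint/trainer_state.json") as fh:
    state = json.load(fh)

print(state["global_step"], state["epoch"])  # 199764, ~0.9000

# The ten records added in this commit are the tail of log_history.
for record in state["log_history"][-10:]:
    print(f'step {record["step"]:>6}  '
          f'lr {record["learning_rate"]:.4e}  '
          f'loss {record["loss"]:.4f}')
```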

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f117dd565f6ce119a7ab273507fed496c1ef97abf41ab3fb96f3f15b5b093885
+ oid sha256:4cf0bcb7d3ca09a342fcfc235067fb20d1a354073e87505ef089ed7c01dffac9
  size 466289280
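This file and the other large checkpoint files below are stored through Git LFS, so each diff only swaps the three-line pointer (spec version, `oid sha256:…`, `size`); the binaries themselves live in LFS storage. As a minimal sketch, assuming you have a pointer file and the corresponding downloaded blob side by side (paths are illustrative), the new `oid`/`size` pair can be checked like this:

```python
import hashlib

def lfs_pointer_matches(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against its Git LFS pointer (oid + size)."""
    with open(pointer_path) as fh:
        fields = dict(line.strip().split(" ", 1) for line in fh if line.strip())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Illustrative usage: after this commit, model.safetensors should hash to
# 4cf0bcb7... and be exactly 466,289,280 bytes.
# lfs_pointer_matches("model.safetensors.pointer", "model.safetensors")
```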
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f71d9052aaeec7b71e29f501d83441ebfb73ee455b12421e1bd873a3b02bf9a9
+ oid sha256:28f493075dd0cae59e97d2a821cbb6a5faf64bb64b5c2e595d84033258f6aefa
  size 932672442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ce643dc83e6406caf3dc96437daf2af7153c2d12a984bc515c067467febc407e
+ oid sha256:f877aaf1c908eec1d0705a7a99ec8e0d4860730d9b3fd376717fa6ee89e134dd
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dca3aab84aeacc5f3fb66c81b2380fa436b115eecc94206afa3c268d57d32a23
+ oid sha256:597d54923f947aa39c4dfbc7d0c69acd0ddad86d19c03796f1545edfaf3e00f9
  size 1064
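Taken together, these files are the state the Hugging Face `Trainer` needs to pick the run back up: `model.safetensors` (weights), `optimizer.pt` (optimizer state), `scheduler.pt` (learning-rate schedule position), `rng_state.pth` (random-number-generator state), and the `trainer_state.json` updated in the diff below. A minimal, hypothetical sketch of resuming from this checkpoint; `model`, `train_dataset`, and the remaining arguments are placeholders that must match the original run:

```python
from transformers import Trainer, TrainingArguments

# `model` and `train_dataset` are placeholders; they must be rebuilt exactly
# as in the run that wrote last-checkpoint/ (not shown in this commit).
args = TrainingArguments(
    output_dir="output",
    per_device_train_batch_size=2,  # "train_batch_size": 2
    num_train_epochs=1,             # "num_train_epochs": 1
    save_steps=22196,               # "save_steps": 22196
    logging_steps=2220,             # "logging_steps": 2220
)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# Reloads the weights, optimizer, scheduler, and RNG state shown above and
# continues from global_step 199764.
trainer.train(resume_from_checkpoint="last-checkpoint")
```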
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.800003604269257,
+ "epoch": 0.900004054802914,
  "eval_steps": 500,
- "global_step": 177568,
+ "global_step": 199764,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -560,6 +560,76 @@
  "learning_rate": 2.0985407214845987e-06,
  "loss": 3.6253,
  "step": 175380
+ },
+ {
+ "epoch": 0.8001477750395344,
+ "grad_norm": 8.387534141540527,
+ "learning_rate": 1.9985222496046567e-06,
+ "loss": 3.6102,
+ "step": 177600
+ },
+ {
+ "epoch": 0.8101496222275285,
+ "grad_norm": 9.557201385498047,
+ "learning_rate": 1.898503777724715e-06,
+ "loss": 3.6088,
+ "step": 179820
+ },
+ {
+ "epoch": 0.8201514694155226,
+ "grad_norm": 8.81215763092041,
+ "learning_rate": 1.7984853058447732e-06,
+ "loss": 3.6092,
+ "step": 182040
+ },
+ {
+ "epoch": 0.8301533166035169,
+ "grad_norm": 5.788514137268066,
+ "learning_rate": 1.6984668339648313e-06,
+ "loss": 3.5987,
+ "step": 184260
+ },
+ {
+ "epoch": 0.840155163791511,
+ "grad_norm": 3.8115339279174805,
+ "learning_rate": 1.5984483620848898e-06,
+ "loss": 3.6097,
+ "step": 186480
+ },
+ {
+ "epoch": 0.8501570109795052,
+ "grad_norm": 9.834037780761719,
+ "learning_rate": 1.498429890204948e-06,
+ "loss": 3.6115,
+ "step": 188700
+ },
+ {
+ "epoch": 0.8601588581674994,
+ "grad_norm": 4.8395586013793945,
+ "learning_rate": 1.398411418325006e-06,
+ "loss": 3.6048,
+ "step": 190920
+ },
+ {
+ "epoch": 0.8701607053554936,
+ "grad_norm": 5.381741523742676,
+ "learning_rate": 1.2983929464450643e-06,
+ "loss": 3.6098,
+ "step": 193140
+ },
+ {
+ "epoch": 0.8801625525434877,
+ "grad_norm": 9.676674842834473,
+ "learning_rate": 1.1983744745651226e-06,
+ "loss": 3.6137,
+ "step": 195360
+ },
+ {
+ "epoch": 0.890164399731482,
+ "grad_norm": 9.529138565063477,
+ "learning_rate": 1.0983560026851806e-06,
+ "loss": 3.6122,
+ "step": 197580
  }
  ],
  "logging_steps": 2220,
@@ -567,7 +637,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 22196,
- "total_flos": 4.107095329536e+16,
+ "total_flos": 4.6201916348928e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null