jdannem6 committed
Commit 8ef6d44 · verified · 1 Parent(s): 99e0448

Uploaded checkpoint-10000

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f592525aa06b9d652c3e47ce10a4feed84c673077118b3f1e7fa0765c4e8d65
+ oid sha256:dfc16ef0037aef3ed045cba1571a465b80b316394b3da0e1be310de1a3e5cac8
  size 119975656
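The weight file above is stored with Git LFS, so the diff only touches the pointer text (spec version, sha256 oid, byte size); the new oid identifies the checkpoint-10000 adapter weights. As a minimal sketch (assuming the real adapter_model.safetensors has already been pulled locally, e.g. with `git lfs pull` or `huggingface_hub`; the local path is illustrative), the pointer values can be checked against the downloaded file:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(local_file: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded file against the oid/size recorded in its Git LFS pointer."""
    data = Path(local_file).read_bytes()
    size_ok = len(data) == expected_size
    hash_ok = hashlib.sha256(data).hexdigest() == expected_oid
    return size_ok and hash_ok

# Values copied from the new pointer above; the path is a hypothetical download location.
print(verify_lfs_pointer(
    "checkpoint-10000/adapter_model.safetensors",
    "dfc16ef0037aef3ed045cba1571a465b80b316394b3da0e1be310de1a3e5cac8",
    119975656,
))
```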
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cfd488fb9ddc7976baa882b27f99572ef49e0dcce57741826a3c5a53f8c74033
+ oid sha256:8c1137ecb412c88df3bca9fa139c89543a45f6bf82baf46e46717d027befb4d1
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb05a287608789acf6a46ac24bd27b9ec4ab55c0546b575765875ff29cd1c432
+ oid sha256:719ef15f4e381b40350baace20dd00bda85c902e5a6b0fb9b3cf54bce93480aa
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:140704ae7778ae2e7bd0a12096903198666696eb43c0e46a8b8a3aa47b4fb047
+ oid sha256:4bfabff5a7af861fca0d4328cbdeeac43000e633dd64effe96ee68fd4be8bd44
  size 1064
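optimizer.pt, scheduler.pt and rng_state.pth carry the optimizer moments, learning-rate scheduler state and RNG state that allow training to continue from step 10000 rather than restart. A minimal sketch of resuming with the transformers Trainer is below; the original training script is not part of this commit, so `build_trainer()` is a hypothetical stand-in for whatever code constructed the Trainer (model, datasets, TrainingArguments) for this run:

```python
# Hypothetical resume script: build_trainer() must recreate the same Trainer
# configuration that produced this checkpoint.
trainer = build_trainer()

# Trainer reloads the weights plus optimizer.pt, scheduler.pt, rng_state.pth
# and trainer_state.json from the checkpoint directory and continues at step 10000.
trainer.train(resume_from_checkpoint="checkpoint-10000")
```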
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.1875,
+ "epoch": 0.25,
  "eval_steps": 2500,
- "global_step": 7500,
+ "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -556,6 +556,189 @@
  "eval_samples_per_second": 9.428,
  "eval_steps_per_second": 9.428,
  "step": 7500
+ },
+ {
+ "epoch": 0.19,
+ "grad_norm": 3.4310085773468018,
+ "learning_rate": 1.2717948717948718e-05,
+ "loss": 0.8767,
+ "step": 7600
+ },
+ {
+ "epoch": 0.19,
+ "grad_norm": 2.533520460128784,
+ "learning_rate": 1.2615384615384616e-05,
+ "loss": 0.8784,
+ "step": 7700
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 4.673364639282227,
+ "learning_rate": 1.2512820512820514e-05,
+ "loss": 0.8504,
+ "step": 7800
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 2.4026598930358887,
+ "learning_rate": 1.2410256410256412e-05,
+ "loss": 0.8647,
+ "step": 7900
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 6.66796875,
+ "learning_rate": 1.230769230769231e-05,
+ "loss": 0.8634,
+ "step": 8000
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 1.8087568283081055,
+ "learning_rate": 1.2205128205128208e-05,
+ "loss": 0.8277,
+ "step": 8100
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 3.196040630340576,
+ "learning_rate": 1.2102564102564102e-05,
+ "loss": 0.8739,
+ "step": 8200
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 1.6817710399627686,
+ "learning_rate": 1.2e-05,
+ "loss": 0.8367,
+ "step": 8300
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 5.548306941986084,
+ "learning_rate": 1.1897435897435898e-05,
+ "loss": 0.8247,
+ "step": 8400
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 6.069587707519531,
+ "learning_rate": 1.1794871794871796e-05,
+ "loss": 0.8248,
+ "step": 8500
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 3.085785150527954,
+ "learning_rate": 1.1692307692307694e-05,
+ "loss": 0.8618,
+ "step": 8600
+ },
+ {
+ "epoch": 0.22,
+ "grad_norm": 1.7855651378631592,
+ "learning_rate": 1.1589743589743592e-05,
+ "loss": 0.8601,
+ "step": 8700
+ },
+ {
+ "epoch": 0.22,
+ "grad_norm": 3.378775119781494,
+ "learning_rate": 1.1487179487179487e-05,
+ "loss": 0.8712,
+ "step": 8800
+ },
+ {
+ "epoch": 0.22,
+ "grad_norm": 2.7686617374420166,
+ "learning_rate": 1.1384615384615385e-05,
+ "loss": 0.852,
+ "step": 8900
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 5.424912452697754,
+ "learning_rate": 1.1282051282051283e-05,
+ "loss": 0.8796,
+ "step": 9000
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 6.806646347045898,
+ "learning_rate": 1.117948717948718e-05,
+ "loss": 0.8457,
+ "step": 9100
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 4.3275837898254395,
+ "learning_rate": 1.1076923076923079e-05,
+ "loss": 0.8346,
+ "step": 9200
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 5.901556015014648,
+ "learning_rate": 1.0974358974358977e-05,
+ "loss": 0.8489,
+ "step": 9300
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 6.287178993225098,
+ "learning_rate": 1.0871794871794871e-05,
+ "loss": 0.8463,
+ "step": 9400
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 2.2666897773742676,
+ "learning_rate": 1.076923076923077e-05,
+ "loss": 0.8399,
+ "step": 9500
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 2.2565557956695557,
+ "learning_rate": 1.0666666666666667e-05,
+ "loss": 0.8452,
+ "step": 9600
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 3.512251615524292,
+ "learning_rate": 1.0564102564102565e-05,
+ "loss": 0.8665,
+ "step": 9700
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 5.637045860290527,
+ "learning_rate": 1.0461538461538463e-05,
+ "loss": 0.829,
+ "step": 9800
+ },
+ {
+ "epoch": 0.25,
+ "grad_norm": 2.9041316509246826,
+ "learning_rate": 1.0358974358974361e-05,
+ "loss": 0.8273,
+ "step": 9900
+ },
+ {
+ "epoch": 0.25,
+ "grad_norm": 2.120234727859497,
+ "learning_rate": 1.0256410256410256e-05,
+ "loss": 0.7933,
+ "step": 10000
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 0.8178455829620361,
+ "eval_runtime": 103.8104,
+ "eval_samples_per_second": 9.633,
+ "eval_steps_per_second": 9.633,
+ "step": 10000
  }
  ],
  "logging_steps": 100,
@@ -563,7 +746,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2500,
- "total_flos": 1.2076594495488e+17,
+ "total_flos": 1.6102125993984e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null