beomi committed on
Commit ce76d0b
1 Parent(s): 39cf04d

Train complete at 3 epochs

Files changed (5)
  1. adapter_config.json +2 -2
  2. adapter_model.bin +1 -1
  3. scaler.pt +0 -3
  4. scheduler.pt +0 -3
  5. trainer_state.json +237 -3
adapter_config.json CHANGED
@@ -4,7 +4,7 @@
   "enable_lora": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
-  "lora_alpha": 8,
+  "lora_alpha": 16,
   "lora_dropout": 0.05,
   "merge_weights": false,
   "modules_to_save": null,
@@ -15,4 +15,4 @@
     "v_proj"
   ],
   "task_type": "CAUSAL_LM"
-}
+}
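The only functional change in adapter_config.json is lora_alpha going from 8 to 16. In PEFT's LoRA implementation the adapter update is scaled by lora_alpha / r, so at a fixed rank this doubles the adapter's effective contribution. A minimal sketch of an equivalent LoraConfig under the current peft API; the rank r and the full target_modules list fall outside this hunk, so those values are placeholders rather than the repo's actual settings:

```python
# Sketch only: a LoraConfig mirroring the fields visible in this diff.
# `r` and the full `target_modules` list are NOT in the hunk; the values
# below are illustrative placeholders.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                                  # placeholder; not shown in the diff
    lora_alpha=16,                        # value after this commit (was 8)
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # only "v_proj" is visible above
    task_type="CAUSAL_LM",
)

# LoRA scales its update by lora_alpha / r, so raising lora_alpha from 8 to 16
# at a fixed rank doubles the adapter's effective weight.
```

Applying the committed adapter would go through peft's PeftModel.from_pretrained on top of the base causal LM; the base model itself is not part of this repository.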
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f001994ef638f77b8d6ebb48a26a7c118864bf78afac5811f536c9fc2f148e48
+oid sha256:7cc3e88a5ffbde4422d1c4928af850ec41e2a420e149ba5fdaba16b64450e564
 size 84001933
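Both adapter_model.bin revisions are Git LFS pointers; the oid line is the SHA-256 of the actual binary, so a downloaded file can be checked against it. A small sketch, assuming the weights have been fetched to the current directory (the local path is a placeholder):

```python
# Sketch: verify a downloaded adapter_model.bin against the LFS pointer above.
import hashlib

EXPECTED = "7cc3e88a5ffbde4422d1c4928af850ec41e2a420e149ba5fdaba16b64450e564"

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("adapter_model.bin") == EXPECTED  # path is a placeholder
```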
scaler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1269a10971cdfaf218d0248ae69edd4de991ba32b7f1d7f4ab1bab4b303cdf82
-size 557
scheduler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:99f4707cc6da43569e1ae1afbc47e184230167a1ad359ef368f74abc427c732b
-size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9987397605545052,
-  "global_step": 793,
+  "epoch": 2.9943289224952743,
+  "global_step": 1188,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -480,11 +480,245 @@
       "learning_rate": 1.829044117647059e-05,
       "loss": 0.7239,
       "step": 790
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 1.7830882352941177e-05,
+      "loss": 0.7271,
+      "step": 800
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 1.7371323529411764e-05,
+      "loss": 0.7201,
+      "step": 810
+    },
+    {
+      "epoch": 2.07,
+      "learning_rate": 1.6911764705882355e-05,
+      "loss": 0.7228,
+      "step": 820
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 1.6452205882352942e-05,
+      "loss": 0.7337,
+      "step": 830
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 1.599264705882353e-05,
+      "loss": 0.7279,
+      "step": 840
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 1.5533088235294117e-05,
+      "loss": 0.7283,
+      "step": 850
+    },
+    {
+      "epoch": 2.17,
+      "learning_rate": 1.5073529411764706e-05,
+      "loss": 0.714,
+      "step": 860
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 1.4613970588235295e-05,
+      "loss": 0.7185,
+      "step": 870
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 1.4154411764705883e-05,
+      "loss": 0.7216,
+      "step": 880
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 1.3694852941176472e-05,
+      "loss": 0.7239,
+      "step": 890
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 1.323529411764706e-05,
+      "loss": 0.7309,
+      "step": 900
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 1.2775735294117647e-05,
+      "loss": 0.727,
+      "step": 910
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 1.2316176470588236e-05,
+      "loss": 0.7165,
+      "step": 920
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 1.1856617647058823e-05,
+      "loss": 0.723,
+      "step": 930
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 1.1397058823529412e-05,
+      "loss": 0.7166,
+      "step": 940
+    },
+    {
+      "epoch": 2.39,
+      "learning_rate": 1.09375e-05,
+      "loss": 0.7178,
+      "step": 950
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 1.0477941176470589e-05,
+      "loss": 0.7094,
+      "step": 960
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 1.0018382352941178e-05,
+      "loss": 0.7229,
+      "step": 970
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 9.558823529411764e-06,
+      "loss": 0.7116,
+      "step": 980
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 9.099264705882353e-06,
+      "loss": 0.7187,
+      "step": 990
+    },
+    {
+      "epoch": 2.52,
+      "learning_rate": 8.639705882352942e-06,
+      "loss": 0.7103,
+      "step": 1000
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 8.18014705882353e-06,
+      "loss": 0.7241,
+      "step": 1010
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 7.720588235294119e-06,
+      "loss": 0.7336,
+      "step": 1020
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 7.261029411764707e-06,
+      "loss": 0.7168,
+      "step": 1030
+    },
+    {
+      "epoch": 2.62,
+      "learning_rate": 6.8014705882352935e-06,
+      "loss": 0.7242,
+      "step": 1040
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 6.341911764705883e-06,
+      "loss": 0.7199,
+      "step": 1050
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 5.882352941176471e-06,
+      "loss": 0.725,
+      "step": 1060
+    },
+    {
+      "epoch": 2.7,
+      "learning_rate": 5.422794117647059e-06,
+      "loss": 0.7252,
+      "step": 1070
+    },
+    {
+      "epoch": 2.72,
+      "learning_rate": 4.963235294117647e-06,
+      "loss": 0.7183,
+      "step": 1080
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 4.503676470588236e-06,
+      "loss": 0.7172,
+      "step": 1090
+    },
+    {
+      "epoch": 2.77,
+      "learning_rate": 4.044117647058824e-06,
+      "loss": 0.7195,
+      "step": 1100
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 3.584558823529412e-06,
+      "loss": 0.7155,
+      "step": 1110
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 3.125e-06,
+      "loss": 0.7209,
+      "step": 1120
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 2.6654411764705884e-06,
+      "loss": 0.7112,
+      "step": 1130
+    },
+    {
+      "epoch": 2.87,
+      "learning_rate": 2.2058823529411767e-06,
+      "loss": 0.7105,
+      "step": 1140
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 1.7463235294117648e-06,
+      "loss": 0.7217,
+      "step": 1150
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 1.286764705882353e-06,
+      "loss": 0.7183,
+      "step": 1160
+    },
+    {
+      "epoch": 2.95,
+      "learning_rate": 8.272058823529412e-07,
+      "loss": 0.7143,
+      "step": 1170
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 3.6764705882352943e-07,
+      "loss": 0.7126,
+      "step": 1180
     }
   ],
   "max_steps": 1188,
   "num_train_epochs": 3,
-  "total_flos": 4.058995894908669e+19,
+  "total_flos": 6.076984402892554e+19,
   "trial_name": null,
   "trial_params": null
 }
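The appended log entries show a learning rate that falls by a constant ~4.6e-08 per step and reaches 3.6764705882352943e-07 eight steps before max_steps = 1188, which is consistent with the Hugging Face Trainer's default linear-decay-to-zero schedule. A small sketch that reproduces the logged values, assuming a peak learning rate of 5e-5 and 100 warmup steps; neither number appears in this diff, both are inferred:

```python
# Sketch only: reproduce the logged learning rates under an assumed schedule.
# Peak LR (5e-5) and warmup steps (100) are inferred, not part of this commit.
MAX_STEPS = 1188     # from trainer_state.json
PEAK_LR = 5e-5       # assumption
WARMUP_STEPS = 100   # assumption

def linear_lr(step: int) -> float:
    """Linear warmup, then linear decay to zero at MAX_STEPS."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    return PEAK_LR * max(0, MAX_STEPS - step) / (MAX_STEPS - WARMUP_STEPS)

for step in (800, 1000, 1180):
    print(step, linear_lr(step))
# ~1.78309e-05, ~8.63971e-06, ~3.67647e-07 -- matching the entries logged
# at steps 800, 1000 and 1180 above (up to float rounding).
```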