nutorbit commited on
Commit
6348a9b
1 Parent(s): 1474213

Training in progress, step 475, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0240065ca1b5ed67384fd5aaf0b8316e661300a63f84ad84cbbf3755931d6749
3
  size 72673016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efd90182dfa6a81ee47d9232456689b3603b96f32f042109f9b660e92bd1d92
3
  size 72673016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78478f107d623e983c32299c6998f1d12bc2f1b52b0affbbae8200ea5787c998
3
  size 36892564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811fb76be471b2d061d804906e97d50c54cb2cf42f67f6d42e24bb576f4155dc
3
  size 36892564
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d39aec35151764d808003fba5886e79decf24837f16631ca32bcf663661b5fa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c54609d4e8cb89282e95eaa414501a7844ec04587efed4c10cd692e700fa780
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3caecce00506e02ee5b8d54b798aaba8f7686aa7732466b92b01f7b66819e6f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f40779dea4b6e00c1e20018f59c11c5cbe1ad90972d2ca12df667176352bf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08507420361092731,
5
  "eval_steps": 1000,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2707,13 +2707,163 @@
2707
  "learning_rate": 2.4193548387096777e-05,
2708
  "loss": 1.7733,
2709
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2710
  }
2711
  ],
2712
  "logging_steps": 1,
2713
  "max_steps": 501,
2714
  "num_train_epochs": 1,
2715
  "save_steps": 25,
2716
- "total_flos": 2.1692039993327616e+16,
2717
  "trial_name": null,
2718
  "trial_params": null
2719
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08980054825597883,
5
  "eval_steps": 1000,
6
+ "global_step": 475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2707
  "learning_rate": 2.4193548387096777e-05,
2708
  "loss": 1.7733,
2709
  "step": 450
2710
+ },
2711
+ {
2712
+ "epoch": 0.09,
2713
+ "learning_rate": 2.3790322580645163e-05,
2714
+ "loss": 1.6785,
2715
+ "step": 451
2716
+ },
2717
+ {
2718
+ "epoch": 0.09,
2719
+ "learning_rate": 2.338709677419355e-05,
2720
+ "loss": 1.6109,
2721
+ "step": 452
2722
+ },
2723
+ {
2724
+ "epoch": 0.09,
2725
+ "learning_rate": 2.2983870967741935e-05,
2726
+ "loss": 1.5554,
2727
+ "step": 453
2728
+ },
2729
+ {
2730
+ "epoch": 0.09,
2731
+ "learning_rate": 2.258064516129032e-05,
2732
+ "loss": 1.5901,
2733
+ "step": 454
2734
+ },
2735
+ {
2736
+ "epoch": 0.09,
2737
+ "learning_rate": 2.217741935483871e-05,
2738
+ "loss": 1.687,
2739
+ "step": 455
2740
+ },
2741
+ {
2742
+ "epoch": 0.09,
2743
+ "learning_rate": 2.1774193548387097e-05,
2744
+ "loss": 1.7672,
2745
+ "step": 456
2746
+ },
2747
+ {
2748
+ "epoch": 0.09,
2749
+ "learning_rate": 2.1370967741935487e-05,
2750
+ "loss": 1.9025,
2751
+ "step": 457
2752
+ },
2753
+ {
2754
+ "epoch": 0.09,
2755
+ "learning_rate": 2.0967741935483873e-05,
2756
+ "loss": 1.804,
2757
+ "step": 458
2758
+ },
2759
+ {
2760
+ "epoch": 0.09,
2761
+ "learning_rate": 2.056451612903226e-05,
2762
+ "loss": 1.725,
2763
+ "step": 459
2764
+ },
2765
+ {
2766
+ "epoch": 0.09,
2767
+ "learning_rate": 2.0161290322580645e-05,
2768
+ "loss": 1.6565,
2769
+ "step": 460
2770
+ },
2771
+ {
2772
+ "epoch": 0.09,
2773
+ "learning_rate": 1.975806451612903e-05,
2774
+ "loss": 1.8996,
2775
+ "step": 461
2776
+ },
2777
+ {
2778
+ "epoch": 0.09,
2779
+ "learning_rate": 1.935483870967742e-05,
2780
+ "loss": 1.7974,
2781
+ "step": 462
2782
+ },
2783
+ {
2784
+ "epoch": 0.09,
2785
+ "learning_rate": 1.8951612903225807e-05,
2786
+ "loss": 1.8168,
2787
+ "step": 463
2788
+ },
2789
+ {
2790
+ "epoch": 0.09,
2791
+ "learning_rate": 1.8548387096774193e-05,
2792
+ "loss": 1.7012,
2793
+ "step": 464
2794
+ },
2795
+ {
2796
+ "epoch": 0.09,
2797
+ "learning_rate": 1.8145161290322583e-05,
2798
+ "loss": 1.9804,
2799
+ "step": 465
2800
+ },
2801
+ {
2802
+ "epoch": 0.09,
2803
+ "learning_rate": 1.774193548387097e-05,
2804
+ "loss": 1.7656,
2805
+ "step": 466
2806
+ },
2807
+ {
2808
+ "epoch": 0.09,
2809
+ "learning_rate": 1.733870967741936e-05,
2810
+ "loss": 1.5495,
2811
+ "step": 467
2812
+ },
2813
+ {
2814
+ "epoch": 0.09,
2815
+ "learning_rate": 1.693548387096774e-05,
2816
+ "loss": 1.8389,
2817
+ "step": 468
2818
+ },
2819
+ {
2820
+ "epoch": 0.09,
2821
+ "learning_rate": 1.653225806451613e-05,
2822
+ "loss": 1.7781,
2823
+ "step": 469
2824
+ },
2825
+ {
2826
+ "epoch": 0.09,
2827
+ "learning_rate": 1.6129032258064517e-05,
2828
+ "loss": 1.6191,
2829
+ "step": 470
2830
+ },
2831
+ {
2832
+ "epoch": 0.09,
2833
+ "learning_rate": 1.5725806451612903e-05,
2834
+ "loss": 1.6805,
2835
+ "step": 471
2836
+ },
2837
+ {
2838
+ "epoch": 0.09,
2839
+ "learning_rate": 1.5322580645161292e-05,
2840
+ "loss": 1.9133,
2841
+ "step": 472
2842
+ },
2843
+ {
2844
+ "epoch": 0.09,
2845
+ "learning_rate": 1.4919354838709679e-05,
2846
+ "loss": 1.5507,
2847
+ "step": 473
2848
+ },
2849
+ {
2850
+ "epoch": 0.09,
2851
+ "learning_rate": 1.4516129032258066e-05,
2852
+ "loss": 2.0498,
2853
+ "step": 474
2854
+ },
2855
+ {
2856
+ "epoch": 0.09,
2857
+ "learning_rate": 1.4112903225806454e-05,
2858
+ "loss": 1.6849,
2859
+ "step": 475
2860
  }
2861
  ],
2862
  "logging_steps": 1,
2863
  "max_steps": 501,
2864
  "num_train_epochs": 1,
2865
  "save_steps": 25,
2866
+ "total_flos": 2.279886077804544e+16,
2867
  "trial_name": null,
2868
  "trial_params": null
2869
  }