plip committed on
Commit
338c32a
·
1 Parent(s): ff1c532

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62c0a2c7e077b8baa73fd828f2e70985bcefeea8be38a9936a8140714c1e4c47
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878ac2af256d90283abe99c8603dab07e40eb73da1c3655fc21a49086d6f8483
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc20de398adc6319c715613716f2c9d4ce124e70ee41a98ab642ac175fb204e5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa61e63d6ec853afa02e48d5167bab30a383bd9f05f192b20c686fb9a3478097
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed68d92642b5c57649c135331b8243d8047b1dee7f4eb5f6f68f9dc4d2f32821
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.965686274509804,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
2606
  "eval_samples_per_second": 729.142,
2607
  "eval_steps_per_second": 11.666,
2608
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "max_steps": 250000,
2612
  "num_train_epochs": 16,
2613
- "total_flos": 2.0821139637301475e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.57843137254902,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 729.142,
2607
  "eval_steps_per_second": 11.666,
2608
  "step": 130000
2609
+ },
2610
+ {
2611
+ "epoch": 8.0,
2612
+ "learning_rate": 0.00030792659356955893,
2613
+ "loss": 0.4657,
2614
+ "step": 130500
2615
+ },
2616
+ {
2617
+ "epoch": 8.03,
2618
+ "learning_rate": 0.0003059755454143586,
2619
+ "loss": 0.4653,
2620
+ "step": 131000
2621
+ },
2622
+ {
2623
+ "epoch": 8.03,
2624
+ "eval_loss": 0.8070600628852844,
2625
+ "eval_runtime": 1.2854,
2626
+ "eval_samples_per_second": 777.995,
2627
+ "eval_steps_per_second": 12.448,
2628
+ "step": 131000
2629
+ },
2630
+ {
2631
+ "epoch": 8.06,
2632
+ "learning_rate": 0.00030402445458564144,
2633
+ "loss": 0.4649,
2634
+ "step": 131500
2635
+ },
2636
+ {
2637
+ "epoch": 8.09,
2638
+ "learning_rate": 0.0003020734064304411,
2639
+ "loss": 0.4647,
2640
+ "step": 132000
2641
+ },
2642
+ {
2643
+ "epoch": 8.09,
2644
+ "eval_loss": 0.799366295337677,
2645
+ "eval_runtime": 1.2985,
2646
+ "eval_samples_per_second": 770.128,
2647
+ "eval_steps_per_second": 12.322,
2648
+ "step": 132000
2649
+ },
2650
+ {
2651
+ "epoch": 8.12,
2652
+ "learning_rate": 0.00030012248629392423,
2653
+ "loss": 0.4647,
2654
+ "step": 132500
2655
+ },
2656
+ {
2657
+ "epoch": 8.15,
2658
+ "learning_rate": 0.00029817177951565793,
2659
+ "loss": 0.4639,
2660
+ "step": 133000
2661
+ },
2662
+ {
2663
+ "epoch": 8.15,
2664
+ "eval_loss": 0.8033633232116699,
2665
+ "eval_runtime": 1.2955,
2666
+ "eval_samples_per_second": 771.897,
2667
+ "eval_steps_per_second": 12.35,
2668
+ "step": 133000
2669
+ },
2670
+ {
2671
+ "epoch": 8.18,
2672
+ "learning_rate": 0.00029622137142587594,
2673
+ "loss": 0.4637,
2674
+ "step": 133500
2675
+ },
2676
+ {
2677
+ "epoch": 8.21,
2678
+ "learning_rate": 0.0002942713473417466,
2679
+ "loss": 0.4634,
2680
+ "step": 134000
2681
+ },
2682
+ {
2683
+ "epoch": 8.21,
2684
+ "eval_loss": 0.8022355437278748,
2685
+ "eval_runtime": 1.3019,
2686
+ "eval_samples_per_second": 768.12,
2687
+ "eval_steps_per_second": 12.29,
2688
+ "step": 134000
2689
+ },
2690
+ {
2691
+ "epoch": 8.24,
2692
+ "learning_rate": 0.00029232179256364054,
2693
+ "loss": 0.4631,
2694
+ "step": 134500
2695
+ },
2696
+ {
2697
+ "epoch": 8.27,
2698
+ "learning_rate": 0.0002903727923713994,
2699
+ "loss": 0.4656,
2700
+ "step": 135000
2701
+ },
2702
+ {
2703
+ "epoch": 8.27,
2704
+ "eval_loss": 0.8051571249961853,
2705
+ "eval_runtime": 1.3053,
2706
+ "eval_samples_per_second": 766.083,
2707
+ "eval_steps_per_second": 12.257,
2708
+ "step": 135000
2709
+ },
2710
+ {
2711
+ "epoch": 8.3,
2712
+ "learning_rate": 0.00028842443202060556,
2713
+ "loss": 0.4625,
2714
+ "step": 135500
2715
+ },
2716
+ {
2717
+ "epoch": 8.33,
2718
+ "learning_rate": 0.00028647679673885255,
2719
+ "loss": 0.4623,
2720
+ "step": 136000
2721
+ },
2722
+ {
2723
+ "epoch": 8.33,
2724
+ "eval_loss": 0.7988797426223755,
2725
+ "eval_runtime": 1.3231,
2726
+ "eval_samples_per_second": 755.795,
2727
+ "eval_steps_per_second": 12.093,
2728
+ "step": 136000
2729
+ },
2730
+ {
2731
+ "epoch": 8.36,
2732
+ "learning_rate": 0.000284529971722017,
2733
+ "loss": 0.462,
2734
+ "step": 136500
2735
+ },
2736
+ {
2737
+ "epoch": 8.39,
2738
+ "learning_rate": 0.0002825840421305321,
2739
+ "loss": 0.4617,
2740
+ "step": 137000
2741
+ },
2742
+ {
2743
+ "epoch": 8.39,
2744
+ "eval_loss": 0.7993477582931519,
2745
+ "eval_runtime": 1.2892,
2746
+ "eval_samples_per_second": 775.645,
2747
+ "eval_steps_per_second": 12.41,
2748
+ "step": 137000
2749
+ },
2750
+ {
2751
+ "epoch": 8.43,
2752
+ "learning_rate": 0.00028063909308566196,
2753
+ "loss": 0.4616,
2754
+ "step": 137500
2755
+ },
2756
+ {
2757
+ "epoch": 8.46,
2758
+ "learning_rate": 0.00027869520966577874,
2759
+ "loss": 0.4612,
2760
+ "step": 138000
2761
+ },
2762
+ {
2763
+ "epoch": 8.46,
2764
+ "eval_loss": 0.8003228902816772,
2765
+ "eval_runtime": 1.2968,
2766
+ "eval_samples_per_second": 771.107,
2767
+ "eval_steps_per_second": 12.338,
2768
+ "step": 138000
2769
+ },
2770
+ {
2771
+ "epoch": 8.49,
2772
+ "learning_rate": 0.00027675247690264027,
2773
+ "loss": 0.461,
2774
+ "step": 138500
2775
+ },
2776
+ {
2777
+ "epoch": 8.52,
2778
+ "learning_rate": 0.0002748109797776715,
2779
+ "loss": 0.4608,
2780
+ "step": 139000
2781
+ },
2782
+ {
2783
+ "epoch": 8.52,
2784
+ "eval_loss": 0.7989851236343384,
2785
+ "eval_runtime": 1.2801,
2786
+ "eval_samples_per_second": 781.164,
2787
+ "eval_steps_per_second": 12.499,
2788
+ "step": 139000
2789
+ },
2790
+ {
2791
+ "epoch": 8.55,
2792
+ "learning_rate": 0.0002728708032182461,
2793
+ "loss": 0.4603,
2794
+ "step": 139500
2795
+ },
2796
+ {
2797
+ "epoch": 8.58,
2798
+ "learning_rate": 0.0002709320320939721,
2799
+ "loss": 0.4603,
2800
+ "step": 140000
2801
+ },
2802
+ {
2803
+ "epoch": 8.58,
2804
+ "eval_loss": 0.8073873519897461,
2805
+ "eval_runtime": 1.2866,
2806
+ "eval_samples_per_second": 777.265,
2807
+ "eval_steps_per_second": 12.436,
2808
+ "step": 140000
2809
  }
2810
  ],
2811
  "max_steps": 250000,
2812
  "num_train_epochs": 16,
2813
+ "total_flos": 2.2422719560923365e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc20de398adc6319c715613716f2c9d4ce124e70ee41a98ab642ac175fb204e5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
3
  size 25761253