hariniiiiiiiiii commited on
Commit
7f9042d
1 Parent(s): 311485d

Training in progress, step 4000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dee4476ab05abceb3ef157f762adb7b7ff2d24c295242ef85bdc7aa9baf339c
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38535e6c225fedc50533b886bfd54b9a622c67e22d7200e25392219b114ec183
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c224c2d13e7b12f967f2ff294e904f86dfd319b281de69e29b4ca2a309522ccf
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e39e01e5f4c4c5c56c4c737e55174fa27102738a2a31ad9abaea988d95c5076
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ace5200ce81b04d30bdcefc552d51b2d31acee8aa24f561db0693af839dd1d4
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a848aa7b2c67556e67ae63d728ec54a37d250974833764149f6f889e5db0d3
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb02ae445517d5b6d8cceafc80181af9dac20207d12d240d963ba9d73872898
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768c536c5cefd4a4a3a1c722218d4a55287b59d738fd22efac7a8db1091c7245
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.915300209591912,
5
- "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2526,11 +2526,371 @@
2526
  "eval_samples_per_second": 0.211,
2527
  "eval_steps_per_second": 0.211,
2528
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2529
  }
2530
  ],
2531
- "max_steps": 3542,
2532
- "num_train_epochs": 7,
2533
- "total_flos": 8.516307584906035e+16,
2534
  "trial_name": null,
2535
  "trial_params": null
2536
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.903464431019603,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2526
  "eval_samples_per_second": 0.211,
2527
  "eval_steps_per_second": 0.211,
2528
  "step": 3500
2529
+ },
2530
+ {
2531
+ "epoch": 6.94,
2532
+ "learning_rate": 0.00015593561368209256,
2533
+ "loss": 0.1147,
2534
+ "step": 3510
2535
+ },
2536
+ {
2537
+ "epoch": 6.95,
2538
+ "learning_rate": 0.00015492957746478874,
2539
+ "loss": 0.0879,
2540
+ "step": 3520
2541
+ },
2542
+ {
2543
+ "epoch": 6.97,
2544
+ "learning_rate": 0.0001539235412474849,
2545
+ "loss": 0.095,
2546
+ "step": 3530
2547
+ },
2548
+ {
2549
+ "epoch": 6.99,
2550
+ "learning_rate": 0.00015291750503018109,
2551
+ "loss": 0.1277,
2552
+ "step": 3540
2553
+ },
2554
+ {
2555
+ "epoch": 7.02,
2556
+ "learning_rate": 0.00015191146881287726,
2557
+ "loss": 0.1332,
2558
+ "step": 3550
2559
+ },
2560
+ {
2561
+ "epoch": 7.04,
2562
+ "learning_rate": 0.00015090543259557344,
2563
+ "loss": 0.1055,
2564
+ "step": 3560
2565
+ },
2566
+ {
2567
+ "epoch": 7.06,
2568
+ "learning_rate": 0.00014989939637826964,
2569
+ "loss": 0.1114,
2570
+ "step": 3570
2571
+ },
2572
+ {
2573
+ "epoch": 7.07,
2574
+ "learning_rate": 0.00014889336016096582,
2575
+ "loss": 0.0983,
2576
+ "step": 3580
2577
+ },
2578
+ {
2579
+ "epoch": 7.09,
2580
+ "learning_rate": 0.00014788732394366196,
2581
+ "loss": 0.0823,
2582
+ "step": 3590
2583
+ },
2584
+ {
2585
+ "epoch": 7.11,
2586
+ "learning_rate": 0.00014688128772635814,
2587
+ "loss": 0.0949,
2588
+ "step": 3600
2589
+ },
2590
+ {
2591
+ "epoch": 7.11,
2592
+ "eval_loss": 0.9529324769973755,
2593
+ "eval_rouge1": 0.1603205128205128,
2594
+ "eval_rouge2": 0.09454545454545453,
2595
+ "eval_rougeL": 0.16115384615384615,
2596
+ "eval_rougeLsum": 0.15993589743589742,
2597
+ "eval_runtime": 96.2453,
2598
+ "eval_samples_per_second": 0.208,
2599
+ "eval_steps_per_second": 0.208,
2600
+ "step": 3600
2601
+ },
2602
+ {
2603
+ "epoch": 7.13,
2604
+ "learning_rate": 0.00014587525150905434,
2605
+ "loss": 0.0698,
2606
+ "step": 3610
2607
+ },
2608
+ {
2609
+ "epoch": 7.15,
2610
+ "learning_rate": 0.00014486921529175052,
2611
+ "loss": 0.0694,
2612
+ "step": 3620
2613
+ },
2614
+ {
2615
+ "epoch": 7.17,
2616
+ "learning_rate": 0.0001438631790744467,
2617
+ "loss": 0.1078,
2618
+ "step": 3630
2619
+ },
2620
+ {
2621
+ "epoch": 7.19,
2622
+ "learning_rate": 0.00014285714285714284,
2623
+ "loss": 0.1292,
2624
+ "step": 3640
2625
+ },
2626
+ {
2627
+ "epoch": 7.21,
2628
+ "learning_rate": 0.00014185110663983904,
2629
+ "loss": 0.1175,
2630
+ "step": 3650
2631
+ },
2632
+ {
2633
+ "epoch": 7.23,
2634
+ "learning_rate": 0.00014084507042253522,
2635
+ "loss": 0.1168,
2636
+ "step": 3660
2637
+ },
2638
+ {
2639
+ "epoch": 7.25,
2640
+ "learning_rate": 0.0001398390342052314,
2641
+ "loss": 0.0948,
2642
+ "step": 3670
2643
+ },
2644
+ {
2645
+ "epoch": 7.27,
2646
+ "learning_rate": 0.00013883299798792757,
2647
+ "loss": 0.1314,
2648
+ "step": 3680
2649
+ },
2650
+ {
2651
+ "epoch": 7.29,
2652
+ "learning_rate": 0.00013782696177062375,
2653
+ "loss": 0.1068,
2654
+ "step": 3690
2655
+ },
2656
+ {
2657
+ "epoch": 7.31,
2658
+ "learning_rate": 0.00013682092555331992,
2659
+ "loss": 0.1059,
2660
+ "step": 3700
2661
+ },
2662
+ {
2663
+ "epoch": 7.31,
2664
+ "eval_loss": 0.9520353078842163,
2665
+ "eval_rouge1": 0.13832167832167833,
2666
+ "eval_rouge2": 0.0977272727272727,
2667
+ "eval_rougeL": 0.14185314685314687,
2668
+ "eval_rougeLsum": 0.13999999999999999,
2669
+ "eval_runtime": 91.3536,
2670
+ "eval_samples_per_second": 0.219,
2671
+ "eval_steps_per_second": 0.219,
2672
+ "step": 3700
2673
+ },
2674
+ {
2675
+ "epoch": 7.33,
2676
+ "learning_rate": 0.0001358148893360161,
2677
+ "loss": 0.0945,
2678
+ "step": 3710
2679
+ },
2680
+ {
2681
+ "epoch": 7.35,
2682
+ "learning_rate": 0.00013480885311871227,
2683
+ "loss": 0.1298,
2684
+ "step": 3720
2685
+ },
2686
+ {
2687
+ "epoch": 7.37,
2688
+ "learning_rate": 0.00013380281690140845,
2689
+ "loss": 0.0972,
2690
+ "step": 3730
2691
+ },
2692
+ {
2693
+ "epoch": 7.39,
2694
+ "learning_rate": 0.00013279678068410465,
2695
+ "loss": 0.1007,
2696
+ "step": 3740
2697
+ },
2698
+ {
2699
+ "epoch": 7.41,
2700
+ "learning_rate": 0.0001317907444668008,
2701
+ "loss": 0.1194,
2702
+ "step": 3750
2703
+ },
2704
+ {
2705
+ "epoch": 7.43,
2706
+ "learning_rate": 0.00013078470824949697,
2707
+ "loss": 0.1416,
2708
+ "step": 3760
2709
+ },
2710
+ {
2711
+ "epoch": 7.45,
2712
+ "learning_rate": 0.00012977867203219315,
2713
+ "loss": 0.1112,
2714
+ "step": 3770
2715
+ },
2716
+ {
2717
+ "epoch": 7.47,
2718
+ "learning_rate": 0.00012877263581488935,
2719
+ "loss": 0.1232,
2720
+ "step": 3780
2721
+ },
2722
+ {
2723
+ "epoch": 7.49,
2724
+ "learning_rate": 0.00012776659959758553,
2725
+ "loss": 0.1053,
2726
+ "step": 3790
2727
+ },
2728
+ {
2729
+ "epoch": 7.51,
2730
+ "learning_rate": 0.0001267605633802817,
2731
+ "loss": 0.1482,
2732
+ "step": 3800
2733
+ },
2734
+ {
2735
+ "epoch": 7.51,
2736
+ "eval_loss": 0.9513714909553528,
2737
+ "eval_rouge1": 0.21115384615384616,
2738
+ "eval_rouge2": 0.12045454545454545,
2739
+ "eval_rougeL": 0.20999999999999996,
2740
+ "eval_rougeLsum": 0.20730769230769228,
2741
+ "eval_runtime": 90.7686,
2742
+ "eval_samples_per_second": 0.22,
2743
+ "eval_steps_per_second": 0.22,
2744
+ "step": 3800
2745
+ },
2746
+ {
2747
+ "epoch": 7.53,
2748
+ "learning_rate": 0.00012575452716297785,
2749
+ "loss": 0.1281,
2750
+ "step": 3810
2751
+ },
2752
+ {
2753
+ "epoch": 7.55,
2754
+ "learning_rate": 0.00012474849094567405,
2755
+ "loss": 0.1547,
2756
+ "step": 3820
2757
+ },
2758
+ {
2759
+ "epoch": 7.57,
2760
+ "learning_rate": 0.00012374245472837023,
2761
+ "loss": 0.1283,
2762
+ "step": 3830
2763
+ },
2764
+ {
2765
+ "epoch": 7.59,
2766
+ "learning_rate": 0.0001227364185110664,
2767
+ "loss": 0.174,
2768
+ "step": 3840
2769
+ },
2770
+ {
2771
+ "epoch": 7.61,
2772
+ "learning_rate": 0.00012173038229376258,
2773
+ "loss": 0.0827,
2774
+ "step": 3850
2775
+ },
2776
+ {
2777
+ "epoch": 7.63,
2778
+ "learning_rate": 0.00012072434607645876,
2779
+ "loss": 0.1174,
2780
+ "step": 3860
2781
+ },
2782
+ {
2783
+ "epoch": 7.65,
2784
+ "learning_rate": 0.00011971830985915493,
2785
+ "loss": 0.0914,
2786
+ "step": 3870
2787
+ },
2788
+ {
2789
+ "epoch": 7.67,
2790
+ "learning_rate": 0.0001187122736418511,
2791
+ "loss": 0.1205,
2792
+ "step": 3880
2793
+ },
2794
+ {
2795
+ "epoch": 7.69,
2796
+ "learning_rate": 0.00011770623742454728,
2797
+ "loss": 0.0821,
2798
+ "step": 3890
2799
+ },
2800
+ {
2801
+ "epoch": 7.71,
2802
+ "learning_rate": 0.00011670020120724347,
2803
+ "loss": 0.1268,
2804
+ "step": 3900
2805
+ },
2806
+ {
2807
+ "epoch": 7.71,
2808
+ "eval_loss": 0.938602089881897,
2809
+ "eval_rouge1": 0.20384615384615384,
2810
+ "eval_rouge2": 0.10909090909090909,
2811
+ "eval_rougeL": 0.20153846153846153,
2812
+ "eval_rougeLsum": 0.20076923076923076,
2813
+ "eval_runtime": 89.8217,
2814
+ "eval_samples_per_second": 0.223,
2815
+ "eval_steps_per_second": 0.223,
2816
+ "step": 3900
2817
+ },
2818
+ {
2819
+ "epoch": 7.73,
2820
+ "learning_rate": 0.00011569416498993963,
2821
+ "loss": 0.0964,
2822
+ "step": 3910
2823
+ },
2824
+ {
2825
+ "epoch": 7.75,
2826
+ "learning_rate": 0.00011468812877263582,
2827
+ "loss": 0.0878,
2828
+ "step": 3920
2829
+ },
2830
+ {
2831
+ "epoch": 7.77,
2832
+ "learning_rate": 0.00011368209255533198,
2833
+ "loss": 0.1205,
2834
+ "step": 3930
2835
+ },
2836
+ {
2837
+ "epoch": 7.79,
2838
+ "learning_rate": 0.00011267605633802817,
2839
+ "loss": 0.0916,
2840
+ "step": 3940
2841
+ },
2842
+ {
2843
+ "epoch": 7.8,
2844
+ "learning_rate": 0.00011167002012072435,
2845
+ "loss": 0.1021,
2846
+ "step": 3950
2847
+ },
2848
+ {
2849
+ "epoch": 7.82,
2850
+ "learning_rate": 0.00011066398390342052,
2851
+ "loss": 0.0843,
2852
+ "step": 3960
2853
+ },
2854
+ {
2855
+ "epoch": 7.84,
2856
+ "learning_rate": 0.0001096579476861167,
2857
+ "loss": 0.0947,
2858
+ "step": 3970
2859
+ },
2860
+ {
2861
+ "epoch": 7.86,
2862
+ "learning_rate": 0.00010865191146881289,
2863
+ "loss": 0.0884,
2864
+ "step": 3980
2865
+ },
2866
+ {
2867
+ "epoch": 7.88,
2868
+ "learning_rate": 0.00010764587525150905,
2869
+ "loss": 0.0943,
2870
+ "step": 3990
2871
+ },
2872
+ {
2873
+ "epoch": 7.9,
2874
+ "learning_rate": 0.00010663983903420524,
2875
+ "loss": 0.089,
2876
+ "step": 4000
2877
+ },
2878
+ {
2879
+ "epoch": 7.9,
2880
+ "eval_loss": 0.9426229596138,
2881
+ "eval_rouge1": 0.15076923076923077,
2882
+ "eval_rouge2": 0.1181818181818182,
2883
+ "eval_rougeL": 0.15615384615384614,
2884
+ "eval_rougeLsum": 0.15384615384615383,
2885
+ "eval_runtime": 90.9011,
2886
+ "eval_samples_per_second": 0.22,
2887
+ "eval_steps_per_second": 0.22,
2888
+ "step": 4000
2889
  }
2890
  ],
2891
+ "max_steps": 5060,
2892
+ "num_train_epochs": 10,
2893
+ "total_flos": 9.733899587958374e+16,
2894
  "trial_name": null,
2895
  "trial_params": null
2896
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84070d0e82c96b5d90688a9eaac039d70060ac2fb04ad15294fe621d1085031
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8324d78149c1825be11050b4984c3538e3690b1c3af5db03801adad7507acb0
3
  size 3643
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c224c2d13e7b12f967f2ff294e904f86dfd319b281de69e29b4ca2a309522ccf
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e39e01e5f4c4c5c56c4c737e55174fa27102738a2a31ad9abaea988d95c5076
3
  size 2329702453
runs/Feb09_10-25-55_5414a9461c91/1675939283.9214077/events.out.tfevents.1675939283.5414a9461c91.351.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c113a55f8f1f9d07188df5c0217fc427fb6d5bf3a04d4e7139f2c86df2580992
3
+ size 5952
runs/Feb09_10-25-55_5414a9461c91/events.out.tfevents.1675939283.5414a9461c91.351.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acba4368ad5958bff2596e2a94cae81829d5acfc5d99f8d2a2e424dbf9a5ebb8
3
+ size 14428
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84070d0e82c96b5d90688a9eaac039d70060ac2fb04ad15294fe621d1085031
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8324d78149c1825be11050b4984c3538e3690b1c3af5db03801adad7507acb0
3
  size 3643