hariniiiiiiiiii commited on
Commit
8de7203
1 Parent(s): 01dcb61

Training in progress, step 3000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a87f571870fa37676e444ee2cf3bd121da8e220650bf64cae7ecf7bbabd18107
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8fab46afba02b9664ca3eb5d4fc9d6f6e2f8b5882d6ad6a6f5bda047e6e1fbd
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b7789172be0d19cba7bec5cd3fe0c5aeec80db54570b699519c070ba33b3242
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347082fb15486e1ce22829a5a9a17790976d005235f55b77b736bf422a4e49ef
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbce431f221c35d0ff3900cc40d8e1493a46d07dce6b643999346c309d391f2d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b47b76e58d40ed6257704d7bde822b19860484ca61d1450fb6ff42df326b971d
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99522a772e64a907efd3e369e7b361594aee824f92725e631b466fa7e08bc79a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2936137cf5142f34c072a19508ea41ce0d6a55cae7261f6c7cb486aa65afbe
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.938971766736531,
5
- "global_step": 2500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1806,11 +1806,371 @@
1806
  "eval_samples_per_second": 0.241,
1807
  "eval_steps_per_second": 0.241,
1808
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1809
  }
1810
  ],
1811
  "max_steps": 3542,
1812
  "num_train_epochs": 7,
1813
- "total_flos": 6.078797148279398e+16,
1814
  "trial_name": null,
1815
  "trial_params": null
1816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.927135988164221,
5
+ "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1806
  "eval_samples_per_second": 0.241,
1807
  "eval_steps_per_second": 0.241,
1808
  "step": 2500
1809
+ },
1810
+ {
1811
+ "epoch": 4.96,
1812
+ "learning_rate": 0.00014947856315179606,
1813
+ "loss": 0.1688,
1814
+ "step": 2510
1815
+ },
1816
+ {
1817
+ "epoch": 4.98,
1818
+ "learning_rate": 0.00014803012746234068,
1819
+ "loss": 0.1385,
1820
+ "step": 2520
1821
+ },
1822
+ {
1823
+ "epoch": 5.0,
1824
+ "learning_rate": 0.0001465816917728853,
1825
+ "loss": 0.1592,
1826
+ "step": 2530
1827
+ },
1828
+ {
1829
+ "epoch": 5.02,
1830
+ "learning_rate": 0.0001451332560834299,
1831
+ "loss": 0.1014,
1832
+ "step": 2540
1833
+ },
1834
+ {
1835
+ "epoch": 5.04,
1836
+ "learning_rate": 0.0001436848203939745,
1837
+ "loss": 0.0796,
1838
+ "step": 2550
1839
+ },
1840
+ {
1841
+ "epoch": 5.06,
1842
+ "learning_rate": 0.00014223638470451912,
1843
+ "loss": 0.0981,
1844
+ "step": 2560
1845
+ },
1846
+ {
1847
+ "epoch": 5.08,
1848
+ "learning_rate": 0.00014078794901506374,
1849
+ "loss": 0.093,
1850
+ "step": 2570
1851
+ },
1852
+ {
1853
+ "epoch": 5.1,
1854
+ "learning_rate": 0.00013933951332560835,
1855
+ "loss": 0.1599,
1856
+ "step": 2580
1857
+ },
1858
+ {
1859
+ "epoch": 5.12,
1860
+ "learning_rate": 0.00013789107763615297,
1861
+ "loss": 0.1223,
1862
+ "step": 2590
1863
+ },
1864
+ {
1865
+ "epoch": 5.14,
1866
+ "learning_rate": 0.00013644264194669756,
1867
+ "loss": 0.1,
1868
+ "step": 2600
1869
+ },
1870
+ {
1871
+ "epoch": 5.14,
1872
+ "eval_loss": 0.9408878087997437,
1873
+ "eval_rouge1": 0.12507936507936507,
1874
+ "eval_rouge2": 0.08333333333333333,
1875
+ "eval_rougeL": 0.12396825396825398,
1876
+ "eval_rougeLsum": 0.13111111111111112,
1877
+ "eval_runtime": 96.2103,
1878
+ "eval_samples_per_second": 0.208,
1879
+ "eval_steps_per_second": 0.208,
1880
+ "step": 2600
1881
+ },
1882
+ {
1883
+ "epoch": 5.16,
1884
+ "learning_rate": 0.00013499420625724218,
1885
+ "loss": 0.1284,
1886
+ "step": 2610
1887
+ },
1888
+ {
1889
+ "epoch": 5.18,
1890
+ "learning_rate": 0.0001335457705677868,
1891
+ "loss": 0.1523,
1892
+ "step": 2620
1893
+ },
1894
+ {
1895
+ "epoch": 5.2,
1896
+ "learning_rate": 0.00013209733487833138,
1897
+ "loss": 0.1051,
1898
+ "step": 2630
1899
+ },
1900
+ {
1901
+ "epoch": 5.22,
1902
+ "learning_rate": 0.00013064889918887603,
1903
+ "loss": 0.1216,
1904
+ "step": 2640
1905
+ },
1906
+ {
1907
+ "epoch": 5.24,
1908
+ "learning_rate": 0.00012920046349942064,
1909
+ "loss": 0.1219,
1910
+ "step": 2650
1911
+ },
1912
+ {
1913
+ "epoch": 5.26,
1914
+ "learning_rate": 0.00012775202780996523,
1915
+ "loss": 0.1482,
1916
+ "step": 2660
1917
+ },
1918
+ {
1919
+ "epoch": 5.28,
1920
+ "learning_rate": 0.00012630359212050985,
1921
+ "loss": 0.1076,
1922
+ "step": 2670
1923
+ },
1924
+ {
1925
+ "epoch": 5.3,
1926
+ "learning_rate": 0.00012485515643105447,
1927
+ "loss": 0.121,
1928
+ "step": 2680
1929
+ },
1930
+ {
1931
+ "epoch": 5.32,
1932
+ "learning_rate": 0.00012340672074159908,
1933
+ "loss": 0.1448,
1934
+ "step": 2690
1935
+ },
1936
+ {
1937
+ "epoch": 5.34,
1938
+ "learning_rate": 0.0001219582850521437,
1939
+ "loss": 0.1683,
1940
+ "step": 2700
1941
+ },
1942
+ {
1943
+ "epoch": 5.34,
1944
+ "eval_loss": 0.9422550201416016,
1945
+ "eval_rouge1": 0.13818181818181818,
1946
+ "eval_rouge2": 0.0951010101010101,
1947
+ "eval_rougeL": 0.13713286713286715,
1948
+ "eval_rougeLsum": 0.14174825174825176,
1949
+ "eval_runtime": 96.365,
1950
+ "eval_samples_per_second": 0.208,
1951
+ "eval_steps_per_second": 0.208,
1952
+ "step": 2700
1953
+ },
1954
+ {
1955
+ "epoch": 5.36,
1956
+ "learning_rate": 0.0001205098493626883,
1957
+ "loss": 0.103,
1958
+ "step": 2710
1959
+ },
1960
+ {
1961
+ "epoch": 5.37,
1962
+ "learning_rate": 0.0001190614136732329,
1963
+ "loss": 0.1434,
1964
+ "step": 2720
1965
+ },
1966
+ {
1967
+ "epoch": 5.39,
1968
+ "learning_rate": 0.00011761297798377752,
1969
+ "loss": 0.1419,
1970
+ "step": 2730
1971
+ },
1972
+ {
1973
+ "epoch": 5.41,
1974
+ "learning_rate": 0.00011616454229432214,
1975
+ "loss": 0.1145,
1976
+ "step": 2740
1977
+ },
1978
+ {
1979
+ "epoch": 5.43,
1980
+ "learning_rate": 0.00011471610660486674,
1981
+ "loss": 0.1302,
1982
+ "step": 2750
1983
+ },
1984
+ {
1985
+ "epoch": 5.45,
1986
+ "learning_rate": 0.00011326767091541136,
1987
+ "loss": 0.0718,
1988
+ "step": 2760
1989
+ },
1990
+ {
1991
+ "epoch": 5.47,
1992
+ "learning_rate": 0.00011181923522595596,
1993
+ "loss": 0.1166,
1994
+ "step": 2770
1995
+ },
1996
+ {
1997
+ "epoch": 5.49,
1998
+ "learning_rate": 0.0001103707995365006,
1999
+ "loss": 0.1265,
2000
+ "step": 2780
2001
+ },
2002
+ {
2003
+ "epoch": 5.51,
2004
+ "learning_rate": 0.0001089223638470452,
2005
+ "loss": 0.0972,
2006
+ "step": 2790
2007
+ },
2008
+ {
2009
+ "epoch": 5.53,
2010
+ "learning_rate": 0.0001074739281575898,
2011
+ "loss": 0.1395,
2012
+ "step": 2800
2013
+ },
2014
+ {
2015
+ "epoch": 5.53,
2016
+ "eval_loss": 0.9336325526237488,
2017
+ "eval_rouge1": 0.16115384615384615,
2018
+ "eval_rouge2": 0.12329545454545454,
2019
+ "eval_rougeL": 0.15999999999999998,
2020
+ "eval_rougeLsum": 0.16307692307692306,
2021
+ "eval_runtime": 93.5346,
2022
+ "eval_samples_per_second": 0.214,
2023
+ "eval_steps_per_second": 0.214,
2024
+ "step": 2800
2025
+ },
2026
+ {
2027
+ "epoch": 5.55,
2028
+ "learning_rate": 0.00010602549246813442,
2029
+ "loss": 0.0808,
2030
+ "step": 2810
2031
+ },
2032
+ {
2033
+ "epoch": 5.57,
2034
+ "learning_rate": 0.00010457705677867903,
2035
+ "loss": 0.1205,
2036
+ "step": 2820
2037
+ },
2038
+ {
2039
+ "epoch": 5.59,
2040
+ "learning_rate": 0.00010312862108922364,
2041
+ "loss": 0.119,
2042
+ "step": 2830
2043
+ },
2044
+ {
2045
+ "epoch": 5.61,
2046
+ "learning_rate": 0.00010168018539976825,
2047
+ "loss": 0.1357,
2048
+ "step": 2840
2049
+ },
2050
+ {
2051
+ "epoch": 5.63,
2052
+ "learning_rate": 0.00010023174971031286,
2053
+ "loss": 0.1144,
2054
+ "step": 2850
2055
+ },
2056
+ {
2057
+ "epoch": 5.65,
2058
+ "learning_rate": 9.878331402085749e-05,
2059
+ "loss": 0.138,
2060
+ "step": 2860
2061
+ },
2062
+ {
2063
+ "epoch": 5.67,
2064
+ "learning_rate": 9.733487833140209e-05,
2065
+ "loss": 0.0998,
2066
+ "step": 2870
2067
+ },
2068
+ {
2069
+ "epoch": 5.69,
2070
+ "learning_rate": 9.588644264194669e-05,
2071
+ "loss": 0.1437,
2072
+ "step": 2880
2073
+ },
2074
+ {
2075
+ "epoch": 5.71,
2076
+ "learning_rate": 9.443800695249131e-05,
2077
+ "loss": 0.1053,
2078
+ "step": 2890
2079
+ },
2080
+ {
2081
+ "epoch": 5.73,
2082
+ "learning_rate": 9.298957126303593e-05,
2083
+ "loss": 0.1067,
2084
+ "step": 2900
2085
+ },
2086
+ {
2087
+ "epoch": 5.73,
2088
+ "eval_loss": 0.9290033578872681,
2089
+ "eval_rouge1": 0.2234265734265734,
2090
+ "eval_rouge2": 0.13156565656565655,
2091
+ "eval_rougeL": 0.21744755244755246,
2092
+ "eval_rougeLsum": 0.2169230769230769,
2093
+ "eval_runtime": 91.8958,
2094
+ "eval_samples_per_second": 0.218,
2095
+ "eval_steps_per_second": 0.218,
2096
+ "step": 2900
2097
+ },
2098
+ {
2099
+ "epoch": 5.75,
2100
+ "learning_rate": 9.154113557358054e-05,
2101
+ "loss": 0.1225,
2102
+ "step": 2910
2103
+ },
2104
+ {
2105
+ "epoch": 5.77,
2106
+ "learning_rate": 9.009269988412515e-05,
2107
+ "loss": 0.0867,
2108
+ "step": 2920
2109
+ },
2110
+ {
2111
+ "epoch": 5.79,
2112
+ "learning_rate": 8.864426419466975e-05,
2113
+ "loss": 0.1325,
2114
+ "step": 2930
2115
+ },
2116
+ {
2117
+ "epoch": 5.81,
2118
+ "learning_rate": 8.719582850521438e-05,
2119
+ "loss": 0.118,
2120
+ "step": 2940
2121
+ },
2122
+ {
2123
+ "epoch": 5.83,
2124
+ "learning_rate": 8.574739281575898e-05,
2125
+ "loss": 0.112,
2126
+ "step": 2950
2127
+ },
2128
+ {
2129
+ "epoch": 5.85,
2130
+ "learning_rate": 8.429895712630359e-05,
2131
+ "loss": 0.1326,
2132
+ "step": 2960
2133
+ },
2134
+ {
2135
+ "epoch": 5.87,
2136
+ "learning_rate": 8.28505214368482e-05,
2137
+ "loss": 0.1506,
2138
+ "step": 2970
2139
+ },
2140
+ {
2141
+ "epoch": 5.89,
2142
+ "learning_rate": 8.140208574739282e-05,
2143
+ "loss": 0.1499,
2144
+ "step": 2980
2145
+ },
2146
+ {
2147
+ "epoch": 5.91,
2148
+ "learning_rate": 7.995365005793744e-05,
2149
+ "loss": 0.1092,
2150
+ "step": 2990
2151
+ },
2152
+ {
2153
+ "epoch": 5.93,
2154
+ "learning_rate": 7.850521436848204e-05,
2155
+ "loss": 0.1104,
2156
+ "step": 3000
2157
+ },
2158
+ {
2159
+ "epoch": 5.93,
2160
+ "eval_loss": 0.9244877696037292,
2161
+ "eval_rouge1": 0.2,
2162
+ "eval_rouge2": 0.1,
2163
+ "eval_rougeL": 0.19153846153846155,
2164
+ "eval_rougeLsum": 0.19153846153846155,
2165
+ "eval_runtime": 93.7022,
2166
+ "eval_samples_per_second": 0.213,
2167
+ "eval_steps_per_second": 0.213,
2168
+ "step": 3000
2169
  }
2170
  ],
2171
  "max_steps": 3542,
2172
  "num_train_epochs": 7,
2173
+ "total_flos": 7.297038322401485e+16,
2174
  "trial_name": null,
2175
  "trial_params": null
2176
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660f1225e692fbbda687422d3532879d3c116f23c4ac0ae767265d9fdf03511c
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84070d0e82c96b5d90688a9eaac039d70060ac2fb04ad15294fe621d1085031
3
  size 3643
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e4b616d5ea5f92c203d471e43b622d1302d66f27b9105c5655030fdf5c7e986
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347082fb15486e1ce22829a5a9a17790976d005235f55b77b736bf422a4e49ef
3
  size 2329702453
runs/Feb09_05-15-33_2f481ea0b382/1675919822.3009498/events.out.tfevents.1675919822.2f481ea0b382.229.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2158c55d9c18b09babe3afd3628b7b9db9948dd64adc03d98aaeaf3fdedb19cc
3
+ size 5963
runs/Feb09_05-15-33_2f481ea0b382/events.out.tfevents.1675919822.2f481ea0b382.229.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39ea82e021df455d0b872a29b4241fd128157b9e12cd0e02049765435e647026
3
+ size 4373
runs/Feb09_05-19-49_2f481ea0b382/1675920014.2635813/events.out.tfevents.1675920014.2f481ea0b382.229.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9885aa1ad8d035a7245932173d30a177b54c4bea111ff4b8f24a0a39bb518372
3
+ size 5952
runs/Feb09_05-19-49_2f481ea0b382/1675920384.8822913/events.out.tfevents.1675920384.2f481ea0b382.229.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3cd46406fd5fcf0df98c5506929a6c5fdac00b065adf380344345fde71eb856
3
+ size 5952
runs/Feb09_05-19-49_2f481ea0b382/events.out.tfevents.1675920014.2f481ea0b382.229.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0666918e3bf6e4588614adc4681a6884f490b1b1ffe5fc3e674bc8d620aca324
3
+ size 4991
runs/Feb09_05-19-49_2f481ea0b382/events.out.tfevents.1675920384.2f481ea0b382.229.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdd1cd3228b9e1861b5bb9722e297776fac3573f23a401d1935533fabc65f3f
3
+ size 14426
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660f1225e692fbbda687422d3532879d3c116f23c4ac0ae767265d9fdf03511c
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84070d0e82c96b5d90688a9eaac039d70060ac2fb04ad15294fe621d1085031
3
  size 3643