CreatorPhan committed on
Commit
ede467d
1 Parent(s): 507c313

Upload folder using huggingface_hub (#3)

Browse files

- Upload folder using huggingface_hub (d2bcdf81b4c7be220954c2f72b33d6d95238beca)

Files changed (5) hide show
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +603 -3
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b39b2ac3c3f75772f547048fda7ecb323076b9689db7d79915bba156dc508f2f
3
  size 39409357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b701342ea311c1e25f0f6953505f048df3f56027dc5c764b95df4491bfde1c
3
  size 39409357
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c734ecaa394370ee4bcd94cc0b2ae016a26765122f3f76327b28c23f96a22732
3
  size 78844421
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da79792a601e6737b980ff24e453353405dce9a807a0964433ac96d5a97b84aa
3
  size 78844421
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972139d83957a9cf2600cb6eeca17287d7a5377c33a53500ae7e13fe830ad36b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7508d4b8dd267de5cc58e972da25236687927651336a28f292c92f7f23951475
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d8dcaf05375bb59f736a94e8f8b03d33cdc87bc02411e6527a29996e0a68b3b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04fdaf305b17644f9d215d28be45b77e6d0f8e4b5adc1c7045c50a61bd8f3c5b
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.675977653631285,
5
  "eval_steps": 500,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1807,13 +1807,613 @@
1807
  "learning_rate": 0.00011620111731843578,
1808
  "loss": 1.6315,
1809
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1810
  }
1811
  ],
1812
  "logging_steps": 1,
1813
  "max_steps": 716,
1814
  "num_train_epochs": 4,
1815
  "save_steps": 100,
1816
- "total_flos": 1.5369996759656448e+17,
1817
  "trial_name": null,
1818
  "trial_params": null
1819
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.2346368715083798,
5
  "eval_steps": 500,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1807
  "learning_rate": 0.00011620111731843578,
1808
  "loss": 1.6315,
1809
  "step": 300
1810
+ },
1811
+ {
1812
+ "epoch": 1.68,
1813
+ "learning_rate": 0.00011592178770949722,
1814
+ "loss": 1.6802,
1815
+ "step": 301
1816
+ },
1817
+ {
1818
+ "epoch": 1.69,
1819
+ "learning_rate": 0.00011564245810055867,
1820
+ "loss": 1.7174,
1821
+ "step": 302
1822
+ },
1823
+ {
1824
+ "epoch": 1.69,
1825
+ "learning_rate": 0.00011536312849162012,
1826
+ "loss": 1.5212,
1827
+ "step": 303
1828
+ },
1829
+ {
1830
+ "epoch": 1.7,
1831
+ "learning_rate": 0.00011508379888268157,
1832
+ "loss": 1.5808,
1833
+ "step": 304
1834
+ },
1835
+ {
1836
+ "epoch": 1.7,
1837
+ "learning_rate": 0.00011480446927374303,
1838
+ "loss": 1.6152,
1839
+ "step": 305
1840
+ },
1841
+ {
1842
+ "epoch": 1.71,
1843
+ "learning_rate": 0.00011452513966480447,
1844
+ "loss": 1.5435,
1845
+ "step": 306
1846
+ },
1847
+ {
1848
+ "epoch": 1.72,
1849
+ "learning_rate": 0.00011424581005586592,
1850
+ "loss": 1.6603,
1851
+ "step": 307
1852
+ },
1853
+ {
1854
+ "epoch": 1.72,
1855
+ "learning_rate": 0.00011396648044692737,
1856
+ "loss": 1.685,
1857
+ "step": 308
1858
+ },
1859
+ {
1860
+ "epoch": 1.73,
1861
+ "learning_rate": 0.00011368715083798884,
1862
+ "loss": 1.6002,
1863
+ "step": 309
1864
+ },
1865
+ {
1866
+ "epoch": 1.73,
1867
+ "learning_rate": 0.00011340782122905029,
1868
+ "loss": 1.6046,
1869
+ "step": 310
1870
+ },
1871
+ {
1872
+ "epoch": 1.74,
1873
+ "learning_rate": 0.00011312849162011174,
1874
+ "loss": 1.5969,
1875
+ "step": 311
1876
+ },
1877
+ {
1878
+ "epoch": 1.74,
1879
+ "learning_rate": 0.0001128491620111732,
1880
+ "loss": 1.5845,
1881
+ "step": 312
1882
+ },
1883
+ {
1884
+ "epoch": 1.75,
1885
+ "learning_rate": 0.00011256983240223464,
1886
+ "loss": 1.8183,
1887
+ "step": 313
1888
+ },
1889
+ {
1890
+ "epoch": 1.75,
1891
+ "learning_rate": 0.00011229050279329609,
1892
+ "loss": 1.6953,
1893
+ "step": 314
1894
+ },
1895
+ {
1896
+ "epoch": 1.76,
1897
+ "learning_rate": 0.00011201117318435754,
1898
+ "loss": 1.7787,
1899
+ "step": 315
1900
+ },
1901
+ {
1902
+ "epoch": 1.77,
1903
+ "learning_rate": 0.000111731843575419,
1904
+ "loss": 1.6422,
1905
+ "step": 316
1906
+ },
1907
+ {
1908
+ "epoch": 1.77,
1909
+ "learning_rate": 0.00011145251396648045,
1910
+ "loss": 1.7034,
1911
+ "step": 317
1912
+ },
1913
+ {
1914
+ "epoch": 1.78,
1915
+ "learning_rate": 0.00011117318435754192,
1916
+ "loss": 1.7301,
1917
+ "step": 318
1918
+ },
1919
+ {
1920
+ "epoch": 1.78,
1921
+ "learning_rate": 0.00011089385474860337,
1922
+ "loss": 1.7084,
1923
+ "step": 319
1924
+ },
1925
+ {
1926
+ "epoch": 1.79,
1927
+ "learning_rate": 0.00011061452513966482,
1928
+ "loss": 1.772,
1929
+ "step": 320
1930
+ },
1931
+ {
1932
+ "epoch": 1.79,
1933
+ "learning_rate": 0.00011033519553072626,
1934
+ "loss": 1.5733,
1935
+ "step": 321
1936
+ },
1937
+ {
1938
+ "epoch": 1.8,
1939
+ "learning_rate": 0.00011005586592178771,
1940
+ "loss": 1.6423,
1941
+ "step": 322
1942
+ },
1943
+ {
1944
+ "epoch": 1.8,
1945
+ "learning_rate": 0.00010977653631284917,
1946
+ "loss": 1.5809,
1947
+ "step": 323
1948
+ },
1949
+ {
1950
+ "epoch": 1.81,
1951
+ "learning_rate": 0.00010949720670391062,
1952
+ "loss": 1.6781,
1953
+ "step": 324
1954
+ },
1955
+ {
1956
+ "epoch": 1.82,
1957
+ "learning_rate": 0.00010921787709497207,
1958
+ "loss": 1.6788,
1959
+ "step": 325
1960
+ },
1961
+ {
1962
+ "epoch": 1.82,
1963
+ "learning_rate": 0.00010893854748603351,
1964
+ "loss": 1.6346,
1965
+ "step": 326
1966
+ },
1967
+ {
1968
+ "epoch": 1.83,
1969
+ "learning_rate": 0.00010865921787709499,
1970
+ "loss": 1.6634,
1971
+ "step": 327
1972
+ },
1973
+ {
1974
+ "epoch": 1.83,
1975
+ "learning_rate": 0.00010837988826815643,
1976
+ "loss": 1.7561,
1977
+ "step": 328
1978
+ },
1979
+ {
1980
+ "epoch": 1.84,
1981
+ "learning_rate": 0.00010810055865921788,
1982
+ "loss": 1.66,
1983
+ "step": 329
1984
+ },
1985
+ {
1986
+ "epoch": 1.84,
1987
+ "learning_rate": 0.00010782122905027934,
1988
+ "loss": 1.7298,
1989
+ "step": 330
1990
+ },
1991
+ {
1992
+ "epoch": 1.85,
1993
+ "learning_rate": 0.00010754189944134079,
1994
+ "loss": 1.6893,
1995
+ "step": 331
1996
+ },
1997
+ {
1998
+ "epoch": 1.85,
1999
+ "learning_rate": 0.00010726256983240224,
2000
+ "loss": 1.7631,
2001
+ "step": 332
2002
+ },
2003
+ {
2004
+ "epoch": 1.86,
2005
+ "learning_rate": 0.00010698324022346368,
2006
+ "loss": 1.6633,
2007
+ "step": 333
2008
+ },
2009
+ {
2010
+ "epoch": 1.87,
2011
+ "learning_rate": 0.00010670391061452513,
2012
+ "loss": 1.5388,
2013
+ "step": 334
2014
+ },
2015
+ {
2016
+ "epoch": 1.87,
2017
+ "learning_rate": 0.00010642458100558659,
2018
+ "loss": 1.6718,
2019
+ "step": 335
2020
+ },
2021
+ {
2022
+ "epoch": 1.88,
2023
+ "learning_rate": 0.00010614525139664805,
2024
+ "loss": 1.5536,
2025
+ "step": 336
2026
+ },
2027
+ {
2028
+ "epoch": 1.88,
2029
+ "learning_rate": 0.00010586592178770951,
2030
+ "loss": 1.6483,
2031
+ "step": 337
2032
+ },
2033
+ {
2034
+ "epoch": 1.89,
2035
+ "learning_rate": 0.00010558659217877096,
2036
+ "loss": 1.5774,
2037
+ "step": 338
2038
+ },
2039
+ {
2040
+ "epoch": 1.89,
2041
+ "learning_rate": 0.00010530726256983241,
2042
+ "loss": 1.6366,
2043
+ "step": 339
2044
+ },
2045
+ {
2046
+ "epoch": 1.9,
2047
+ "learning_rate": 0.00010502793296089387,
2048
+ "loss": 1.5567,
2049
+ "step": 340
2050
+ },
2051
+ {
2052
+ "epoch": 1.91,
2053
+ "learning_rate": 0.0001047486033519553,
2054
+ "loss": 1.5323,
2055
+ "step": 341
2056
+ },
2057
+ {
2058
+ "epoch": 1.91,
2059
+ "learning_rate": 0.00010446927374301676,
2060
+ "loss": 1.4608,
2061
+ "step": 342
2062
+ },
2063
+ {
2064
+ "epoch": 1.92,
2065
+ "learning_rate": 0.00010418994413407821,
2066
+ "loss": 1.5933,
2067
+ "step": 343
2068
+ },
2069
+ {
2070
+ "epoch": 1.92,
2071
+ "learning_rate": 0.00010391061452513966,
2072
+ "loss": 1.6625,
2073
+ "step": 344
2074
+ },
2075
+ {
2076
+ "epoch": 1.93,
2077
+ "learning_rate": 0.00010363128491620113,
2078
+ "loss": 1.7236,
2079
+ "step": 345
2080
+ },
2081
+ {
2082
+ "epoch": 1.93,
2083
+ "learning_rate": 0.00010335195530726258,
2084
+ "loss": 1.759,
2085
+ "step": 346
2086
+ },
2087
+ {
2088
+ "epoch": 1.94,
2089
+ "learning_rate": 0.00010307262569832404,
2090
+ "loss": 1.7248,
2091
+ "step": 347
2092
+ },
2093
+ {
2094
+ "epoch": 1.94,
2095
+ "learning_rate": 0.00010279329608938548,
2096
+ "loss": 1.5144,
2097
+ "step": 348
2098
+ },
2099
+ {
2100
+ "epoch": 1.95,
2101
+ "learning_rate": 0.00010251396648044693,
2102
+ "loss": 1.6905,
2103
+ "step": 349
2104
+ },
2105
+ {
2106
+ "epoch": 1.96,
2107
+ "learning_rate": 0.00010223463687150838,
2108
+ "loss": 1.6119,
2109
+ "step": 350
2110
+ },
2111
+ {
2112
+ "epoch": 1.96,
2113
+ "learning_rate": 0.00010195530726256983,
2114
+ "loss": 1.5464,
2115
+ "step": 351
2116
+ },
2117
+ {
2118
+ "epoch": 1.97,
2119
+ "learning_rate": 0.00010167597765363129,
2120
+ "loss": 1.6901,
2121
+ "step": 352
2122
+ },
2123
+ {
2124
+ "epoch": 1.97,
2125
+ "learning_rate": 0.00010139664804469273,
2126
+ "loss": 1.3511,
2127
+ "step": 353
2128
+ },
2129
+ {
2130
+ "epoch": 1.98,
2131
+ "learning_rate": 0.0001011173184357542,
2132
+ "loss": 1.5434,
2133
+ "step": 354
2134
+ },
2135
+ {
2136
+ "epoch": 1.98,
2137
+ "learning_rate": 0.00010083798882681566,
2138
+ "loss": 1.5891,
2139
+ "step": 355
2140
+ },
2141
+ {
2142
+ "epoch": 1.99,
2143
+ "learning_rate": 0.0001005586592178771,
2144
+ "loss": 1.6658,
2145
+ "step": 356
2146
+ },
2147
+ {
2148
+ "epoch": 1.99,
2149
+ "learning_rate": 0.00010027932960893855,
2150
+ "loss": 1.5657,
2151
+ "step": 357
2152
+ },
2153
+ {
2154
+ "epoch": 2.0,
2155
+ "learning_rate": 0.0001,
2156
+ "loss": 1.7005,
2157
+ "step": 358
2158
+ },
2159
+ {
2160
+ "epoch": 2.01,
2161
+ "learning_rate": 9.972067039106146e-05,
2162
+ "loss": 1.4202,
2163
+ "step": 359
2164
+ },
2165
+ {
2166
+ "epoch": 2.01,
2167
+ "learning_rate": 9.944134078212291e-05,
2168
+ "loss": 1.5262,
2169
+ "step": 360
2170
+ },
2171
+ {
2172
+ "epoch": 2.02,
2173
+ "learning_rate": 9.916201117318436e-05,
2174
+ "loss": 1.6323,
2175
+ "step": 361
2176
+ },
2177
+ {
2178
+ "epoch": 2.02,
2179
+ "learning_rate": 9.888268156424582e-05,
2180
+ "loss": 1.5521,
2181
+ "step": 362
2182
+ },
2183
+ {
2184
+ "epoch": 2.03,
2185
+ "learning_rate": 9.860335195530727e-05,
2186
+ "loss": 1.5762,
2187
+ "step": 363
2188
+ },
2189
+ {
2190
+ "epoch": 2.03,
2191
+ "learning_rate": 9.832402234636872e-05,
2192
+ "loss": 1.613,
2193
+ "step": 364
2194
+ },
2195
+ {
2196
+ "epoch": 2.04,
2197
+ "learning_rate": 9.804469273743018e-05,
2198
+ "loss": 1.4231,
2199
+ "step": 365
2200
+ },
2201
+ {
2202
+ "epoch": 2.04,
2203
+ "learning_rate": 9.776536312849163e-05,
2204
+ "loss": 1.5706,
2205
+ "step": 366
2206
+ },
2207
+ {
2208
+ "epoch": 2.05,
2209
+ "learning_rate": 9.748603351955308e-05,
2210
+ "loss": 1.5245,
2211
+ "step": 367
2212
+ },
2213
+ {
2214
+ "epoch": 2.06,
2215
+ "learning_rate": 9.720670391061453e-05,
2216
+ "loss": 1.4771,
2217
+ "step": 368
2218
+ },
2219
+ {
2220
+ "epoch": 2.06,
2221
+ "learning_rate": 9.692737430167597e-05,
2222
+ "loss": 1.596,
2223
+ "step": 369
2224
+ },
2225
+ {
2226
+ "epoch": 2.07,
2227
+ "learning_rate": 9.664804469273744e-05,
2228
+ "loss": 1.537,
2229
+ "step": 370
2230
+ },
2231
+ {
2232
+ "epoch": 2.07,
2233
+ "learning_rate": 9.636871508379889e-05,
2234
+ "loss": 1.4276,
2235
+ "step": 371
2236
+ },
2237
+ {
2238
+ "epoch": 2.08,
2239
+ "learning_rate": 9.608938547486033e-05,
2240
+ "loss": 1.4746,
2241
+ "step": 372
2242
+ },
2243
+ {
2244
+ "epoch": 2.08,
2245
+ "learning_rate": 9.581005586592178e-05,
2246
+ "loss": 1.4374,
2247
+ "step": 373
2248
+ },
2249
+ {
2250
+ "epoch": 2.09,
2251
+ "learning_rate": 9.553072625698325e-05,
2252
+ "loss": 1.4704,
2253
+ "step": 374
2254
+ },
2255
+ {
2256
+ "epoch": 2.09,
2257
+ "learning_rate": 9.52513966480447e-05,
2258
+ "loss": 1.5997,
2259
+ "step": 375
2260
+ },
2261
+ {
2262
+ "epoch": 2.1,
2263
+ "learning_rate": 9.497206703910614e-05,
2264
+ "loss": 1.5034,
2265
+ "step": 376
2266
+ },
2267
+ {
2268
+ "epoch": 2.11,
2269
+ "learning_rate": 9.46927374301676e-05,
2270
+ "loss": 1.6392,
2271
+ "step": 377
2272
+ },
2273
+ {
2274
+ "epoch": 2.11,
2275
+ "learning_rate": 9.441340782122905e-05,
2276
+ "loss": 1.5611,
2277
+ "step": 378
2278
+ },
2279
+ {
2280
+ "epoch": 2.12,
2281
+ "learning_rate": 9.413407821229052e-05,
2282
+ "loss": 1.3384,
2283
+ "step": 379
2284
+ },
2285
+ {
2286
+ "epoch": 2.12,
2287
+ "learning_rate": 9.385474860335196e-05,
2288
+ "loss": 1.5732,
2289
+ "step": 380
2290
+ },
2291
+ {
2292
+ "epoch": 2.13,
2293
+ "learning_rate": 9.357541899441341e-05,
2294
+ "loss": 1.3874,
2295
+ "step": 381
2296
+ },
2297
+ {
2298
+ "epoch": 2.13,
2299
+ "learning_rate": 9.329608938547486e-05,
2300
+ "loss": 1.4139,
2301
+ "step": 382
2302
+ },
2303
+ {
2304
+ "epoch": 2.14,
2305
+ "learning_rate": 9.301675977653633e-05,
2306
+ "loss": 1.5195,
2307
+ "step": 383
2308
+ },
2309
+ {
2310
+ "epoch": 2.15,
2311
+ "learning_rate": 9.273743016759777e-05,
2312
+ "loss": 1.4371,
2313
+ "step": 384
2314
+ },
2315
+ {
2316
+ "epoch": 2.15,
2317
+ "learning_rate": 9.245810055865922e-05,
2318
+ "loss": 1.4411,
2319
+ "step": 385
2320
+ },
2321
+ {
2322
+ "epoch": 2.16,
2323
+ "learning_rate": 9.217877094972067e-05,
2324
+ "loss": 1.5214,
2325
+ "step": 386
2326
+ },
2327
+ {
2328
+ "epoch": 2.16,
2329
+ "learning_rate": 9.189944134078213e-05,
2330
+ "loss": 1.4971,
2331
+ "step": 387
2332
+ },
2333
+ {
2334
+ "epoch": 2.17,
2335
+ "learning_rate": 9.162011173184358e-05,
2336
+ "loss": 1.3453,
2337
+ "step": 388
2338
+ },
2339
+ {
2340
+ "epoch": 2.17,
2341
+ "learning_rate": 9.134078212290503e-05,
2342
+ "loss": 1.394,
2343
+ "step": 389
2344
+ },
2345
+ {
2346
+ "epoch": 2.18,
2347
+ "learning_rate": 9.106145251396648e-05,
2348
+ "loss": 1.5058,
2349
+ "step": 390
2350
+ },
2351
+ {
2352
+ "epoch": 2.18,
2353
+ "learning_rate": 9.078212290502794e-05,
2354
+ "loss": 1.4855,
2355
+ "step": 391
2356
+ },
2357
+ {
2358
+ "epoch": 2.19,
2359
+ "learning_rate": 9.050279329608939e-05,
2360
+ "loss": 1.4647,
2361
+ "step": 392
2362
+ },
2363
+ {
2364
+ "epoch": 2.2,
2365
+ "learning_rate": 9.022346368715084e-05,
2366
+ "loss": 1.3435,
2367
+ "step": 393
2368
+ },
2369
+ {
2370
+ "epoch": 2.2,
2371
+ "learning_rate": 8.99441340782123e-05,
2372
+ "loss": 1.5815,
2373
+ "step": 394
2374
+ },
2375
+ {
2376
+ "epoch": 2.21,
2377
+ "learning_rate": 8.966480446927375e-05,
2378
+ "loss": 1.4742,
2379
+ "step": 395
2380
+ },
2381
+ {
2382
+ "epoch": 2.21,
2383
+ "learning_rate": 8.938547486033519e-05,
2384
+ "loss": 1.5389,
2385
+ "step": 396
2386
+ },
2387
+ {
2388
+ "epoch": 2.22,
2389
+ "learning_rate": 8.910614525139666e-05,
2390
+ "loss": 1.384,
2391
+ "step": 397
2392
+ },
2393
+ {
2394
+ "epoch": 2.22,
2395
+ "learning_rate": 8.882681564245811e-05,
2396
+ "loss": 1.3967,
2397
+ "step": 398
2398
+ },
2399
+ {
2400
+ "epoch": 2.23,
2401
+ "learning_rate": 8.854748603351956e-05,
2402
+ "loss": 1.442,
2403
+ "step": 399
2404
+ },
2405
+ {
2406
+ "epoch": 2.23,
2407
+ "learning_rate": 8.8268156424581e-05,
2408
+ "loss": 1.396,
2409
+ "step": 400
2410
  }
2411
  ],
2412
  "logging_steps": 1,
2413
  "max_steps": 716,
2414
  "num_train_epochs": 4,
2415
  "save_steps": 100,
2416
+ "total_flos": 2.056923590572032e+17,
2417
  "trial_name": null,
2418
  "trial_params": null
2419
  }