joelniklaus commited on
Commit
53bf0fe
1 Parent(s): ba587bb

Training in progress, step 350000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3406b3f5d360b3ee97dec0690ee5e0f1f592bd66b41674fd22328fa308c97410
3
  size 1475917081
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01eeb7203c73ae25830371a530337eca01f1003b7a50caee9183d76ee91ba7de
3
  size 1475917081
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:916088829e4a33bb63ef8bfaf253f9e54fbcbb1ad1d20931ca2eb4df28f415f8
3
  size 737971755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4b61df7a33ac16523e6c4ab0a364e641dc3a424040f9c827d0938f4fbd92de
3
  size 737971755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78957a8e67c10188ecbf7b87fac550a5e15834a1bedab06244fbfd096963cc82
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fbeb25a56616a76e374f3b84422a557e38ebfe8539cf5633ebfa81a135c8d
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d41c6734c2aef1f60ed0fbc886cbc351448520889799ebfa66c14f8f9e99059
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f220426de5a076dbb6f66f54955d3a3fc0acbab10b1bd60cf9472b552bfdca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3,
5
- "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1854,11 +1854,319 @@
1854
  "eval_samples_per_second": 51.846,
1855
  "eval_steps_per_second": 0.415,
1856
  "step": 300000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1857
  }
1858
  ],
1859
  "max_steps": 1000000,
1860
  "num_train_epochs": 9223372036854775807,
1861
- "total_flos": 5.0592818921472e+18,
1862
  "trial_name": null,
1863
  "trial_params": null
1864
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.35,
5
+ "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1854
  "eval_samples_per_second": 51.846,
1855
  "eval_steps_per_second": 0.415,
1856
  "step": 300000
1857
+ },
1858
+ {
1859
+ "epoch": 0.3,
1860
+ "learning_rate": 8.37422439088976e-05,
1861
+ "loss": 0.9018,
1862
+ "step": 301000
1863
+ },
1864
+ {
1865
+ "epoch": 0.3,
1866
+ "learning_rate": 8.362004023673474e-05,
1867
+ "loss": 0.908,
1868
+ "step": 302000
1869
+ },
1870
+ {
1871
+ "epoch": 0.3,
1872
+ "learning_rate": 8.349746890119826e-05,
1873
+ "loss": 0.9228,
1874
+ "step": 303000
1875
+ },
1876
+ {
1877
+ "epoch": 0.3,
1878
+ "learning_rate": 8.337453124270863e-05,
1879
+ "loss": 0.9383,
1880
+ "step": 304000
1881
+ },
1882
+ {
1883
+ "epoch": 0.3,
1884
+ "learning_rate": 8.32512286056924e-05,
1885
+ "loss": 0.9279,
1886
+ "step": 305000
1887
+ },
1888
+ {
1889
+ "epoch": 0.31,
1890
+ "learning_rate": 8.31275623385675e-05,
1891
+ "loss": 0.912,
1892
+ "step": 306000
1893
+ },
1894
+ {
1895
+ "epoch": 0.31,
1896
+ "learning_rate": 8.300353379372834e-05,
1897
+ "loss": 0.9081,
1898
+ "step": 307000
1899
+ },
1900
+ {
1901
+ "epoch": 0.31,
1902
+ "learning_rate": 8.287914432753123e-05,
1903
+ "loss": 0.8929,
1904
+ "step": 308000
1905
+ },
1906
+ {
1907
+ "epoch": 0.31,
1908
+ "learning_rate": 8.275439530027948e-05,
1909
+ "loss": 0.8785,
1910
+ "step": 309000
1911
+ },
1912
+ {
1913
+ "epoch": 0.31,
1914
+ "learning_rate": 8.262928807620843e-05,
1915
+ "loss": 0.9038,
1916
+ "step": 310000
1917
+ },
1918
+ {
1919
+ "epoch": 0.31,
1920
+ "learning_rate": 8.250382402347065e-05,
1921
+ "loss": 0.8984,
1922
+ "step": 311000
1923
+ },
1924
+ {
1925
+ "epoch": 0.31,
1926
+ "learning_rate": 8.237800451412095e-05,
1927
+ "loss": 0.9051,
1928
+ "step": 312000
1929
+ },
1930
+ {
1931
+ "epoch": 0.31,
1932
+ "learning_rate": 8.225183092410128e-05,
1933
+ "loss": 0.8888,
1934
+ "step": 313000
1935
+ },
1936
+ {
1937
+ "epoch": 0.31,
1938
+ "learning_rate": 8.212530463322583e-05,
1939
+ "loss": 0.8833,
1940
+ "step": 314000
1941
+ },
1942
+ {
1943
+ "epoch": 0.32,
1944
+ "learning_rate": 8.199842702516583e-05,
1945
+ "loss": 0.9158,
1946
+ "step": 315000
1947
+ },
1948
+ {
1949
+ "epoch": 0.32,
1950
+ "learning_rate": 8.18711994874345e-05,
1951
+ "loss": 0.8836,
1952
+ "step": 316000
1953
+ },
1954
+ {
1955
+ "epoch": 0.32,
1956
+ "learning_rate": 8.174362341137177e-05,
1957
+ "loss": 0.9135,
1958
+ "step": 317000
1959
+ },
1960
+ {
1961
+ "epoch": 0.32,
1962
+ "learning_rate": 8.161570019212921e-05,
1963
+ "loss": 0.9094,
1964
+ "step": 318000
1965
+ },
1966
+ {
1967
+ "epoch": 0.32,
1968
+ "learning_rate": 8.148743122865463e-05,
1969
+ "loss": 0.8858,
1970
+ "step": 319000
1971
+ },
1972
+ {
1973
+ "epoch": 0.32,
1974
+ "learning_rate": 8.135881792367686e-05,
1975
+ "loss": 0.8964,
1976
+ "step": 320000
1977
+ },
1978
+ {
1979
+ "epoch": 0.32,
1980
+ "learning_rate": 8.12298616836904e-05,
1981
+ "loss": 0.901,
1982
+ "step": 321000
1983
+ },
1984
+ {
1985
+ "epoch": 0.32,
1986
+ "learning_rate": 8.110056391894005e-05,
1987
+ "loss": 0.9241,
1988
+ "step": 322000
1989
+ },
1990
+ {
1991
+ "epoch": 0.32,
1992
+ "learning_rate": 8.097092604340542e-05,
1993
+ "loss": 0.9077,
1994
+ "step": 323000
1995
+ },
1996
+ {
1997
+ "epoch": 0.32,
1998
+ "learning_rate": 8.084094947478556e-05,
1999
+ "loss": 0.9093,
2000
+ "step": 324000
2001
+ },
2002
+ {
2003
+ "epoch": 0.33,
2004
+ "learning_rate": 8.07106356344834e-05,
2005
+ "loss": 0.9183,
2006
+ "step": 325000
2007
+ },
2008
+ {
2009
+ "epoch": 0.33,
2010
+ "learning_rate": 8.057998594759022e-05,
2011
+ "loss": 0.9342,
2012
+ "step": 326000
2013
+ },
2014
+ {
2015
+ "epoch": 0.33,
2016
+ "learning_rate": 8.044900184287007e-05,
2017
+ "loss": 0.9212,
2018
+ "step": 327000
2019
+ },
2020
+ {
2021
+ "epoch": 0.33,
2022
+ "learning_rate": 8.031768475274413e-05,
2023
+ "loss": 0.9397,
2024
+ "step": 328000
2025
+ },
2026
+ {
2027
+ "epoch": 0.33,
2028
+ "learning_rate": 8.018603611327504e-05,
2029
+ "loss": 0.9342,
2030
+ "step": 329000
2031
+ },
2032
+ {
2033
+ "epoch": 0.33,
2034
+ "learning_rate": 8.005405736415126e-05,
2035
+ "loss": 0.9234,
2036
+ "step": 330000
2037
+ },
2038
+ {
2039
+ "epoch": 0.33,
2040
+ "learning_rate": 7.992174994867123e-05,
2041
+ "loss": 0.9054,
2042
+ "step": 331000
2043
+ },
2044
+ {
2045
+ "epoch": 0.33,
2046
+ "learning_rate": 7.978911531372765e-05,
2047
+ "loss": 0.8913,
2048
+ "step": 332000
2049
+ },
2050
+ {
2051
+ "epoch": 0.33,
2052
+ "learning_rate": 7.965615490979163e-05,
2053
+ "loss": 0.8869,
2054
+ "step": 333000
2055
+ },
2056
+ {
2057
+ "epoch": 0.33,
2058
+ "learning_rate": 7.952287019089685e-05,
2059
+ "loss": 0.911,
2060
+ "step": 334000
2061
+ },
2062
+ {
2063
+ "epoch": 0.34,
2064
+ "learning_rate": 7.938926261462366e-05,
2065
+ "loss": 0.9036,
2066
+ "step": 335000
2067
+ },
2068
+ {
2069
+ "epoch": 0.34,
2070
+ "learning_rate": 7.925533364208309e-05,
2071
+ "loss": 0.8847,
2072
+ "step": 336000
2073
+ },
2074
+ {
2075
+ "epoch": 0.34,
2076
+ "learning_rate": 7.912108473790092e-05,
2077
+ "loss": 0.8967,
2078
+ "step": 337000
2079
+ },
2080
+ {
2081
+ "epoch": 0.34,
2082
+ "learning_rate": 7.898651737020166e-05,
2083
+ "loss": 0.8983,
2084
+ "step": 338000
2085
+ },
2086
+ {
2087
+ "epoch": 0.34,
2088
+ "learning_rate": 7.88516330105925e-05,
2089
+ "loss": 0.9145,
2090
+ "step": 339000
2091
+ },
2092
+ {
2093
+ "epoch": 0.34,
2094
+ "learning_rate": 7.871643313414718e-05,
2095
+ "loss": 0.9213,
2096
+ "step": 340000
2097
+ },
2098
+ {
2099
+ "epoch": 0.34,
2100
+ "learning_rate": 7.858091921938988e-05,
2101
+ "loss": 0.9112,
2102
+ "step": 341000
2103
+ },
2104
+ {
2105
+ "epoch": 0.34,
2106
+ "learning_rate": 7.844509274827907e-05,
2107
+ "loss": 0.9006,
2108
+ "step": 342000
2109
+ },
2110
+ {
2111
+ "epoch": 0.34,
2112
+ "learning_rate": 7.830895520619128e-05,
2113
+ "loss": 0.8925,
2114
+ "step": 343000
2115
+ },
2116
+ {
2117
+ "epoch": 0.34,
2118
+ "learning_rate": 7.817250808190483e-05,
2119
+ "loss": 0.8887,
2120
+ "step": 344000
2121
+ },
2122
+ {
2123
+ "epoch": 0.34,
2124
+ "learning_rate": 7.803575286758364e-05,
2125
+ "loss": 0.8836,
2126
+ "step": 345000
2127
+ },
2128
+ {
2129
+ "epoch": 0.35,
2130
+ "learning_rate": 7.789869105876083e-05,
2131
+ "loss": 0.8685,
2132
+ "step": 346000
2133
+ },
2134
+ {
2135
+ "epoch": 0.35,
2136
+ "learning_rate": 7.776132415432234e-05,
2137
+ "loss": 0.8525,
2138
+ "step": 347000
2139
+ },
2140
+ {
2141
+ "epoch": 0.35,
2142
+ "learning_rate": 7.762365365649067e-05,
2143
+ "loss": 0.8712,
2144
+ "step": 348000
2145
+ },
2146
+ {
2147
+ "epoch": 0.35,
2148
+ "learning_rate": 7.748568107080832e-05,
2149
+ "loss": 0.8501,
2150
+ "step": 349000
2151
+ },
2152
+ {
2153
+ "epoch": 0.35,
2154
+ "learning_rate": 7.734740790612136e-05,
2155
+ "loss": 0.8324,
2156
+ "step": 350000
2157
+ },
2158
+ {
2159
+ "epoch": 0.35,
2160
+ "eval_loss": 0.6422616243362427,
2161
+ "eval_runtime": 114.2202,
2162
+ "eval_samples_per_second": 43.775,
2163
+ "eval_steps_per_second": 0.35,
2164
+ "step": 350000
2165
  }
2166
  ],
2167
  "max_steps": 1000000,
2168
  "num_train_epochs": 9223372036854775807,
2169
+ "total_flos": 5.9024955408384e+18,
2170
  "trial_name": null,
2171
  "trial_params": null
2172
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9227fdd9c0ef08967e965821c42a8882676ccac8307cf5ec9edcb45b212067b3
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1793f13c5a11dd9ecdf84c423d90f2d5ffd02b783b224028e2471ade3abd8577
3
  size 3439
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:916088829e4a33bb63ef8bfaf253f9e54fbcbb1ad1d20931ca2eb4df28f415f8
3
  size 737971755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4b61df7a33ac16523e6c4ab0a364e641dc3a424040f9c827d0938f4fbd92de
3
  size 737971755
runs/Jan16_18-15-46_t1v-n-9f780742-w-0/events.out.tfevents.1673893144.t1v-n-9f780742-w-0.17747.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b5f7d0f844b38e3b2512f07bdf7eee0b428199d39a0e84817d422ce8694803f
3
- size 53370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd61a437576e6298c63e94b6973159af1207f7aaf2ea0272e76cc033ea2bb0b4
3
+ size 60770
runs/Jan25_00-37-02_t1v-n-9f780742-w-0/1674607228.185891/events.out.tfevents.1674607228.t1v-n-9f780742-w-0.3357200.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093be2fbbd90fdbc631454c2d6aa7ff115dd672c239cb30e5fd535df274e727c
3
+ size 5430
runs/Jan25_00-37-02_t1v-n-9f780742-w-0/events.out.tfevents.1674607228.t1v-n-9f780742-w-0.3357200.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcc882a80b6103706d77991d69242068edc8820d6c9f49efb947dfc73b394f67
3
+ size 12038
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9227fdd9c0ef08967e965821c42a8882676ccac8307cf5ec9edcb45b212067b3
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1793f13c5a11dd9ecdf84c423d90f2d5ffd02b783b224028e2471ade3abd8577
3
  size 3439