ylacombe HF staff commited on
Commit
febb19e
1 Parent(s): 02daf7d

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: mit
3
  base_model: facebook/w2v-bert-2.0
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,11 +17,11 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-bert-CV16-en-libri
17
 
18
- This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.1077
21
- - Wer: 0.0717
22
- - Cer: 0.0197
23
 
24
  ## Model description
25
 
 
2
  license: mit
3
  base_model: facebook/w2v-bert-2.0
4
  tags:
5
+ - automatic-speech-recognition
6
+ - librispeech_asr
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-bert-CV16-en-libri
19
 
20
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the LIBRISPEECH_ASR - CLEAN dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.1035
23
+ - Wer: 0.0708
24
+ - Cer: 0.0194
25
 
26
  ## Model description
27
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_cer": 0.02643822760039138,
4
- "eval_loss": 0.13311129808425903,
5
- "eval_runtime": 59.8158,
6
  "eval_samples": 2528,
7
- "eval_samples_per_second": 42.263,
8
- "eval_steps_per_second": 1.187,
9
- "eval_wer": 0.09965613856342333,
10
- "train_loss": 0.09581804365822763,
11
- "train_runtime": 4900.8909,
12
  "train_samples": 28538,
13
- "train_samples_per_second": 29.115,
14
- "train_steps_per_second": 0.404
15
  }
 
1
  {
2
+ "epoch": 6.99,
3
+ "eval_cer": 0.019377383733700754,
4
+ "eval_loss": 0.10354145616292953,
5
+ "eval_runtime": 59.6491,
6
  "eval_samples": 2528,
7
+ "eval_samples_per_second": 42.381,
8
+ "eval_steps_per_second": 1.19,
9
+ "eval_wer": 0.07078875870266599,
10
+ "train_loss": 0.05688602840165039,
11
+ "train_runtime": 6445.258,
12
  "train_samples": 28538,
13
+ "train_samples_per_second": 30.994,
14
+ "train_steps_per_second": 0.43
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_cer": 0.02643822760039138,
4
- "eval_loss": 0.13311129808425903,
5
- "eval_runtime": 59.8158,
6
  "eval_samples": 2528,
7
- "eval_samples_per_second": 42.263,
8
- "eval_steps_per_second": 1.187,
9
- "eval_wer": 0.09965613856342333
10
  }
 
1
  {
2
+ "epoch": 6.99,
3
+ "eval_cer": 0.019377383733700754,
4
+ "eval_loss": 0.10354145616292953,
5
+ "eval_runtime": 59.6491,
6
  "eval_samples": 2528,
7
+ "eval_samples_per_second": 42.381,
8
+ "eval_steps_per_second": 1.19,
9
+ "eval_wer": 0.07078875870266599
10
  }
runs/Jan16_14-53-45_vorace/events.out.tfevents.1705423620.vorace.510473.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95954bbf24073745de4bd78f01796d56e41c36f6ab40add343e5fee7d55cc556
3
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.09581804365822763,
4
- "train_runtime": 4900.8909,
5
  "train_samples": 28538,
6
- "train_samples_per_second": 29.115,
7
- "train_steps_per_second": 0.404
8
  }
 
1
  {
2
+ "epoch": 6.99,
3
+ "train_loss": 0.05688602840165039,
4
+ "train_runtime": 6445.258,
5
  "train_samples": 28538,
6
+ "train_samples_per_second": 30.994,
7
+ "train_steps_per_second": 0.43
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.996216897856242,
5
  "eval_steps": 250,
6
- "global_step": 1980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1871,605 +1871,1593 @@
1871
  {
1872
  "epoch": 3.8,
1873
  "learning_rate": 4.497e-06,
1874
- "loss": 0.2137,
1875
  "step": 1505
1876
  },
1877
  {
1878
  "epoch": 3.81,
1879
- "learning_rate": 4.5089999999999995e-06,
1880
- "loss": 0.1759,
1881
  "step": 1510
1882
  },
1883
  {
1884
  "epoch": 3.82,
1885
- "learning_rate": 4.524e-06,
1886
- "loss": 0.1698,
1887
  "step": 1515
1888
  },
1889
  {
1890
  "epoch": 3.84,
1891
- "learning_rate": 4.539e-06,
1892
- "loss": 0.1703,
1893
  "step": 1520
1894
  },
1895
  {
1896
  "epoch": 3.85,
1897
- "learning_rate": 4.554e-06,
1898
- "loss": 0.1841,
1899
  "step": 1525
1900
  },
1901
  {
1902
  "epoch": 3.86,
1903
- "learning_rate": 4.569e-06,
1904
- "loss": 0.1783,
1905
  "step": 1530
1906
  },
1907
  {
1908
- "epoch": 3.87,
1909
- "learning_rate": 4.584e-06,
1910
- "loss": 0.1633,
1911
  "step": 1535
1912
  },
1913
  {
1914
  "epoch": 3.89,
1915
  "learning_rate": 4.5989999999999995e-06,
1916
- "loss": 0.1839,
1917
  "step": 1540
1918
  },
1919
  {
1920
  "epoch": 3.9,
1921
  "learning_rate": 4.614e-06,
1922
- "loss": 0.153,
1923
  "step": 1545
1924
  },
1925
  {
1926
  "epoch": 3.91,
1927
  "learning_rate": 4.629e-06,
1928
- "loss": 0.1553,
1929
  "step": 1550
1930
  },
1931
  {
1932
- "epoch": 3.92,
1933
  "learning_rate": 4.644e-06,
1934
- "loss": 0.1834,
1935
  "step": 1555
1936
  },
1937
  {
1938
  "epoch": 3.94,
1939
- "learning_rate": 4.659e-06,
1940
- "loss": 0.1582,
1941
  "step": 1560
1942
  },
1943
  {
1944
  "epoch": 3.95,
1945
- "learning_rate": 4.674e-06,
1946
- "loss": 0.1592,
1947
  "step": 1565
1948
  },
1949
  {
1950
  "epoch": 3.96,
1951
- "learning_rate": 4.689e-06,
1952
- "loss": 0.1823,
1953
  "step": 1570
1954
  },
1955
  {
1956
- "epoch": 3.97,
1957
- "learning_rate": 4.704e-06,
1958
- "loss": 0.1782,
1959
  "step": 1575
1960
  },
1961
  {
1962
  "epoch": 3.99,
1963
- "learning_rate": 4.719e-06,
1964
- "loss": 0.1543,
1965
  "step": 1580
1966
  },
1967
  {
1968
  "epoch": 4.0,
1969
- "learning_rate": 4.734e-06,
1970
- "loss": 0.1943,
1971
  "step": 1585
1972
  },
1973
  {
1974
  "epoch": 4.01,
1975
- "learning_rate": 4.749e-06,
1976
- "loss": 0.1491,
1977
  "step": 1590
1978
  },
1979
  {
1980
  "epoch": 4.03,
1981
- "learning_rate": 4.764e-06,
1982
- "loss": 0.1656,
1983
  "step": 1595
1984
  },
1985
  {
1986
  "epoch": 4.04,
1987
- "learning_rate": 4.779e-06,
1988
- "loss": 0.1507,
1989
  "step": 1600
1990
  },
1991
  {
1992
  "epoch": 4.05,
1993
- "learning_rate": 4.794e-06,
1994
- "loss": 0.1589,
1995
  "step": 1605
1996
  },
1997
  {
1998
  "epoch": 4.06,
1999
- "learning_rate": 4.809e-06,
2000
- "loss": 0.1443,
2001
  "step": 1610
2002
  },
2003
  {
2004
  "epoch": 4.08,
2005
- "learning_rate": 4.824e-06,
2006
- "loss": 0.1487,
2007
  "step": 1615
2008
  },
2009
  {
2010
  "epoch": 4.09,
2011
- "learning_rate": 4.839e-06,
2012
- "loss": 0.1766,
2013
  "step": 1620
2014
  },
2015
  {
2016
  "epoch": 4.1,
2017
- "learning_rate": 4.8540000000000005e-06,
2018
- "loss": 0.1548,
2019
  "step": 1625
2020
  },
2021
  {
2022
  "epoch": 4.11,
2023
- "learning_rate": 4.869e-06,
2024
- "loss": 0.1494,
2025
  "step": 1630
2026
  },
2027
  {
2028
  "epoch": 4.13,
2029
- "learning_rate": 4.884e-06,
2030
- "loss": 0.1793,
2031
  "step": 1635
2032
  },
2033
  {
2034
  "epoch": 4.14,
2035
- "learning_rate": 4.899e-06,
2036
- "loss": 0.1417,
2037
  "step": 1640
2038
  },
2039
  {
2040
  "epoch": 4.15,
2041
- "learning_rate": 4.914e-06,
2042
- "loss": 0.1399,
2043
  "step": 1645
2044
  },
2045
  {
2046
- "epoch": 4.16,
2047
- "learning_rate": 4.929000000000001e-06,
2048
- "loss": 0.166,
2049
  "step": 1650
2050
  },
2051
  {
2052
  "epoch": 4.18,
2053
- "learning_rate": 4.9440000000000004e-06,
2054
- "loss": 0.1748,
2055
  "step": 1655
2056
  },
2057
  {
2058
  "epoch": 4.19,
2059
- "learning_rate": 4.959e-06,
2060
- "loss": 0.1378,
2061
  "step": 1660
2062
  },
2063
  {
2064
  "epoch": 4.2,
2065
- "learning_rate": 4.974e-06,
2066
- "loss": 0.144,
2067
  "step": 1665
2068
  },
2069
  {
2070
- "epoch": 4.21,
2071
- "learning_rate": 4.989e-06,
2072
- "loss": 0.1794,
2073
  "step": 1670
2074
  },
2075
  {
2076
  "epoch": 4.23,
2077
- "learning_rate": 5.004e-06,
2078
- "loss": 0.146,
2079
  "step": 1675
2080
  },
2081
  {
2082
  "epoch": 4.24,
2083
- "learning_rate": 5.0190000000000006e-06,
2084
- "loss": 0.141,
2085
  "step": 1680
2086
  },
2087
  {
2088
  "epoch": 4.25,
2089
- "learning_rate": 5.034e-06,
2090
- "loss": 0.1757,
2091
  "step": 1685
2092
  },
2093
  {
2094
- "epoch": 4.26,
2095
- "learning_rate": 5.049e-06,
2096
- "loss": 0.1423,
2097
  "step": 1690
2098
  },
2099
  {
2100
  "epoch": 4.28,
2101
- "learning_rate": 5.064e-06,
2102
- "loss": 0.1414,
2103
  "step": 1695
2104
  },
2105
  {
2106
  "epoch": 4.29,
2107
- "learning_rate": 5.079e-06,
2108
- "loss": 0.1556,
2109
  "step": 1700
2110
  },
2111
  {
2112
  "epoch": 4.3,
2113
- "learning_rate": 5.094000000000001e-06,
2114
- "loss": 0.1599,
2115
  "step": 1705
2116
  },
2117
  {
2118
  "epoch": 4.32,
2119
- "learning_rate": 5.1090000000000006e-06,
2120
- "loss": 0.1436,
2121
  "step": 1710
2122
  },
2123
  {
2124
  "epoch": 4.33,
2125
- "learning_rate": 5.124e-06,
2126
- "loss": 0.1346,
2127
  "step": 1715
2128
  },
2129
  {
2130
  "epoch": 4.34,
2131
- "learning_rate": 5.139e-06,
2132
- "loss": 0.1702,
2133
  "step": 1720
2134
  },
2135
  {
2136
  "epoch": 4.35,
2137
- "learning_rate": 5.154e-06,
2138
- "loss": 0.1397,
2139
  "step": 1725
2140
  },
2141
  {
2142
  "epoch": 4.37,
2143
- "learning_rate": 5.169e-06,
2144
- "loss": 0.1373,
2145
  "step": 1730
2146
  },
2147
  {
2148
  "epoch": 4.38,
2149
- "learning_rate": 5.184000000000001e-06,
2150
- "loss": 0.1929,
2151
  "step": 1735
2152
  },
2153
  {
2154
  "epoch": 4.39,
2155
- "learning_rate": 5.1990000000000005e-06,
2156
- "loss": 0.1426,
2157
  "step": 1740
2158
  },
2159
  {
2160
  "epoch": 4.4,
2161
- "learning_rate": 5.214e-06,
2162
- "loss": 0.1481,
2163
  "step": 1745
2164
  },
2165
  {
2166
  "epoch": 4.42,
2167
- "learning_rate": 5.229e-06,
2168
- "loss": 0.163,
2169
  "step": 1750
2170
  },
2171
  {
2172
  "epoch": 4.42,
2173
- "eval_cer": 0.029768965035244312,
2174
- "eval_loss": 0.1489591896533966,
2175
- "eval_runtime": 60.6618,
2176
- "eval_samples_per_second": 41.674,
2177
- "eval_steps_per_second": 1.17,
2178
- "eval_wer": 0.1163397860417728,
2179
  "step": 1750
2180
  },
2181
  {
2182
  "epoch": 4.43,
2183
- "learning_rate": 5.244e-06,
2184
- "loss": 0.1816,
2185
  "step": 1755
2186
  },
2187
  {
2188
  "epoch": 4.44,
2189
- "learning_rate": 5.259000000000001e-06,
2190
  "loss": 0.1444,
2191
  "step": 1760
2192
  },
2193
  {
2194
- "epoch": 4.45,
2195
- "learning_rate": 5.274000000000001e-06,
2196
- "loss": 0.1423,
2197
  "step": 1765
2198
  },
2199
  {
2200
  "epoch": 4.47,
2201
- "learning_rate": 5.2890000000000005e-06,
2202
- "loss": 0.1605,
2203
  "step": 1770
2204
  },
2205
  {
2206
  "epoch": 4.48,
2207
- "learning_rate": 5.304e-06,
2208
- "loss": 0.1355,
2209
  "step": 1775
2210
  },
2211
  {
2212
  "epoch": 4.49,
2213
- "learning_rate": 5.319e-06,
2214
- "loss": 0.1401,
2215
  "step": 1780
2216
  },
2217
  {
2218
- "epoch": 4.5,
2219
- "learning_rate": 5.334000000000001e-06,
2220
- "loss": 0.1593,
2221
  "step": 1785
2222
  },
2223
  {
2224
  "epoch": 4.52,
2225
- "learning_rate": 5.349e-06,
2226
- "loss": 0.1361,
2227
  "step": 1790
2228
  },
2229
  {
2230
  "epoch": 4.53,
2231
- "learning_rate": 5.364e-06,
2232
- "loss": 0.1363,
2233
  "step": 1795
2234
  },
2235
  {
2236
  "epoch": 4.54,
2237
- "learning_rate": 5.379e-06,
2238
- "loss": 0.1527,
2239
  "step": 1800
2240
  },
2241
  {
2242
- "epoch": 4.55,
2243
- "learning_rate": 5.3939999999999995e-06,
2244
- "loss": 0.1645,
2245
  "step": 1805
2246
  },
2247
  {
2248
  "epoch": 4.57,
2249
- "learning_rate": 5.408999999999999e-06,
2250
- "loss": 0.1285,
2251
  "step": 1810
2252
  },
2253
  {
2254
  "epoch": 4.58,
2255
- "learning_rate": 5.424e-06,
2256
- "loss": 0.1366,
2257
  "step": 1815
2258
  },
2259
  {
2260
  "epoch": 4.59,
2261
- "learning_rate": 5.439e-06,
2262
- "loss": 0.1808,
2263
  "step": 1820
2264
  },
2265
  {
2266
  "epoch": 4.61,
2267
- "learning_rate": 5.454e-06,
2268
- "loss": 0.1428,
2269
  "step": 1825
2270
  },
2271
  {
2272
  "epoch": 4.62,
2273
- "learning_rate": 5.469e-06,
2274
- "loss": 0.1314,
2275
  "step": 1830
2276
  },
2277
  {
2278
  "epoch": 4.63,
2279
- "learning_rate": 5.4839999999999995e-06,
2280
- "loss": 0.1479,
2281
  "step": 1835
2282
  },
2283
  {
2284
  "epoch": 4.64,
2285
- "learning_rate": 5.499e-06,
2286
- "loss": 0.135,
2287
  "step": 1840
2288
  },
2289
  {
2290
  "epoch": 4.66,
2291
- "learning_rate": 5.514e-06,
2292
- "loss": 0.1253,
2293
  "step": 1845
2294
  },
2295
  {
2296
  "epoch": 4.67,
2297
- "learning_rate": 5.529e-06,
2298
- "loss": 0.13,
2299
  "step": 1850
2300
  },
2301
  {
2302
  "epoch": 4.68,
2303
- "learning_rate": 5.544e-06,
2304
- "loss": 0.1484,
2305
  "step": 1855
2306
  },
2307
  {
2308
  "epoch": 4.69,
2309
- "learning_rate": 5.559e-06,
2310
- "loss": 0.1201,
2311
  "step": 1860
2312
  },
2313
  {
2314
  "epoch": 4.71,
2315
- "learning_rate": 5.574e-06,
2316
- "loss": 0.1263,
2317
  "step": 1865
2318
  },
2319
  {
2320
  "epoch": 4.72,
2321
- "learning_rate": 5.589e-06,
2322
- "loss": 0.1493,
2323
  "step": 1870
2324
  },
2325
  {
2326
  "epoch": 4.73,
2327
- "learning_rate": 5.604e-06,
2328
- "loss": 0.1276,
2329
  "step": 1875
2330
  },
2331
  {
2332
- "epoch": 4.74,
2333
- "learning_rate": 5.619e-06,
2334
- "loss": 0.1236,
2335
  "step": 1880
2336
  },
2337
  {
2338
  "epoch": 4.76,
2339
- "learning_rate": 5.634e-06,
2340
- "loss": 0.1718,
2341
  "step": 1885
2342
  },
2343
  {
2344
  "epoch": 4.77,
2345
- "learning_rate": 5.649e-06,
2346
- "loss": 0.1292,
2347
  "step": 1890
2348
  },
2349
  {
2350
  "epoch": 4.78,
2351
- "learning_rate": 5.664e-06,
2352
- "loss": 0.1175,
2353
  "step": 1895
2354
  },
2355
  {
2356
- "epoch": 4.79,
2357
- "learning_rate": 5.679e-06,
2358
- "loss": 0.1317,
2359
  "step": 1900
2360
  },
2361
  {
2362
  "epoch": 4.81,
2363
- "learning_rate": 5.694e-06,
2364
- "loss": 0.1412,
2365
  "step": 1905
2366
  },
2367
  {
2368
  "epoch": 4.82,
2369
- "learning_rate": 5.709e-06,
2370
- "loss": 0.1222,
2371
  "step": 1910
2372
  },
2373
  {
2374
  "epoch": 4.83,
2375
- "learning_rate": 5.724e-06,
2376
- "loss": 0.1193,
2377
  "step": 1915
2378
  },
2379
  {
2380
- "epoch": 4.84,
2381
- "learning_rate": 5.7390000000000004e-06,
2382
- "loss": 0.16,
2383
  "step": 1920
2384
  },
2385
  {
2386
  "epoch": 4.86,
2387
- "learning_rate": 5.754e-06,
2388
- "loss": 0.1228,
2389
  "step": 1925
2390
  },
2391
  {
2392
  "epoch": 4.87,
2393
- "learning_rate": 5.769e-06,
2394
- "loss": 0.1195,
2395
  "step": 1930
2396
  },
2397
  {
2398
  "epoch": 4.88,
2399
- "learning_rate": 5.784e-06,
2400
- "loss": 0.1608,
2401
  "step": 1935
2402
  },
2403
  {
2404
  "epoch": 4.9,
2405
- "learning_rate": 5.799e-06,
2406
- "loss": 0.1234,
2407
  "step": 1940
2408
  },
2409
  {
2410
  "epoch": 4.91,
2411
- "learning_rate": 5.814e-06,
2412
- "loss": 0.1265,
2413
  "step": 1945
2414
  },
2415
  {
2416
  "epoch": 4.92,
2417
- "learning_rate": 5.8290000000000004e-06,
2418
- "loss": 0.1431,
2419
  "step": 1950
2420
  },
2421
  {
2422
  "epoch": 4.93,
2423
- "learning_rate": 5.844e-06,
2424
- "loss": 0.1366,
2425
  "step": 1955
2426
  },
2427
  {
2428
  "epoch": 4.95,
2429
- "learning_rate": 5.859e-06,
2430
- "loss": 0.1091,
2431
  "step": 1960
2432
  },
2433
  {
2434
  "epoch": 4.96,
2435
- "learning_rate": 5.874e-06,
2436
- "loss": 0.139,
2437
  "step": 1965
2438
  },
2439
  {
2440
  "epoch": 4.97,
2441
  "learning_rate": 5.886000000000001e-06,
2442
- "loss": 0.1726,
2443
  "step": 1970
2444
  },
2445
  {
2446
  "epoch": 4.98,
2447
  "learning_rate": 5.901000000000001e-06,
2448
- "loss": 0.1148,
2449
  "step": 1975
2450
  },
2451
  {
2452
  "epoch": 5.0,
2453
  "learning_rate": 5.916e-06,
2454
- "loss": 0.1296,
2455
  "step": 1980
2456
  },
2457
  {
2458
- "epoch": 5.0,
2459
- "step": 1980,
2460
- "total_flos": 5.2867996741439324e+19,
2461
- "train_loss": 0.09581804365822763,
2462
- "train_runtime": 4900.8909,
2463
- "train_samples_per_second": 29.115,
2464
- "train_steps_per_second": 0.404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2465
  }
2466
  ],
2467
  "logging_steps": 5,
2468
- "max_steps": 1980,
2469
  "num_input_tokens_seen": 0,
2470
- "num_train_epochs": 5,
2471
  "save_steps": 500,
2472
- "total_flos": 5.2867996741439324e+19,
2473
  "train_batch_size": 12,
2474
  "trial_name": null,
2475
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.994955863808323,
5
  "eval_steps": 250,
6
+ "global_step": 2772,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1871
  {
1872
  "epoch": 3.8,
1873
  "learning_rate": 4.497e-06,
1874
+ "loss": 0.2097,
1875
  "step": 1505
1876
  },
1877
  {
1878
  "epoch": 3.81,
1879
+ "learning_rate": 4.512e-06,
1880
+ "loss": 0.1645,
1881
  "step": 1510
1882
  },
1883
  {
1884
  "epoch": 3.82,
1885
+ "learning_rate": 4.527e-06,
1886
+ "loss": 0.163,
1887
  "step": 1515
1888
  },
1889
  {
1890
  "epoch": 3.84,
1891
+ "learning_rate": 4.542000000000001e-06,
1892
+ "loss": 0.1676,
1893
  "step": 1520
1894
  },
1895
  {
1896
  "epoch": 3.85,
1897
+ "learning_rate": 4.557000000000001e-06,
1898
+ "loss": 0.1946,
1899
  "step": 1525
1900
  },
1901
  {
1902
  "epoch": 3.86,
1903
+ "learning_rate": 4.5720000000000004e-06,
1904
+ "loss": 0.1582,
1905
  "step": 1530
1906
  },
1907
  {
1908
+ "epoch": 3.88,
1909
+ "learning_rate": 4.587e-06,
1910
+ "loss": 0.1588,
1911
  "step": 1535
1912
  },
1913
  {
1914
  "epoch": 3.89,
1915
  "learning_rate": 4.5989999999999995e-06,
1916
+ "loss": 0.1936,
1917
  "step": 1540
1918
  },
1919
  {
1920
  "epoch": 3.9,
1921
  "learning_rate": 4.614e-06,
1922
+ "loss": 0.1525,
1923
  "step": 1545
1924
  },
1925
  {
1926
  "epoch": 3.91,
1927
  "learning_rate": 4.629e-06,
1928
+ "loss": 0.1456,
1929
  "step": 1550
1930
  },
1931
  {
1932
+ "epoch": 3.93,
1933
  "learning_rate": 4.644e-06,
1934
+ "loss": 0.1883,
1935
  "step": 1555
1936
  },
1937
  {
1938
  "epoch": 3.94,
1939
+ "learning_rate": 4.656e-06,
1940
+ "loss": 0.1538,
1941
  "step": 1560
1942
  },
1943
  {
1944
  "epoch": 3.95,
1945
+ "learning_rate": 4.671000000000001e-06,
1946
+ "loss": 0.1621,
1947
  "step": 1565
1948
  },
1949
  {
1950
  "epoch": 3.96,
1951
+ "learning_rate": 4.6860000000000005e-06,
1952
+ "loss": 0.1677,
1953
  "step": 1570
1954
  },
1955
  {
1956
+ "epoch": 3.98,
1957
+ "learning_rate": 4.701e-06,
1958
+ "loss": 0.1697,
1959
  "step": 1575
1960
  },
1961
  {
1962
  "epoch": 3.99,
1963
+ "learning_rate": 4.716e-06,
1964
+ "loss": 0.1669,
1965
  "step": 1580
1966
  },
1967
  {
1968
  "epoch": 4.0,
1969
+ "learning_rate": 4.731e-06,
1970
+ "loss": 0.1954,
1971
  "step": 1585
1972
  },
1973
  {
1974
  "epoch": 4.01,
1975
+ "learning_rate": 4.746e-06,
1976
+ "loss": 0.1518,
1977
  "step": 1590
1978
  },
1979
  {
1980
  "epoch": 4.03,
1981
+ "learning_rate": 4.761000000000001e-06,
1982
+ "loss": 0.153,
1983
  "step": 1595
1984
  },
1985
  {
1986
  "epoch": 4.04,
1987
+ "learning_rate": 4.7760000000000005e-06,
1988
+ "loss": 0.1736,
1989
  "step": 1600
1990
  },
1991
  {
1992
  "epoch": 4.05,
1993
+ "learning_rate": 4.791e-06,
1994
+ "loss": 0.1748,
1995
  "step": 1605
1996
  },
1997
  {
1998
  "epoch": 4.06,
1999
+ "learning_rate": 4.806e-06,
2000
+ "loss": 0.1545,
2001
  "step": 1610
2002
  },
2003
  {
2004
  "epoch": 4.08,
2005
+ "learning_rate": 4.821e-06,
2006
+ "loss": 0.143,
2007
  "step": 1615
2008
  },
2009
  {
2010
  "epoch": 4.09,
2011
+ "learning_rate": 4.836000000000001e-06,
2012
+ "loss": 0.1808,
2013
  "step": 1620
2014
  },
2015
  {
2016
  "epoch": 4.1,
2017
+ "learning_rate": 4.851000000000001e-06,
2018
+ "loss": 0.166,
2019
  "step": 1625
2020
  },
2021
  {
2022
  "epoch": 4.11,
2023
+ "learning_rate": 4.8660000000000005e-06,
2024
+ "loss": 0.1432,
2025
  "step": 1630
2026
  },
2027
  {
2028
  "epoch": 4.13,
2029
+ "learning_rate": 4.881e-06,
2030
+ "loss": 0.1863,
2031
  "step": 1635
2032
  },
2033
  {
2034
  "epoch": 4.14,
2035
+ "learning_rate": 4.896e-06,
2036
+ "loss": 0.1423,
2037
  "step": 1640
2038
  },
2039
  {
2040
  "epoch": 4.15,
2041
+ "learning_rate": 4.911e-06,
2042
+ "loss": 0.1519,
2043
  "step": 1645
2044
  },
2045
  {
2046
+ "epoch": 4.17,
2047
+ "learning_rate": 4.926000000000001e-06,
2048
+ "loss": 0.169,
2049
  "step": 1650
2050
  },
2051
  {
2052
  "epoch": 4.18,
2053
+ "learning_rate": 4.941000000000001e-06,
2054
+ "loss": 0.1729,
2055
  "step": 1655
2056
  },
2057
  {
2058
  "epoch": 4.19,
2059
+ "learning_rate": 4.9560000000000005e-06,
2060
+ "loss": 0.1585,
2061
  "step": 1660
2062
  },
2063
  {
2064
  "epoch": 4.2,
2065
+ "learning_rate": 4.9709999999999995e-06,
2066
+ "loss": 0.1413,
2067
  "step": 1665
2068
  },
2069
  {
2070
+ "epoch": 4.22,
2071
+ "learning_rate": 4.985999999999999e-06,
2072
+ "loss": 0.1832,
2073
  "step": 1670
2074
  },
2075
  {
2076
  "epoch": 4.23,
2077
+ "learning_rate": 5.001e-06,
2078
+ "loss": 0.143,
2079
  "step": 1675
2080
  },
2081
  {
2082
  "epoch": 4.24,
2083
+ "learning_rate": 5.016e-06,
2084
+ "loss": 0.1527,
2085
  "step": 1680
2086
  },
2087
  {
2088
  "epoch": 4.25,
2089
+ "learning_rate": 5.031e-06,
2090
+ "loss": 0.1669,
2091
  "step": 1685
2092
  },
2093
  {
2094
+ "epoch": 4.27,
2095
+ "learning_rate": 5.046e-06,
2096
+ "loss": 0.1313,
2097
  "step": 1690
2098
  },
2099
  {
2100
  "epoch": 4.28,
2101
+ "learning_rate": 5.0609999999999995e-06,
2102
+ "loss": 0.1273,
2103
  "step": 1695
2104
  },
2105
  {
2106
  "epoch": 4.29,
2107
+ "learning_rate": 5.076e-06,
2108
+ "loss": 0.1433,
2109
  "step": 1700
2110
  },
2111
  {
2112
  "epoch": 4.3,
2113
+ "learning_rate": 5.091e-06,
2114
+ "loss": 0.1579,
2115
  "step": 1705
2116
  },
2117
  {
2118
  "epoch": 4.32,
2119
+ "learning_rate": 5.106e-06,
2120
+ "loss": 0.1415,
2121
  "step": 1710
2122
  },
2123
  {
2124
  "epoch": 4.33,
2125
+ "learning_rate": 5.121e-06,
2126
+ "loss": 0.147,
2127
  "step": 1715
2128
  },
2129
  {
2130
  "epoch": 4.34,
2131
+ "learning_rate": 5.136e-06,
2132
+ "loss": 0.1682,
2133
  "step": 1720
2134
  },
2135
  {
2136
  "epoch": 4.35,
2137
+ "learning_rate": 5.1509999999999995e-06,
2138
+ "loss": 0.1352,
2139
  "step": 1725
2140
  },
2141
  {
2142
  "epoch": 4.37,
2143
+ "learning_rate": 5.166e-06,
2144
+ "loss": 0.14,
2145
  "step": 1730
2146
  },
2147
  {
2148
  "epoch": 4.38,
2149
+ "learning_rate": 5.181e-06,
2150
+ "loss": 0.1724,
2151
  "step": 1735
2152
  },
2153
  {
2154
  "epoch": 4.39,
2155
+ "learning_rate": 5.196e-06,
2156
+ "loss": 0.1442,
2157
  "step": 1740
2158
  },
2159
  {
2160
  "epoch": 4.4,
2161
+ "learning_rate": 5.211e-06,
2162
+ "loss": 0.1316,
2163
  "step": 1745
2164
  },
2165
  {
2166
  "epoch": 4.42,
2167
+ "learning_rate": 5.226e-06,
2168
+ "loss": 0.1546,
2169
  "step": 1750
2170
  },
2171
  {
2172
  "epoch": 4.42,
2173
+ "eval_cer": 0.029413526627927875,
2174
+ "eval_loss": 0.14518263936042786,
2175
+ "eval_runtime": 60.4847,
2176
+ "eval_samples_per_second": 41.796,
2177
+ "eval_steps_per_second": 1.174,
2178
+ "eval_wer": 0.11381389030395653,
2179
  "step": 1750
2180
  },
2181
  {
2182
  "epoch": 4.43,
2183
+ "learning_rate": 5.241e-06,
2184
+ "loss": 0.1528,
2185
  "step": 1755
2186
  },
2187
  {
2188
  "epoch": 4.44,
2189
+ "learning_rate": 5.256e-06,
2190
  "loss": 0.1444,
2191
  "step": 1760
2192
  },
2193
  {
2194
+ "epoch": 4.46,
2195
+ "learning_rate": 5.271e-06,
2196
+ "loss": 0.1514,
2197
  "step": 1765
2198
  },
2199
  {
2200
  "epoch": 4.47,
2201
+ "learning_rate": 5.286e-06,
2202
+ "loss": 0.1924,
2203
  "step": 1770
2204
  },
2205
  {
2206
  "epoch": 4.48,
2207
+ "learning_rate": 5.301e-06,
2208
+ "loss": 0.1223,
2209
  "step": 1775
2210
  },
2211
  {
2212
  "epoch": 4.49,
2213
+ "learning_rate": 5.3160000000000004e-06,
2214
+ "loss": 0.1296,
2215
  "step": 1780
2216
  },
2217
  {
2218
+ "epoch": 4.51,
2219
+ "learning_rate": 5.331e-06,
2220
+ "loss": 0.1668,
2221
  "step": 1785
2222
  },
2223
  {
2224
  "epoch": 4.52,
2225
+ "learning_rate": 5.346e-06,
2226
+ "loss": 0.137,
2227
  "step": 1790
2228
  },
2229
  {
2230
  "epoch": 4.53,
2231
+ "learning_rate": 5.361e-06,
2232
+ "loss": 0.1365,
2233
  "step": 1795
2234
  },
2235
  {
2236
  "epoch": 4.54,
2237
+ "learning_rate": 5.376e-06,
2238
+ "loss": 0.1565,
2239
  "step": 1800
2240
  },
2241
  {
2242
+ "epoch": 4.56,
2243
+ "learning_rate": 5.391e-06,
2244
+ "loss": 0.1421,
2245
  "step": 1805
2246
  },
2247
  {
2248
  "epoch": 4.57,
2249
+ "learning_rate": 5.406e-06,
2250
+ "loss": 0.1418,
2251
  "step": 1810
2252
  },
2253
  {
2254
  "epoch": 4.58,
2255
+ "learning_rate": 5.421e-06,
2256
+ "loss": 0.1415,
2257
  "step": 1815
2258
  },
2259
  {
2260
  "epoch": 4.59,
2261
+ "learning_rate": 5.436e-06,
2262
+ "loss": 0.1851,
2263
  "step": 1820
2264
  },
2265
  {
2266
  "epoch": 4.61,
2267
+ "learning_rate": 5.451e-06,
2268
+ "loss": 0.1354,
2269
  "step": 1825
2270
  },
2271
  {
2272
  "epoch": 4.62,
2273
+ "learning_rate": 5.466e-06,
2274
+ "loss": 0.1344,
2275
  "step": 1830
2276
  },
2277
  {
2278
  "epoch": 4.63,
2279
+ "learning_rate": 5.4810000000000005e-06,
2280
+ "loss": 0.1761,
2281
  "step": 1835
2282
  },
2283
  {
2284
  "epoch": 4.64,
2285
+ "learning_rate": 5.496e-06,
2286
+ "loss": 0.1257,
2287
  "step": 1840
2288
  },
2289
  {
2290
  "epoch": 4.66,
2291
+ "learning_rate": 5.511e-06,
2292
+ "loss": 0.1291,
2293
  "step": 1845
2294
  },
2295
  {
2296
  "epoch": 4.67,
2297
+ "learning_rate": 5.526e-06,
2298
+ "loss": 0.1458,
2299
  "step": 1850
2300
  },
2301
  {
2302
  "epoch": 4.68,
2303
+ "learning_rate": 5.541e-06,
2304
+ "loss": 0.1613,
2305
  "step": 1855
2306
  },
2307
  {
2308
  "epoch": 4.69,
2309
+ "learning_rate": 5.556e-06,
2310
+ "loss": 0.12,
2311
  "step": 1860
2312
  },
2313
  {
2314
  "epoch": 4.71,
2315
+ "learning_rate": 5.5710000000000005e-06,
2316
+ "loss": 0.1333,
2317
  "step": 1865
2318
  },
2319
  {
2320
  "epoch": 4.72,
2321
+ "learning_rate": 5.586e-06,
2322
+ "loss": 0.1639,
2323
  "step": 1870
2324
  },
2325
  {
2326
  "epoch": 4.73,
2327
+ "learning_rate": 5.601e-06,
2328
+ "loss": 0.1361,
2329
  "step": 1875
2330
  },
2331
  {
2332
+ "epoch": 4.75,
2333
+ "learning_rate": 5.616e-06,
2334
+ "loss": 0.1319,
2335
  "step": 1880
2336
  },
2337
  {
2338
  "epoch": 4.76,
2339
+ "learning_rate": 5.631e-06,
2340
+ "loss": 0.1608,
2341
  "step": 1885
2342
  },
2343
  {
2344
  "epoch": 4.77,
2345
+ "learning_rate": 5.646000000000001e-06,
2346
+ "loss": 0.123,
2347
  "step": 1890
2348
  },
2349
  {
2350
  "epoch": 4.78,
2351
+ "learning_rate": 5.6610000000000005e-06,
2352
+ "loss": 0.1208,
2353
  "step": 1895
2354
  },
2355
  {
2356
+ "epoch": 4.8,
2357
+ "learning_rate": 5.676e-06,
2358
+ "loss": 0.1335,
2359
  "step": 1900
2360
  },
2361
  {
2362
  "epoch": 4.81,
2363
+ "learning_rate": 5.691e-06,
2364
+ "loss": 0.1401,
2365
  "step": 1905
2366
  },
2367
  {
2368
  "epoch": 4.82,
2369
+ "learning_rate": 5.706e-06,
2370
+ "loss": 0.1159,
2371
  "step": 1910
2372
  },
2373
  {
2374
  "epoch": 4.83,
2375
+ "learning_rate": 5.721000000000001e-06,
2376
+ "loss": 0.1388,
2377
  "step": 1915
2378
  },
2379
  {
2380
+ "epoch": 4.85,
2381
+ "learning_rate": 5.736000000000001e-06,
2382
+ "loss": 0.1646,
2383
  "step": 1920
2384
  },
2385
  {
2386
  "epoch": 4.86,
2387
+ "learning_rate": 5.7510000000000005e-06,
2388
+ "loss": 0.1265,
2389
  "step": 1925
2390
  },
2391
  {
2392
  "epoch": 4.87,
2393
+ "learning_rate": 5.766e-06,
2394
+ "loss": 0.1182,
2395
  "step": 1930
2396
  },
2397
  {
2398
  "epoch": 4.88,
2399
+ "learning_rate": 5.781e-06,
2400
+ "loss": 0.1615,
2401
  "step": 1935
2402
  },
2403
  {
2404
  "epoch": 4.9,
2405
+ "learning_rate": 5.796e-06,
2406
+ "loss": 0.1244,
2407
  "step": 1940
2408
  },
2409
  {
2410
  "epoch": 4.91,
2411
+ "learning_rate": 5.811000000000001e-06,
2412
+ "loss": 0.1281,
2413
  "step": 1945
2414
  },
2415
  {
2416
  "epoch": 4.92,
2417
+ "learning_rate": 5.826000000000001e-06,
2418
+ "loss": 0.1366,
2419
  "step": 1950
2420
  },
2421
  {
2422
  "epoch": 4.93,
2423
+ "learning_rate": 5.8410000000000005e-06,
2424
+ "loss": 0.1452,
2425
  "step": 1955
2426
  },
2427
  {
2428
  "epoch": 4.95,
2429
+ "learning_rate": 5.856e-06,
2430
+ "loss": 0.1169,
2431
  "step": 1960
2432
  },
2433
  {
2434
  "epoch": 4.96,
2435
+ "learning_rate": 5.871e-06,
2436
+ "loss": 0.1341,
2437
  "step": 1965
2438
  },
2439
  {
2440
  "epoch": 4.97,
2441
  "learning_rate": 5.886000000000001e-06,
2442
+ "loss": 0.1373,
2443
  "step": 1970
2444
  },
2445
  {
2446
  "epoch": 4.98,
2447
  "learning_rate": 5.901000000000001e-06,
2448
+ "loss": 0.1185,
2449
  "step": 1975
2450
  },
2451
  {
2452
  "epoch": 5.0,
2453
  "learning_rate": 5.916e-06,
2454
+ "loss": 0.132,
2455
  "step": 1980
2456
  },
2457
  {
2458
+ "epoch": 5.01,
2459
+ "learning_rate": 5.931e-06,
2460
+ "loss": 0.1279,
2461
+ "step": 1985
2462
+ },
2463
+ {
2464
+ "epoch": 5.02,
2465
+ "learning_rate": 5.9459999999999995e-06,
2466
+ "loss": 0.1163,
2467
+ "step": 1990
2468
+ },
2469
+ {
2470
+ "epoch": 5.04,
2471
+ "learning_rate": 5.961e-06,
2472
+ "loss": 0.121,
2473
+ "step": 1995
2474
+ },
2475
+ {
2476
+ "epoch": 5.05,
2477
+ "learning_rate": 5.976e-06,
2478
+ "loss": 0.1245,
2479
+ "step": 2000
2480
+ },
2481
+ {
2482
+ "epoch": 5.05,
2483
+ "eval_cer": 0.025970965873919206,
2484
+ "eval_loss": 0.1316203624010086,
2485
+ "eval_runtime": 59.578,
2486
+ "eval_samples_per_second": 42.432,
2487
+ "eval_steps_per_second": 1.192,
2488
+ "eval_wer": 0.09734250297164204,
2489
+ "step": 2000
2490
+ },
2491
+ {
2492
+ "epoch": 5.06,
2493
+ "learning_rate": 5.991e-06,
2494
+ "loss": 0.117,
2495
+ "step": 2005
2496
+ },
2497
+ {
2498
+ "epoch": 5.07,
2499
+ "learning_rate": 6.006e-06,
2500
+ "loss": 0.115,
2501
+ "step": 2010
2502
+ },
2503
+ {
2504
+ "epoch": 5.09,
2505
+ "learning_rate": 6.021e-06,
2506
+ "loss": 0.1416,
2507
+ "step": 2015
2508
+ },
2509
+ {
2510
+ "epoch": 5.1,
2511
+ "learning_rate": 6.0359999999999995e-06,
2512
+ "loss": 0.1174,
2513
+ "step": 2020
2514
+ },
2515
+ {
2516
+ "epoch": 5.11,
2517
+ "learning_rate": 6.051e-06,
2518
+ "loss": 0.1158,
2519
+ "step": 2025
2520
+ },
2521
+ {
2522
+ "epoch": 5.12,
2523
+ "learning_rate": 6.066e-06,
2524
+ "loss": 0.1249,
2525
+ "step": 2030
2526
+ },
2527
+ {
2528
+ "epoch": 5.14,
2529
+ "learning_rate": 6.081e-06,
2530
+ "loss": 0.1219,
2531
+ "step": 2035
2532
+ },
2533
+ {
2534
+ "epoch": 5.15,
2535
+ "learning_rate": 6.096e-06,
2536
+ "loss": 0.1154,
2537
+ "step": 2040
2538
+ },
2539
+ {
2540
+ "epoch": 5.16,
2541
+ "learning_rate": 6.111e-06,
2542
+ "loss": 0.1172,
2543
+ "step": 2045
2544
+ },
2545
+ {
2546
+ "epoch": 5.17,
2547
+ "learning_rate": 6.126e-06,
2548
+ "loss": 0.1442,
2549
+ "step": 2050
2550
+ },
2551
+ {
2552
+ "epoch": 5.19,
2553
+ "learning_rate": 6.141e-06,
2554
+ "loss": 0.1095,
2555
+ "step": 2055
2556
+ },
2557
+ {
2558
+ "epoch": 5.2,
2559
+ "learning_rate": 6.156e-06,
2560
+ "loss": 0.1127,
2561
+ "step": 2060
2562
+ },
2563
+ {
2564
+ "epoch": 5.21,
2565
+ "learning_rate": 6.171e-06,
2566
+ "loss": 0.1494,
2567
+ "step": 2065
2568
+ },
2569
+ {
2570
+ "epoch": 5.22,
2571
+ "learning_rate": 6.186e-06,
2572
+ "loss": 0.111,
2573
+ "step": 2070
2574
+ },
2575
+ {
2576
+ "epoch": 5.24,
2577
+ "learning_rate": 6.201e-06,
2578
+ "loss": 0.106,
2579
+ "step": 2075
2580
+ },
2581
+ {
2582
+ "epoch": 5.25,
2583
+ "learning_rate": 6.216e-06,
2584
+ "loss": 0.1269,
2585
+ "step": 2080
2586
+ },
2587
+ {
2588
+ "epoch": 5.26,
2589
+ "learning_rate": 6.231e-06,
2590
+ "loss": 0.1254,
2591
+ "step": 2085
2592
+ },
2593
+ {
2594
+ "epoch": 5.27,
2595
+ "learning_rate": 6.246e-06,
2596
+ "loss": 0.1137,
2597
+ "step": 2090
2598
+ },
2599
+ {
2600
+ "epoch": 5.29,
2601
+ "learning_rate": 6.261e-06,
2602
+ "loss": 0.1229,
2603
+ "step": 2095
2604
+ },
2605
+ {
2606
+ "epoch": 5.3,
2607
+ "learning_rate": 6.276e-06,
2608
+ "loss": 0.1341,
2609
+ "step": 2100
2610
+ },
2611
+ {
2612
+ "epoch": 5.31,
2613
+ "learning_rate": 6.291e-06,
2614
+ "loss": 0.1174,
2615
+ "step": 2105
2616
+ },
2617
+ {
2618
+ "epoch": 5.33,
2619
+ "learning_rate": 6.306e-06,
2620
+ "loss": 0.1136,
2621
+ "step": 2110
2622
+ },
2623
+ {
2624
+ "epoch": 5.34,
2625
+ "learning_rate": 6.321e-06,
2626
+ "loss": 0.1498,
2627
+ "step": 2115
2628
+ },
2629
+ {
2630
+ "epoch": 5.35,
2631
+ "learning_rate": 6.336e-06,
2632
+ "loss": 0.1035,
2633
+ "step": 2120
2634
+ },
2635
+ {
2636
+ "epoch": 5.36,
2637
+ "learning_rate": 6.351e-06,
2638
+ "loss": 0.1065,
2639
+ "step": 2125
2640
+ },
2641
+ {
2642
+ "epoch": 5.38,
2643
+ "learning_rate": 6.3660000000000005e-06,
2644
+ "loss": 0.1331,
2645
+ "step": 2130
2646
+ },
2647
+ {
2648
+ "epoch": 5.39,
2649
+ "learning_rate": 6.381e-06,
2650
+ "loss": 0.1261,
2651
+ "step": 2135
2652
+ },
2653
+ {
2654
+ "epoch": 5.4,
2655
+ "learning_rate": 6.396e-06,
2656
+ "loss": 0.105,
2657
+ "step": 2140
2658
+ },
2659
+ {
2660
+ "epoch": 5.41,
2661
+ "learning_rate": 6.411e-06,
2662
+ "loss": 0.1193,
2663
+ "step": 2145
2664
+ },
2665
+ {
2666
+ "epoch": 5.43,
2667
+ "learning_rate": 6.426e-06,
2668
+ "loss": 0.1445,
2669
+ "step": 2150
2670
+ },
2671
+ {
2672
+ "epoch": 5.44,
2673
+ "learning_rate": 6.441e-06,
2674
+ "loss": 0.1145,
2675
+ "step": 2155
2676
+ },
2677
+ {
2678
+ "epoch": 5.45,
2679
+ "learning_rate": 6.4560000000000005e-06,
2680
+ "loss": 0.1091,
2681
+ "step": 2160
2682
+ },
2683
+ {
2684
+ "epoch": 5.46,
2685
+ "learning_rate": 6.471e-06,
2686
+ "loss": 0.154,
2687
+ "step": 2165
2688
+ },
2689
+ {
2690
+ "epoch": 5.48,
2691
+ "learning_rate": 6.486e-06,
2692
+ "loss": 0.1198,
2693
+ "step": 2170
2694
+ },
2695
+ {
2696
+ "epoch": 5.49,
2697
+ "learning_rate": 6.501e-06,
2698
+ "loss": 0.0978,
2699
+ "step": 2175
2700
+ },
2701
+ {
2702
+ "epoch": 5.5,
2703
+ "learning_rate": 6.516e-06,
2704
+ "loss": 0.114,
2705
+ "step": 2180
2706
+ },
2707
+ {
2708
+ "epoch": 5.51,
2709
+ "learning_rate": 6.531000000000001e-06,
2710
+ "loss": 0.1289,
2711
+ "step": 2185
2712
+ },
2713
+ {
2714
+ "epoch": 5.53,
2715
+ "learning_rate": 6.5460000000000005e-06,
2716
+ "loss": 0.1059,
2717
+ "step": 2190
2718
+ },
2719
+ {
2720
+ "epoch": 5.54,
2721
+ "learning_rate": 6.561e-06,
2722
+ "loss": 0.1077,
2723
+ "step": 2195
2724
+ },
2725
+ {
2726
+ "epoch": 5.55,
2727
+ "learning_rate": 6.576e-06,
2728
+ "loss": 0.1407,
2729
+ "step": 2200
2730
+ },
2731
+ {
2732
+ "epoch": 5.56,
2733
+ "learning_rate": 6.591e-06,
2734
+ "loss": 0.1077,
2735
+ "step": 2205
2736
+ },
2737
+ {
2738
+ "epoch": 5.58,
2739
+ "learning_rate": 6.606000000000001e-06,
2740
+ "loss": 0.1089,
2741
+ "step": 2210
2742
+ },
2743
+ {
2744
+ "epoch": 5.59,
2745
+ "learning_rate": 6.621000000000001e-06,
2746
+ "loss": 0.1244,
2747
+ "step": 2215
2748
+ },
2749
+ {
2750
+ "epoch": 5.6,
2751
+ "learning_rate": 6.6360000000000005e-06,
2752
+ "loss": 0.1006,
2753
+ "step": 2220
2754
+ },
2755
+ {
2756
+ "epoch": 5.62,
2757
+ "learning_rate": 6.651e-06,
2758
+ "loss": 0.1124,
2759
+ "step": 2225
2760
+ },
2761
+ {
2762
+ "epoch": 5.63,
2763
+ "learning_rate": 6.666e-06,
2764
+ "loss": 0.1202,
2765
+ "step": 2230
2766
+ },
2767
+ {
2768
+ "epoch": 5.64,
2769
+ "learning_rate": 6.681e-06,
2770
+ "loss": 0.1208,
2771
+ "step": 2235
2772
+ },
2773
+ {
2774
+ "epoch": 5.65,
2775
+ "learning_rate": 6.696000000000001e-06,
2776
+ "loss": 0.0993,
2777
+ "step": 2240
2778
+ },
2779
+ {
2780
+ "epoch": 5.67,
2781
+ "learning_rate": 6.711000000000001e-06,
2782
+ "loss": 0.1111,
2783
+ "step": 2245
2784
+ },
2785
+ {
2786
+ "epoch": 5.68,
2787
+ "learning_rate": 6.7260000000000005e-06,
2788
+ "loss": 0.1341,
2789
+ "step": 2250
2790
+ },
2791
+ {
2792
+ "epoch": 5.68,
2793
+ "eval_cer": 0.023351105253699156,
2794
+ "eval_loss": 0.11955570429563522,
2795
+ "eval_runtime": 60.3253,
2796
+ "eval_samples_per_second": 41.906,
2797
+ "eval_steps_per_second": 1.177,
2798
+ "eval_wer": 0.08670826965528952,
2799
+ "step": 2250
2800
+ },
2801
+ {
2802
+ "epoch": 5.69,
2803
+ "learning_rate": 6.741e-06,
2804
+ "loss": 0.0995,
2805
+ "step": 2255
2806
+ },
2807
+ {
2808
+ "epoch": 5.7,
2809
+ "learning_rate": 6.756e-06,
2810
+ "loss": 0.1081,
2811
+ "step": 2260
2812
+ },
2813
+ {
2814
+ "epoch": 5.72,
2815
+ "learning_rate": 6.771000000000001e-06,
2816
+ "loss": 0.1339,
2817
+ "step": 2265
2818
+ },
2819
+ {
2820
+ "epoch": 5.73,
2821
+ "learning_rate": 6.786000000000001e-06,
2822
+ "loss": 0.1044,
2823
+ "step": 2270
2824
+ },
2825
+ {
2826
+ "epoch": 5.74,
2827
+ "learning_rate": 6.801000000000001e-06,
2828
+ "loss": 0.1029,
2829
+ "step": 2275
2830
+ },
2831
+ {
2832
+ "epoch": 5.75,
2833
+ "learning_rate": 6.8160000000000005e-06,
2834
+ "loss": 0.1184,
2835
+ "step": 2280
2836
+ },
2837
+ {
2838
+ "epoch": 5.77,
2839
+ "learning_rate": 6.831e-06,
2840
+ "loss": 0.1163,
2841
+ "step": 2285
2842
+ },
2843
+ {
2844
+ "epoch": 5.78,
2845
+ "learning_rate": 6.845999999999999e-06,
2846
+ "loss": 0.1023,
2847
+ "step": 2290
2848
+ },
2849
+ {
2850
+ "epoch": 5.79,
2851
+ "learning_rate": 6.861e-06,
2852
+ "loss": 0.1078,
2853
+ "step": 2295
2854
+ },
2855
+ {
2856
+ "epoch": 5.8,
2857
+ "learning_rate": 6.876e-06,
2858
+ "loss": 0.1242,
2859
+ "step": 2300
2860
+ },
2861
+ {
2862
+ "epoch": 5.82,
2863
+ "learning_rate": 6.891e-06,
2864
+ "loss": 0.1036,
2865
+ "step": 2305
2866
+ },
2867
+ {
2868
+ "epoch": 5.83,
2869
+ "learning_rate": 6.906e-06,
2870
+ "loss": 0.109,
2871
+ "step": 2310
2872
+ },
2873
+ {
2874
+ "epoch": 5.84,
2875
+ "learning_rate": 6.9209999999999995e-06,
2876
+ "loss": 0.1163,
2877
+ "step": 2315
2878
+ },
2879
+ {
2880
+ "epoch": 5.85,
2881
+ "learning_rate": 6.936e-06,
2882
+ "loss": 0.0939,
2883
+ "step": 2320
2884
+ },
2885
+ {
2886
+ "epoch": 5.87,
2887
+ "learning_rate": 6.951e-06,
2888
+ "loss": 0.1015,
2889
+ "step": 2325
2890
+ },
2891
+ {
2892
+ "epoch": 5.88,
2893
+ "learning_rate": 6.966e-06,
2894
+ "loss": 0.112,
2895
+ "step": 2330
2896
+ },
2897
+ {
2898
+ "epoch": 5.89,
2899
+ "learning_rate": 6.981e-06,
2900
+ "loss": 0.1072,
2901
+ "step": 2335
2902
+ },
2903
+ {
2904
+ "epoch": 5.91,
2905
+ "learning_rate": 6.996e-06,
2906
+ "loss": 0.109,
2907
+ "step": 2340
2908
+ },
2909
+ {
2910
+ "epoch": 5.92,
2911
+ "learning_rate": 7.011e-06,
2912
+ "loss": 0.1115,
2913
+ "step": 2345
2914
+ },
2915
+ {
2916
+ "epoch": 5.93,
2917
+ "learning_rate": 7.026e-06,
2918
+ "loss": 0.1295,
2919
+ "step": 2350
2920
+ },
2921
+ {
2922
+ "epoch": 5.94,
2923
+ "learning_rate": 7.041e-06,
2924
+ "loss": 0.094,
2925
+ "step": 2355
2926
+ },
2927
+ {
2928
+ "epoch": 5.96,
2929
+ "learning_rate": 7.056e-06,
2930
+ "loss": 0.1087,
2931
+ "step": 2360
2932
+ },
2933
+ {
2934
+ "epoch": 5.97,
2935
+ "learning_rate": 7.071e-06,
2936
+ "loss": 0.1491,
2937
+ "step": 2365
2938
+ },
2939
+ {
2940
+ "epoch": 5.98,
2941
+ "learning_rate": 7.086e-06,
2942
+ "loss": 0.1048,
2943
+ "step": 2370
2944
+ },
2945
+ {
2946
+ "epoch": 5.99,
2947
+ "learning_rate": 7.101e-06,
2948
+ "loss": 0.0986,
2949
+ "step": 2375
2950
+ },
2951
+ {
2952
+ "epoch": 6.01,
2953
+ "learning_rate": 7.116e-06,
2954
+ "loss": 0.1152,
2955
+ "step": 2380
2956
+ },
2957
+ {
2958
+ "epoch": 6.02,
2959
+ "learning_rate": 7.131e-06,
2960
+ "loss": 0.0956,
2961
+ "step": 2385
2962
+ },
2963
+ {
2964
+ "epoch": 6.03,
2965
+ "learning_rate": 7.146e-06,
2966
+ "loss": 0.0967,
2967
+ "step": 2390
2968
+ },
2969
+ {
2970
+ "epoch": 6.04,
2971
+ "learning_rate": 7.161e-06,
2972
+ "loss": 0.1229,
2973
+ "step": 2395
2974
+ },
2975
+ {
2976
+ "epoch": 6.06,
2977
+ "learning_rate": 7.176e-06,
2978
+ "loss": 0.0931,
2979
+ "step": 2400
2980
+ },
2981
+ {
2982
+ "epoch": 6.07,
2983
+ "learning_rate": 7.191e-06,
2984
+ "loss": 0.0938,
2985
+ "step": 2405
2986
+ },
2987
+ {
2988
+ "epoch": 6.08,
2989
+ "learning_rate": 7.206e-06,
2990
+ "loss": 0.1209,
2991
+ "step": 2410
2992
+ },
2993
+ {
2994
+ "epoch": 6.09,
2995
+ "learning_rate": 7.221e-06,
2996
+ "loss": 0.1081,
2997
+ "step": 2415
2998
+ },
2999
+ {
3000
+ "epoch": 6.11,
3001
+ "learning_rate": 7.236e-06,
3002
+ "loss": 0.0918,
3003
+ "step": 2420
3004
+ },
3005
+ {
3006
+ "epoch": 6.12,
3007
+ "learning_rate": 7.2510000000000005e-06,
3008
+ "loss": 0.0979,
3009
+ "step": 2425
3010
+ },
3011
+ {
3012
+ "epoch": 6.13,
3013
+ "learning_rate": 7.266e-06,
3014
+ "loss": 0.1218,
3015
+ "step": 2430
3016
+ },
3017
+ {
3018
+ "epoch": 6.15,
3019
+ "learning_rate": 7.281e-06,
3020
+ "loss": 0.0971,
3021
+ "step": 2435
3022
+ },
3023
+ {
3024
+ "epoch": 6.16,
3025
+ "learning_rate": 7.296e-06,
3026
+ "loss": 0.0977,
3027
+ "step": 2440
3028
+ },
3029
+ {
3030
+ "epoch": 6.17,
3031
+ "learning_rate": 7.311e-06,
3032
+ "loss": 0.1212,
3033
+ "step": 2445
3034
+ },
3035
+ {
3036
+ "epoch": 6.18,
3037
+ "learning_rate": 7.326e-06,
3038
+ "loss": 0.0886,
3039
+ "step": 2450
3040
+ },
3041
+ {
3042
+ "epoch": 6.2,
3043
+ "learning_rate": 7.3410000000000005e-06,
3044
+ "loss": 0.0943,
3045
+ "step": 2455
3046
+ },
3047
+ {
3048
+ "epoch": 6.21,
3049
+ "learning_rate": 7.356e-06,
3050
+ "loss": 0.1049,
3051
+ "step": 2460
3052
+ },
3053
+ {
3054
+ "epoch": 6.22,
3055
+ "learning_rate": 7.371e-06,
3056
+ "loss": 0.0964,
3057
+ "step": 2465
3058
+ },
3059
+ {
3060
+ "epoch": 6.23,
3061
+ "learning_rate": 7.386e-06,
3062
+ "loss": 0.0978,
3063
+ "step": 2470
3064
+ },
3065
+ {
3066
+ "epoch": 6.25,
3067
+ "learning_rate": 7.401e-06,
3068
+ "loss": 0.1037,
3069
+ "step": 2475
3070
+ },
3071
+ {
3072
+ "epoch": 6.26,
3073
+ "learning_rate": 7.416000000000001e-06,
3074
+ "loss": 0.1043,
3075
+ "step": 2480
3076
+ },
3077
+ {
3078
+ "epoch": 6.27,
3079
+ "learning_rate": 7.4310000000000005e-06,
3080
+ "loss": 0.0984,
3081
+ "step": 2485
3082
+ },
3083
+ {
3084
+ "epoch": 6.28,
3085
+ "learning_rate": 7.446e-06,
3086
+ "loss": 0.0903,
3087
+ "step": 2490
3088
+ },
3089
+ {
3090
+ "epoch": 6.3,
3091
+ "learning_rate": 7.4579999999999996e-06,
3092
+ "loss": 0.1169,
3093
+ "step": 2495
3094
+ },
3095
+ {
3096
+ "epoch": 6.31,
3097
+ "learning_rate": 7.4729999999999994e-06,
3098
+ "loss": 0.0942,
3099
+ "step": 2500
3100
+ },
3101
+ {
3102
+ "epoch": 6.31,
3103
+ "eval_cer": 0.02133029812895625,
3104
+ "eval_loss": 0.11278611421585083,
3105
+ "eval_runtime": 59.9521,
3106
+ "eval_samples_per_second": 42.167,
3107
+ "eval_steps_per_second": 1.184,
3108
+ "eval_wer": 0.0794277466462897,
3109
+ "step": 2500
3110
+ },
3111
+ {
3112
+ "epoch": 6.32,
3113
+ "learning_rate": 7.488e-06,
3114
+ "loss": 0.0928,
3115
+ "step": 2505
3116
+ },
3117
+ {
3118
+ "epoch": 6.33,
3119
+ "learning_rate": 7.503e-06,
3120
+ "loss": 0.1041,
3121
+ "step": 2510
3122
+ },
3123
+ {
3124
+ "epoch": 6.35,
3125
+ "learning_rate": 7.518e-06,
3126
+ "loss": 0.1044,
3127
+ "step": 2515
3128
+ },
3129
+ {
3130
+ "epoch": 6.36,
3131
+ "learning_rate": 7.533e-06,
3132
+ "loss": 0.0916,
3133
+ "step": 2520
3134
+ },
3135
+ {
3136
+ "epoch": 6.37,
3137
+ "learning_rate": 7.5479999999999996e-06,
3138
+ "loss": 0.0963,
3139
+ "step": 2525
3140
+ },
3141
+ {
3142
+ "epoch": 6.38,
3143
+ "learning_rate": 7.563e-06,
3144
+ "loss": 0.1231,
3145
+ "step": 2530
3146
+ },
3147
+ {
3148
+ "epoch": 6.4,
3149
+ "learning_rate": 7.578e-06,
3150
+ "loss": 0.09,
3151
+ "step": 2535
3152
+ },
3153
+ {
3154
+ "epoch": 6.41,
3155
+ "learning_rate": 7.593e-06,
3156
+ "loss": 0.0903,
3157
+ "step": 2540
3158
+ },
3159
+ {
3160
+ "epoch": 6.42,
3161
+ "learning_rate": 7.608e-06,
3162
+ "loss": 0.1099,
3163
+ "step": 2545
3164
+ },
3165
+ {
3166
+ "epoch": 6.44,
3167
+ "learning_rate": 7.623e-06,
3168
+ "loss": 0.0868,
3169
+ "step": 2550
3170
+ },
3171
+ {
3172
+ "epoch": 6.45,
3173
+ "learning_rate": 7.638e-06,
3174
+ "loss": 0.086,
3175
+ "step": 2555
3176
+ },
3177
+ {
3178
+ "epoch": 6.46,
3179
+ "learning_rate": 7.653e-06,
3180
+ "loss": 0.1063,
3181
+ "step": 2560
3182
+ },
3183
+ {
3184
+ "epoch": 6.47,
3185
+ "learning_rate": 7.668e-06,
3186
+ "loss": 0.1004,
3187
+ "step": 2565
3188
+ },
3189
+ {
3190
+ "epoch": 6.49,
3191
+ "learning_rate": 7.683e-06,
3192
+ "loss": 0.0893,
3193
+ "step": 2570
3194
+ },
3195
+ {
3196
+ "epoch": 6.5,
3197
+ "learning_rate": 7.698e-06,
3198
+ "loss": 0.0942,
3199
+ "step": 2575
3200
+ },
3201
+ {
3202
+ "epoch": 6.51,
3203
+ "learning_rate": 7.713e-06,
3204
+ "loss": 0.1092,
3205
+ "step": 2580
3206
+ },
3207
+ {
3208
+ "epoch": 6.52,
3209
+ "learning_rate": 7.728e-06,
3210
+ "loss": 0.0904,
3211
+ "step": 2585
3212
+ },
3213
+ {
3214
+ "epoch": 6.54,
3215
+ "learning_rate": 7.743e-06,
3216
+ "loss": 0.087,
3217
+ "step": 2590
3218
+ },
3219
+ {
3220
+ "epoch": 6.55,
3221
+ "learning_rate": 7.758000000000001e-06,
3222
+ "loss": 0.1143,
3223
+ "step": 2595
3224
+ },
3225
+ {
3226
+ "epoch": 6.56,
3227
+ "learning_rate": 7.773e-06,
3228
+ "loss": 0.0878,
3229
+ "step": 2600
3230
+ },
3231
+ {
3232
+ "epoch": 6.57,
3233
+ "learning_rate": 7.788e-06,
3234
+ "loss": 0.0854,
3235
+ "step": 2605
3236
+ },
3237
+ {
3238
+ "epoch": 6.59,
3239
+ "learning_rate": 7.803e-06,
3240
+ "loss": 0.1021,
3241
+ "step": 2610
3242
+ },
3243
+ {
3244
+ "epoch": 6.6,
3245
+ "learning_rate": 7.818e-06,
3246
+ "loss": 0.1077,
3247
+ "step": 2615
3248
+ },
3249
+ {
3250
+ "epoch": 6.61,
3251
+ "learning_rate": 7.833e-06,
3252
+ "loss": 0.0884,
3253
+ "step": 2620
3254
+ },
3255
+ {
3256
+ "epoch": 6.62,
3257
+ "learning_rate": 7.848e-06,
3258
+ "loss": 0.09,
3259
+ "step": 2625
3260
+ },
3261
+ {
3262
+ "epoch": 6.64,
3263
+ "learning_rate": 7.863e-06,
3264
+ "loss": 0.1044,
3265
+ "step": 2630
3266
+ },
3267
+ {
3268
+ "epoch": 6.65,
3269
+ "learning_rate": 7.878e-06,
3270
+ "loss": 0.083,
3271
+ "step": 2635
3272
+ },
3273
+ {
3274
+ "epoch": 6.66,
3275
+ "learning_rate": 7.893e-06,
3276
+ "loss": 0.0884,
3277
+ "step": 2640
3278
+ },
3279
+ {
3280
+ "epoch": 6.67,
3281
+ "learning_rate": 7.908e-06,
3282
+ "loss": 0.1213,
3283
+ "step": 2645
3284
+ },
3285
+ {
3286
+ "epoch": 6.69,
3287
+ "learning_rate": 7.923e-06,
3288
+ "loss": 0.0983,
3289
+ "step": 2650
3290
+ },
3291
+ {
3292
+ "epoch": 6.7,
3293
+ "learning_rate": 7.938000000000001e-06,
3294
+ "loss": 0.0923,
3295
+ "step": 2655
3296
+ },
3297
+ {
3298
+ "epoch": 6.71,
3299
+ "learning_rate": 7.953e-06,
3300
+ "loss": 0.1014,
3301
+ "step": 2660
3302
+ },
3303
+ {
3304
+ "epoch": 6.73,
3305
+ "learning_rate": 7.968e-06,
3306
+ "loss": 0.0897,
3307
+ "step": 2665
3308
+ },
3309
+ {
3310
+ "epoch": 6.74,
3311
+ "learning_rate": 7.983e-06,
3312
+ "loss": 0.0831,
3313
+ "step": 2670
3314
+ },
3315
+ {
3316
+ "epoch": 6.75,
3317
+ "learning_rate": 7.998e-06,
3318
+ "loss": 0.0856,
3319
+ "step": 2675
3320
+ },
3321
+ {
3322
+ "epoch": 6.76,
3323
+ "learning_rate": 8.013000000000001e-06,
3324
+ "loss": 0.1153,
3325
+ "step": 2680
3326
+ },
3327
+ {
3328
+ "epoch": 6.78,
3329
+ "learning_rate": 8.028e-06,
3330
+ "loss": 0.0824,
3331
+ "step": 2685
3332
+ },
3333
+ {
3334
+ "epoch": 6.79,
3335
+ "learning_rate": 8.043e-06,
3336
+ "loss": 0.0869,
3337
+ "step": 2690
3338
+ },
3339
+ {
3340
+ "epoch": 6.8,
3341
+ "learning_rate": 8.058e-06,
3342
+ "loss": 0.1197,
3343
+ "step": 2695
3344
+ },
3345
+ {
3346
+ "epoch": 6.81,
3347
+ "learning_rate": 8.073e-06,
3348
+ "loss": 0.0872,
3349
+ "step": 2700
3350
+ },
3351
+ {
3352
+ "epoch": 6.83,
3353
+ "learning_rate": 8.088000000000001e-06,
3354
+ "loss": 0.0873,
3355
+ "step": 2705
3356
+ },
3357
+ {
3358
+ "epoch": 6.84,
3359
+ "learning_rate": 8.103e-06,
3360
+ "loss": 0.1031,
3361
+ "step": 2710
3362
+ },
3363
+ {
3364
+ "epoch": 6.85,
3365
+ "learning_rate": 8.118000000000001e-06,
3366
+ "loss": 0.0902,
3367
+ "step": 2715
3368
+ },
3369
+ {
3370
+ "epoch": 6.86,
3371
+ "learning_rate": 8.133e-06,
3372
+ "loss": 0.0878,
3373
+ "step": 2720
3374
+ },
3375
+ {
3376
+ "epoch": 6.88,
3377
+ "learning_rate": 8.148e-06,
3378
+ "loss": 0.099,
3379
+ "step": 2725
3380
+ },
3381
+ {
3382
+ "epoch": 6.89,
3383
+ "learning_rate": 8.163000000000001e-06,
3384
+ "loss": 0.1037,
3385
+ "step": 2730
3386
+ },
3387
+ {
3388
+ "epoch": 6.9,
3389
+ "learning_rate": 8.178e-06,
3390
+ "loss": 0.0897,
3391
+ "step": 2735
3392
+ },
3393
+ {
3394
+ "epoch": 6.91,
3395
+ "learning_rate": 8.193000000000001e-06,
3396
+ "loss": 0.0888,
3397
+ "step": 2740
3398
+ },
3399
+ {
3400
+ "epoch": 6.93,
3401
+ "learning_rate": 8.208e-06,
3402
+ "loss": 0.1109,
3403
+ "step": 2745
3404
+ },
3405
+ {
3406
+ "epoch": 6.94,
3407
+ "learning_rate": 8.223e-06,
3408
+ "loss": 0.0848,
3409
+ "step": 2750
3410
+ },
3411
+ {
3412
+ "epoch": 6.94,
3413
+ "eval_cer": 0.01967291679146948,
3414
+ "eval_loss": 0.10771536827087402,
3415
+ "eval_runtime": 59.6954,
3416
+ "eval_samples_per_second": 42.348,
3417
+ "eval_steps_per_second": 1.189,
3418
+ "eval_wer": 0.07172270334521991,
3419
+ "step": 2750
3420
+ },
3421
+ {
3422
+ "epoch": 6.95,
3423
+ "learning_rate": 8.238e-06,
3424
+ "loss": 0.0871,
3425
+ "step": 2755
3426
+ },
3427
+ {
3428
+ "epoch": 6.96,
3429
+ "learning_rate": 8.253e-06,
3430
+ "loss": 0.1006,
3431
+ "step": 2760
3432
+ },
3433
+ {
3434
+ "epoch": 6.98,
3435
+ "learning_rate": 8.268000000000001e-06,
3436
+ "loss": 0.1059,
3437
+ "step": 2765
3438
+ },
3439
+ {
3440
+ "epoch": 6.99,
3441
+ "learning_rate": 8.283e-06,
3442
+ "loss": 0.0973,
3443
+ "step": 2770
3444
+ },
3445
+ {
3446
+ "epoch": 6.99,
3447
+ "step": 2772,
3448
+ "total_flos": 7.402330898771948e+19,
3449
+ "train_loss": 0.05688602840165039,
3450
+ "train_runtime": 6445.258,
3451
+ "train_samples_per_second": 30.994,
3452
+ "train_steps_per_second": 0.43
3453
  }
3454
  ],
3455
  "logging_steps": 5,
3456
+ "max_steps": 2772,
3457
  "num_input_tokens_seen": 0,
3458
+ "num_train_epochs": 7,
3459
  "save_steps": 500,
3460
+ "total_flos": 7.402330898771948e+19,
3461
  "train_batch_size": 12,
3462
  "trial_name": null,
3463
  "trial_params": null