marinone94 commited on
Commit
166c885
1 Parent(s): 7c14ed3

final training

Browse files
all_results.json CHANGED
@@ -2,31 +2,31 @@
2
  "epoch": 2.32,
3
  "eval_loss": 1.6191972494125366,
4
  "eval_pretrained_loss": 1.710707187652588,
5
- "eval_pretrained_runtime": 113.3653,
6
- "eval_pretrained_samples_per_second": 2.911,
7
- "eval_pretrained_steps_per_second": 0.097,
8
  "eval_pretrained_wer": 258.2308797700783,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
- "test_finetuned_loss": 1.0526511669158936,
14
- "test_finetuned_runtime": 167.4865,
15
- "test_finetuned_samples_per_second": 4.532,
16
- "test_finetuned_steps_per_second": 0.143,
17
- "test_finetuned_wer": 176.51198300471353,
18
  "test_loss": 1.7568330764770508,
19
  "test_pretrained_loss": 1.723103642463684,
20
- "test_pretrained_runtime": 171.9859,
21
- "test_pretrained_samples_per_second": 4.413,
22
- "test_pretrained_steps_per_second": 0.14,
23
  "test_pretrained_wer": 261.9265750514506,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
- "train_loss": 0.9721650715385165,
29
- "train_runtime": 1491.7987,
30
- "train_samples_per_second": 4.805,
31
- "train_steps_per_second": 0.075
32
  }
 
2
  "epoch": 2.32,
3
  "eval_loss": 1.6191972494125366,
4
  "eval_pretrained_loss": 1.710707187652588,
5
+ "eval_pretrained_runtime": 125.9444,
6
+ "eval_pretrained_samples_per_second": 2.62,
7
+ "eval_pretrained_steps_per_second": 0.087,
8
  "eval_pretrained_wer": 258.2308797700783,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
+ "test_finetuned_loss": 1.0840972661972046,
14
+ "test_finetuned_runtime": 169.6782,
15
+ "test_finetuned_samples_per_second": 4.473,
16
+ "test_finetuned_steps_per_second": 0.141,
17
+ "test_finetuned_wer": 172.4357697669787,
18
  "test_loss": 1.7568330764770508,
19
  "test_pretrained_loss": 1.723103642463684,
20
+ "test_pretrained_runtime": 196.5873,
21
+ "test_pretrained_samples_per_second": 3.861,
22
+ "test_pretrained_steps_per_second": 0.122,
23
  "test_pretrained_wer": 261.9265750514506,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
+ "train_loss": 1.0522634956453527,
29
+ "train_runtime": 1735.2585,
30
+ "train_samples_per_second": 4.131,
31
+ "train_steps_per_second": 0.065
32
  }
eval_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "eval_pretrained_loss": 1.710707187652588,
3
- "eval_pretrained_runtime": 113.3653,
4
- "eval_pretrained_samples_per_second": 2.911,
5
- "eval_pretrained_steps_per_second": 0.097,
6
  "eval_pretrained_wer": 258.2308797700783
7
  }
 
1
  {
2
  "eval_pretrained_loss": 1.710707187652588,
3
+ "eval_pretrained_runtime": 125.9444,
4
+ "eval_pretrained_samples_per_second": 2.62,
5
+ "eval_pretrained_steps_per_second": 0.087,
6
  "eval_pretrained_wer": 258.2308797700783
7
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c74db4ae6de9cc4fe7ef67b639a61333e374adb566ec56ff72f1129f43c25bf4
3
  size 151098921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2027faf2910d36a40b249965d707ab19532ef0effc8d243d184de4439275106e
3
  size 151098921
requirements.txt CHANGED
@@ -1,100 +1,295 @@
1
  absl-py==1.3.0
2
  aiohttp==3.8.3
3
  aiosignal==1.3.1
 
4
  appdirs==1.4.4
 
 
 
5
  async-timeout==4.0.2
 
6
  attrs==22.1.0
7
  audioread==3.0.0
 
 
 
 
 
 
 
8
  boto3==1.26.27
9
  botocore==1.29.27
 
10
  cachetools==5.2.0
 
11
  certifi==2022.12.7
12
  cffi==1.15.1
 
13
  charset-normalizer==2.1.1
14
  click==8.1.3
 
 
 
 
 
 
15
  contextlib2==21.6.0
 
 
 
 
 
16
  datasets @ git+https://github.com/huggingface/datasets@6338ded243ccb495b53b1996cba0847bdc250aba
 
17
  decorator==5.1.1
 
18
  dill==0.3.6
 
 
 
 
19
  docker-pycreds==0.4.0
 
 
20
  evaluate==0.3.0
 
21
  filelock==3.8.2
 
 
 
22
  frozenlist==1.3.3
23
  fsspec==2022.11.0
 
 
24
  gitdb==4.0.10
25
  GitPython==3.1.29
 
26
  google-auth==2.15.0
27
  google-auth-oauthlib==0.4.6
28
  google-pasta==0.2.0
29
  grpcio==1.51.1
 
 
 
 
30
  huggingface-hub==0.11.1
 
 
31
  idna==3.4
 
 
32
  importlib-metadata==4.13.0
 
 
 
 
 
 
 
 
 
 
33
  jiwer==2.5.1
34
  jmespath==1.0.1
35
  joblib==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  Levenshtein==0.20.2
37
  librosa==0.9.2
 
38
  llvmlite==0.39.1
 
 
 
39
  Markdown==3.4.1
40
  MarkupSafe==2.1.1
 
 
 
 
41
  more-itertools==9.0.0
 
42
  multidict==6.0.3
 
43
  multiprocess==0.70.14
 
 
 
 
 
 
 
 
 
 
44
  numba==0.56.4
 
45
  numpy==1.23.5
 
 
 
 
 
46
  oauthlib==3.2.2
 
 
 
47
  packaging==20.9
48
  pandas==1.5.2
 
 
 
 
 
49
  pathos==0.3.0
50
  pathtools==0.1.2
 
 
 
 
 
 
 
51
  pooch==1.6.0
52
  pox==0.3.2
53
  ppft==1.7.6.6
 
54
  promise==2.3
 
55
  protobuf==3.20.3
56
  protobuf3-to-dict==0.1.5
57
  psutil==5.9.4
 
 
58
  pyarrow==10.0.1
59
  pyasn1==0.4.8
60
  pyasn1-modules==0.2.8
 
61
  pycparser==2.21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  pyparsing==3.0.9
 
 
 
 
 
 
 
63
  python-dateutil==2.8.2
 
 
64
  pytz==2022.6
 
65
  PyYAML==6.0
 
66
  rapidfuzz==2.13.4
67
  regex==2022.10.31
68
  requests==2.28.1
69
  requests-oauthlib==1.3.1
 
70
  resampy==0.4.2
71
  responses==0.18.0
72
  rsa==4.9
73
  s3transfer==0.6.0
74
  sagemaker==2.121.1
75
  schema==0.7.5
 
 
76
  scikit-learn==1.2.0
77
  scipy==1.9.3
 
 
 
78
  sentry-sdk==1.11.1
 
79
  setproctitle==1.3.2
80
  shortuuid==1.0.11
 
81
  six==1.16.0
82
  smdebug-rulesconfig==1.0.1
83
  smmap==5.0.0
 
 
84
  soundfile==0.11.0
 
 
 
 
 
 
85
  tensorboard==2.11.0
86
  tensorboard-data-server==0.6.1
87
  tensorboard-plugin-wit==1.8.1
 
 
 
 
 
 
88
  threadpoolctl==3.1.0
 
 
89
  tokenizers==0.13.2
90
- torch==1.11.0
91
- torchaudio==0.11.0
 
 
 
 
92
  tqdm==4.64.1
 
93
  transformers @ git+https://github.com/huggingface/transformers@9a6c6ef97fa5df4b1fb8dbc9e8c10ee3a9ed7e2a
94
- typing-extensions==4.4.0
 
 
 
 
95
  urllib3==1.26.13
 
 
 
96
  wandb==0.13.6
 
 
 
97
  Werkzeug==2.2.2
 
 
 
 
98
  xxhash==3.1.0
99
  yarl==1.8.2
100
- zipp==3.11.0
 
 
1
  absl-py==1.3.0
2
  aiohttp==3.8.3
3
  aiosignal==1.3.1
4
+ anyio==3.6.2
5
  appdirs==1.4.4
6
+ argon2-cffi==21.3.0
7
+ argon2-cffi-bindings==21.2.0
8
+ astunparse==1.6.2
9
  async-timeout==4.0.2
10
+ atomicwrites==1.1.5
11
  attrs==22.1.0
12
  audioread==3.0.0
13
+ Automat==0.8.0
14
+ Babel==2.10.3
15
+ backcall==0.1.0
16
+ beautifulsoup4==4.8.2
17
+ bleach==3.1.1
18
+ blinker==1.4
19
+ blosc==1.7.0
20
  boto3==1.26.27
21
  botocore==1.29.27
22
+ bottle==0.12.15
23
  cachetools==5.2.0
24
+ caffe==1.0.0
25
  certifi==2022.12.7
26
  cffi==1.15.1
27
+ chardet==3.0.4
28
  charset-normalizer==2.1.1
29
  click==8.1.3
30
+ cloud-init==22.3.4
31
+ cloudpickle==1.3.0
32
+ colorama==0.4.3
33
+ command-not-found==0.3
34
+ configobj==5.0.6
35
+ constantly==15.1.0
36
  contextlib2==21.6.0
37
+ cryptography==2.8
38
+ ctop==1.0.0
39
+ cycler==0.10.0
40
+ Cython==0.29.14
41
+ dask==2.8.1+dfsg
42
  datasets @ git+https://github.com/huggingface/datasets@6338ded243ccb495b53b1996cba0847bdc250aba
43
+ dbus-python==1.2.16
44
  decorator==5.1.1
45
+ defusedxml==0.6.0
46
  dill==0.3.6
47
+ distlib==0.3.0
48
+ distro==1.4.0
49
+ distro-info===0.23ubuntu1
50
+ docker==4.1.0
51
  docker-pycreds==0.4.0
52
+ entrypoints==0.3
53
+ et-xmlfile==1.0.1
54
  evaluate==0.3.0
55
+ fastjsonschema==2.16.2
56
  filelock==3.8.2
57
+ flake8==3.7.9
58
+ flatbuffers==1.12
59
+ fonttools==4.38.0
60
  frozenlist==1.3.3
61
  fsspec==2022.11.0
62
+ future==0.18.2
63
+ gast==0.4.0
64
  gitdb==4.0.10
65
  GitPython==3.1.29
66
+ Glances==3.1.3
67
  google-auth==2.15.0
68
  google-auth-oauthlib==0.4.6
69
  google-pasta==0.2.0
70
  grpcio==1.51.1
71
+ h5py==2.10.0
72
+ html5lib==1.0.1
73
+ htmlmin==0.1.12
74
+ httplib2==0.14.0
75
  huggingface-hub==0.11.1
76
+ hyperlink==19.0.0
77
+ icdiff==1.9.5
78
  idna==3.4
79
+ ImageHash==4.3.1
80
+ imageio==2.4.1
81
  importlib-metadata==4.13.0
82
+ incremental==16.10.1
83
+ influxdb==5.2.0
84
+ iotop==0.6
85
+ ipykernel==5.2.0
86
+ ipython==7.13.0
87
+ ipython_genutils==0.2.0
88
+ ipywidgets==8.0.2
89
+ jdcal==1.0
90
+ jedi==0.15.2
91
+ Jinja2==3.1.2
92
  jiwer==2.5.1
93
  jmespath==1.0.1
94
  joblib==1.2.0
95
+ json5==0.9.10
96
+ jsonpatch==1.22
97
+ jsonpointer==2.0
98
+ jsonschema==3.2.0
99
+ jupyter-console==6.0.0
100
+ jupyter-server==1.21.0
101
+ jupyter_client==7.4.4
102
+ jupyter_core==4.11.2
103
+ jupyterlab==3.5.0
104
+ jupyterlab-pygments==0.2.2
105
+ jupyterlab-widgets==3.0.3
106
+ jupyterlab_server==2.16.1
107
+ kaptan==0.5.10
108
+ keras==2.9.0
109
+ Keras-Preprocessing==1.1.2
110
+ keyring==18.0.1
111
+ kiwisolver==1.0.1
112
+ language-selector==0.1
113
+ launchpadlib==1.10.13
114
+ lazr.restfulclient==0.14.2
115
+ lazr.uri==1.0.3
116
  Levenshtein==0.20.2
117
  librosa==0.9.2
118
+ libtmux==0.8.2
119
  llvmlite==0.39.1
120
+ locket==0.2.0
121
+ lxml==4.5.0
122
+ Mako==1.1.0
123
  Markdown==3.4.1
124
  MarkupSafe==2.1.1
125
+ matplotlib==3.5.3
126
+ mccabe==0.6.1
127
+ missingno==0.5.1
128
+ mistune==2.0.4
129
  more-itertools==9.0.0
130
+ mpi4py==3.0.3
131
  multidict==6.0.3
132
+ multimethod==1.9
133
  multiprocess==0.70.14
134
+ nbclassic==0.4.7
135
+ nbclient==0.7.0
136
+ nbconvert==7.2.2
137
+ nbformat==5.7.0
138
+ nest-asyncio==1.5.6
139
+ netifaces==0.10.4
140
+ networkx==2.4
141
+ nose==1.3.7
142
+ notebook==6.0.3
143
+ notebook_shim==0.2.0
144
  numba==0.56.4
145
+ numexpr==2.7.1
146
  numpy==1.23.5
147
+ nvidia-cublas-cu11==11.10.3.66
148
+ nvidia-cuda-nvrtc-cu11==11.7.99
149
+ nvidia-cuda-runtime-cu11==11.7.99
150
+ nvidia-cudnn-cu11==8.5.0.96
151
+ nvidia-ml-py==7.352.0
152
  oauthlib==3.2.2
153
+ olefile==0.46
154
+ openpyxl==3.0.3
155
+ opt-einsum==3.3.0
156
  packaging==20.9
157
  pandas==1.5.2
158
+ pandas-profiling==3.4.0
159
+ pandocfilters==1.4.2
160
+ parameterized==0.7.0
161
+ parso==0.5.2
162
+ partd==1.0.0
163
  pathos==0.3.0
164
  pathtools==0.1.2
165
+ patsy==0.5.3
166
+ pexpect==4.6.0
167
+ phik==0.12.2
168
+ pickleshare==0.7.5
169
+ Pillow==7.0.0
170
+ pluggy==0.13.0
171
+ ply==3.11
172
  pooch==1.6.0
173
  pox==0.3.2
174
  ppft==1.7.6.6
175
+ prometheus-client==0.7.1
176
  promise==2.3
177
+ prompt-toolkit==2.0.10
178
  protobuf==3.20.3
179
  protobuf3-to-dict==0.1.5
180
  psutil==5.9.4
181
+ ptyprocess==0.7.0
182
+ py==1.8.1
183
  pyarrow==10.0.1
184
  pyasn1==0.4.8
185
  pyasn1-modules==0.2.8
186
+ pycodestyle==2.5.0
187
  pycparser==2.21
188
+ pycryptodomex==3.6.1
189
+ pycuda==2019.1.2
190
+ pydantic==1.10.2
191
+ pydot==1.4.1
192
+ pyflakes==2.1.1
193
+ Pygments==2.13.0
194
+ PyGObject==3.36.0
195
+ pygpu==0.7.6
196
+ PyHamcrest==1.9.0
197
+ pyinotify==0.9.6
198
+ PyJWT==1.7.1
199
+ pymacaroons==0.13.0
200
+ PyNaCl==1.3.0
201
+ pyOpenSSL==19.0.0
202
  pyparsing==3.0.9
203
+ pyrsistent==0.15.5
204
+ pyserial==3.4
205
+ pysmi==0.3.2
206
+ pysnmp==4.4.6
207
+ pystache==0.5.4
208
+ pytest==4.6.9
209
+ python-apt==2.0.0+ubuntu0.20.4.8
210
  python-dateutil==2.8.2
211
+ python-debian===0.1.36ubuntu1
212
+ pytools==2019.1.1
213
  pytz==2022.6
214
+ PyWavelets==0.5.1
215
  PyYAML==6.0
216
+ pyzmq==24.0.1
217
  rapidfuzz==2.13.4
218
  regex==2022.10.31
219
  requests==2.28.1
220
  requests-oauthlib==1.3.1
221
+ requests-unixsocket==0.2.0
222
  resampy==0.4.2
223
  responses==0.18.0
224
  rsa==4.9
225
  s3transfer==0.6.0
226
  sagemaker==2.121.1
227
  schema==0.7.5
228
+ scikit-cuda==0.5.3
229
+ scikit-image==0.16.2
230
  scikit-learn==1.2.0
231
  scipy==1.9.3
232
+ seaborn==0.12.1
233
+ SecretStorage==2.3.1
234
+ Send2Trash==1.8.0
235
  sentry-sdk==1.11.1
236
+ service-identity==18.1.0
237
  setproctitle==1.3.2
238
  shortuuid==1.0.11
239
+ simplejson==3.16.0
240
  six==1.16.0
241
  smdebug-rulesconfig==1.0.1
242
  smmap==5.0.0
243
+ sniffio==1.3.0
244
+ sos==4.4
245
  soundfile==0.11.0
246
+ soupsieve==1.9.5
247
+ ssh-import-id==5.10
248
+ statsmodels==0.13.2
249
+ systemd-python==234
250
+ tables==3.6.1
251
+ tangled-up-in-unicode==0.2.0
252
  tensorboard==2.11.0
253
  tensorboard-data-server==0.6.1
254
  tensorboard-plugin-wit==1.8.1
255
+ tensorflow-estimator==2.9.0
256
+ tensorflow-gpu==2.9.1
257
+ termcolor==1.1.0
258
+ terminado==0.17.0
259
+ testpath==0.4.4
260
+ Theano==1.0.4
261
  threadpoolctl==3.1.0
262
+ tinycss2==1.2.1
263
+ tmuxp==1.5.4
264
  tokenizers==0.13.2
265
+ tomli==2.0.1
266
+ toolz==0.9.0
267
+ torch==1.13.1
268
+ torchaudio==0.13.1
269
+ torchvision==0.13.1
270
+ tornado==6.2
271
  tqdm==4.64.1
272
+ traitlets==5.5.0
273
  transformers @ git+https://github.com/huggingface/transformers@9a6c6ef97fa5df4b1fb8dbc9e8c10ee3a9ed7e2a
274
+ Twisted==18.9.0
275
+ typing_extensions==4.4.0
276
+ ubuntu-advantage-tools==27.11.2
277
+ ufw==0.36
278
+ unattended-upgrades==0.1
279
  urllib3==1.26.13
280
+ virtualenv==20.0.17
281
+ visions==0.7.5
282
+ wadllib==1.3.3
283
  wandb==0.13.6
284
+ wcwidth==0.1.8
285
+ webencodings==0.5.1
286
+ websocket-client==0.53.0
287
  Werkzeug==2.2.2
288
+ widgetsnbextension==4.0.3
289
+ wrapt==1.11.2
290
+ xlrd==1.1.0
291
+ xlwt==1.3.0
292
  xxhash==3.1.0
293
  yarl==1.8.2
294
+ zipp==3.11.0
295
+ zope.interface==4.7.1
test_finetuned_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.32,
3
- "test_finetuned_loss": 1.0526511669158936,
4
- "test_finetuned_runtime": 167.4865,
5
- "test_finetuned_samples_per_second": 4.532,
6
- "test_finetuned_steps_per_second": 0.143,
7
- "test_finetuned_wer": 176.51198300471353
8
  }
 
1
  {
2
  "epoch": 2.32,
3
+ "test_finetuned_loss": 1.0840972661972046,
4
+ "test_finetuned_runtime": 169.6782,
5
+ "test_finetuned_samples_per_second": 4.473,
6
+ "test_finetuned_steps_per_second": 0.141,
7
+ "test_finetuned_wer": 172.4357697669787
8
  }
test_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "test_pretrained_loss": 1.723103642463684,
3
- "test_pretrained_runtime": 171.9859,
4
- "test_pretrained_samples_per_second": 4.413,
5
- "test_pretrained_steps_per_second": 0.14,
6
  "test_pretrained_wer": 261.9265750514506
7
  }
 
1
  {
2
  "test_pretrained_loss": 1.723103642463684,
3
+ "test_pretrained_runtime": 196.5873,
4
+ "test_pretrained_samples_per_second": 3.861,
5
+ "test_pretrained_steps_per_second": 0.122,
6
  "test_pretrained_wer": 261.9265750514506
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 2.32,
3
- "train_loss": 0.9721650715385165,
4
- "train_runtime": 1491.7987,
5
- "train_samples_per_second": 4.805,
6
- "train_steps_per_second": 0.075
7
  }
 
1
  {
2
  "epoch": 2.32,
3
+ "train_loss": 1.0522634956453527,
4
+ "train_runtime": 1735.2585,
5
+ "train_samples_per_second": 4.131,
6
+ "train_steps_per_second": 0.065
7
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 158.1031454574485,
3
  "best_model_checkpoint": "./checkpoint-88",
4
  "epoch": 2.3214285714285716,
5
  "global_step": 112,
@@ -15,768 +15,768 @@
15
  },
16
  {
17
  "epoch": 0.02,
18
- "learning_rate": 4.347826086956522e-07,
19
  "loss": 1.6564,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.03,
24
- "learning_rate": 8.695652173913044e-07,
25
  "loss": 1.6518,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.04,
30
- "learning_rate": 1.3043478260869566e-06,
31
- "loss": 1.6233,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
- "learning_rate": 1.7391304347826088e-06,
37
- "loss": 1.712,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
- "learning_rate": 2.173913043478261e-06,
43
- "loss": 1.6476,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.06,
48
- "learning_rate": 2.6086956521739132e-06,
49
- "loss": 1.5716,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.07,
54
- "learning_rate": 3.043478260869566e-06,
55
- "loss": 1.5909,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.08,
60
- "learning_rate": 3.4782608695652175e-06,
61
- "loss": 1.5905,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.09,
66
- "learning_rate": 3.91304347826087e-06,
67
- "loss": 1.5486,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.1,
72
- "learning_rate": 4.347826086956522e-06,
73
- "loss": 1.5299,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.1,
78
- "eval_loss": 1.5621598958969116,
79
- "eval_runtime": 105.0814,
80
- "eval_samples_per_second": 3.14,
81
- "eval_steps_per_second": 0.105,
82
- "eval_wer": 219.67108414497844,
83
  "step": 11
84
  },
85
  {
86
  "epoch": 0.11,
87
- "learning_rate": 4.782608695652174e-06,
88
- "loss": 1.4537,
89
  "step": 12
90
  },
91
  {
92
  "epoch": 0.12,
93
- "learning_rate": 5.2173913043478265e-06,
94
- "loss": 1.3729,
95
  "step": 13
96
  },
97
  {
98
  "epoch": 0.12,
99
- "learning_rate": 5.652173913043479e-06,
100
- "loss": 1.4921,
101
  "step": 14
102
  },
103
  {
104
  "epoch": 0.13,
105
- "learning_rate": 6.086956521739132e-06,
106
- "loss": 1.5069,
107
  "step": 15
108
  },
109
  {
110
  "epoch": 0.14,
111
- "learning_rate": 6.521739130434783e-06,
112
- "loss": 1.388,
113
  "step": 16
114
  },
115
  {
116
  "epoch": 0.15,
117
- "learning_rate": 6.956521739130435e-06,
118
- "loss": 1.3857,
119
  "step": 17
120
  },
121
  {
122
  "epoch": 0.16,
123
- "learning_rate": 7.391304347826087e-06,
124
- "loss": 1.3389,
125
  "step": 18
126
  },
127
  {
128
  "epoch": 0.17,
129
- "learning_rate": 7.82608695652174e-06,
130
- "loss": 1.3089,
131
  "step": 19
132
  },
133
  {
134
  "epoch": 0.18,
135
- "learning_rate": 8.260869565217392e-06,
136
- "loss": 1.2118,
137
  "step": 20
138
  },
139
  {
140
  "epoch": 0.19,
141
- "learning_rate": 8.695652173913044e-06,
142
- "loss": 1.1634,
143
  "step": 21
144
  },
145
  {
146
  "epoch": 0.2,
147
- "learning_rate": 9.130434782608697e-06,
148
- "loss": 1.1908,
149
  "step": 22
150
  },
151
  {
152
  "epoch": 0.2,
153
- "eval_loss": 1.3651723861694336,
154
- "eval_runtime": 110.8115,
155
- "eval_samples_per_second": 2.978,
156
- "eval_steps_per_second": 0.099,
157
- "eval_wer": 192.2401405077439,
158
  "step": 22
159
  },
160
  {
161
  "epoch": 0.21,
162
- "learning_rate": 9.565217391304349e-06,
163
- "loss": 1.1192,
164
  "step": 23
165
  },
166
  {
167
  "epoch": 0.21,
168
- "learning_rate": 1e-05,
169
- "loss": 1.2042,
170
  "step": 24
171
  },
172
  {
173
  "epoch": 0.22,
174
- "learning_rate": 9.887640449438202e-06,
175
- "loss": 1.1448,
176
  "step": 25
177
  },
178
  {
179
  "epoch": 0.23,
180
- "learning_rate": 9.775280898876405e-06,
181
- "loss": 1.1393,
182
  "step": 26
183
  },
184
  {
185
  "epoch": 0.24,
186
- "learning_rate": 9.662921348314608e-06,
187
- "loss": 1.1482,
188
  "step": 27
189
  },
190
  {
191
  "epoch": 0.25,
192
- "learning_rate": 9.55056179775281e-06,
193
- "loss": 1.179,
194
  "step": 28
195
  },
196
  {
197
  "epoch": 0.26,
198
- "learning_rate": 9.438202247191012e-06,
199
- "loss": 0.9847,
200
  "step": 29
201
  },
202
  {
203
  "epoch": 0.27,
204
- "learning_rate": 9.325842696629213e-06,
205
- "loss": 1.1149,
206
  "step": 30
207
  },
208
  {
209
  "epoch": 0.28,
210
- "learning_rate": 9.213483146067417e-06,
211
- "loss": 1.0657,
212
  "step": 31
213
  },
214
  {
215
  "epoch": 0.29,
216
- "learning_rate": 9.101123595505619e-06,
217
- "loss": 1.0028,
218
  "step": 32
219
  },
220
  {
221
  "epoch": 0.29,
222
- "learning_rate": 8.988764044943822e-06,
223
- "loss": 1.1161,
224
  "step": 33
225
  },
226
  {
227
  "epoch": 0.29,
228
- "eval_loss": 1.1921106576919556,
229
- "eval_runtime": 103.0628,
230
- "eval_samples_per_second": 3.202,
231
- "eval_steps_per_second": 0.107,
232
- "eval_wer": 200.23950183618075,
233
  "step": 33
234
  },
235
  {
236
  "epoch": 0.3,
237
- "learning_rate": 8.876404494382023e-06,
238
- "loss": 1.0931,
239
  "step": 34
240
  },
241
  {
242
  "epoch": 0.31,
243
- "learning_rate": 8.764044943820226e-06,
244
- "loss": 1.0635,
245
  "step": 35
246
  },
247
  {
248
  "epoch": 0.32,
249
- "learning_rate": 8.651685393258428e-06,
250
- "loss": 1.0671,
251
  "step": 36
252
  },
253
  {
254
  "epoch": 0.33,
255
- "learning_rate": 8.53932584269663e-06,
256
- "loss": 1.0251,
257
  "step": 37
258
  },
259
  {
260
  "epoch": 0.34,
261
- "learning_rate": 8.426966292134832e-06,
262
- "loss": 1.0486,
263
  "step": 38
264
  },
265
  {
266
  "epoch": 1.01,
267
- "learning_rate": 8.314606741573035e-06,
268
- "loss": 0.9926,
269
  "step": 39
270
  },
271
  {
272
  "epoch": 1.02,
273
- "learning_rate": 8.202247191011237e-06,
274
- "loss": 1.0232,
275
  "step": 40
276
  },
277
  {
278
  "epoch": 1.03,
279
- "learning_rate": 8.08988764044944e-06,
280
- "loss": 0.9421,
281
  "step": 41
282
  },
283
  {
284
  "epoch": 1.04,
285
- "learning_rate": 7.97752808988764e-06,
286
- "loss": 0.9541,
287
  "step": 42
288
  },
289
  {
290
  "epoch": 1.04,
291
- "learning_rate": 7.865168539325843e-06,
292
- "loss": 0.9639,
293
  "step": 43
294
  },
295
  {
296
  "epoch": 1.05,
297
- "learning_rate": 7.752808988764046e-06,
298
- "loss": 0.9216,
299
  "step": 44
300
  },
301
  {
302
  "epoch": 1.05,
303
- "eval_loss": 1.1263455152511597,
304
- "eval_runtime": 93.6663,
305
- "eval_samples_per_second": 3.523,
306
- "eval_steps_per_second": 0.117,
307
- "eval_wer": 186.52403001756346,
308
  "step": 44
309
  },
310
  {
311
  "epoch": 1.06,
312
- "learning_rate": 7.640449438202247e-06,
313
- "loss": 0.9097,
314
  "step": 45
315
  },
316
  {
317
  "epoch": 1.07,
318
- "learning_rate": 7.5280898876404495e-06,
319
- "loss": 0.8688,
320
  "step": 46
321
  },
322
  {
323
  "epoch": 1.08,
324
- "learning_rate": 7.415730337078652e-06,
325
- "loss": 0.9019,
326
  "step": 47
327
  },
328
  {
329
  "epoch": 1.09,
330
- "learning_rate": 7.303370786516854e-06,
331
- "loss": 0.9135,
332
  "step": 48
333
  },
334
  {
335
  "epoch": 1.1,
336
- "learning_rate": 7.191011235955056e-06,
337
- "loss": 0.9033,
338
  "step": 49
339
  },
340
  {
341
  "epoch": 1.11,
342
- "learning_rate": 7.078651685393258e-06,
343
- "loss": 0.8575,
344
  "step": 50
345
  },
346
  {
347
  "epoch": 1.12,
348
- "learning_rate": 6.966292134831461e-06,
349
- "loss": 0.8276,
350
  "step": 51
351
  },
352
  {
353
  "epoch": 1.12,
354
- "learning_rate": 6.853932584269663e-06,
355
- "loss": 0.9276,
356
  "step": 52
357
  },
358
  {
359
  "epoch": 1.13,
360
- "learning_rate": 6.741573033707865e-06,
361
- "loss": 0.9186,
362
  "step": 53
363
  },
364
  {
365
  "epoch": 1.14,
366
- "learning_rate": 6.629213483146067e-06,
367
- "loss": 0.8693,
368
  "step": 54
369
  },
370
  {
371
  "epoch": 1.15,
372
- "learning_rate": 6.51685393258427e-06,
373
- "loss": 0.8441,
374
  "step": 55
375
  },
376
  {
377
  "epoch": 1.15,
378
- "eval_loss": 1.0945535898208618,
379
- "eval_runtime": 102.0356,
380
- "eval_samples_per_second": 3.234,
381
- "eval_steps_per_second": 0.108,
382
- "eval_wer": 179.32300814306242,
383
  "step": 55
384
  },
385
  {
386
  "epoch": 1.16,
387
- "learning_rate": 6.404494382022472e-06,
388
- "loss": 0.8189,
389
  "step": 56
390
  },
391
  {
392
  "epoch": 1.17,
393
- "learning_rate": 6.292134831460674e-06,
394
- "loss": 0.8047,
395
  "step": 57
396
  },
397
  {
398
  "epoch": 1.18,
399
- "learning_rate": 6.179775280898876e-06,
400
- "loss": 0.7858,
401
  "step": 58
402
  },
403
  {
404
  "epoch": 1.19,
405
- "learning_rate": 6.06741573033708e-06,
406
- "loss": 0.7312,
407
  "step": 59
408
  },
409
  {
410
  "epoch": 1.2,
411
- "learning_rate": 5.955056179775281e-06,
412
- "loss": 0.7638,
413
  "step": 60
414
  },
415
  {
416
  "epoch": 1.21,
417
- "learning_rate": 5.842696629213483e-06,
418
- "loss": 0.7374,
419
  "step": 61
420
  },
421
  {
422
  "epoch": 1.21,
423
- "learning_rate": 5.730337078651685e-06,
424
- "loss": 0.7986,
425
  "step": 62
426
  },
427
  {
428
  "epoch": 1.22,
429
- "learning_rate": 5.617977528089889e-06,
430
- "loss": 0.762,
431
  "step": 63
432
  },
433
  {
434
  "epoch": 1.23,
435
- "learning_rate": 5.50561797752809e-06,
436
- "loss": 0.785,
437
  "step": 64
438
  },
439
  {
440
  "epoch": 1.24,
441
- "learning_rate": 5.393258426966292e-06,
442
- "loss": 0.8349,
443
  "step": 65
444
  },
445
  {
446
  "epoch": 1.25,
447
- "learning_rate": 5.280898876404494e-06,
448
- "loss": 0.8505,
449
  "step": 66
450
  },
451
  {
452
  "epoch": 1.25,
453
- "eval_loss": 1.0748353004455566,
454
- "eval_runtime": 93.3893,
455
- "eval_samples_per_second": 3.534,
456
- "eval_steps_per_second": 0.118,
457
- "eval_wer": 159.68385757624142,
458
  "step": 66
459
  },
460
  {
461
  "epoch": 1.26,
462
- "learning_rate": 5.168539325842698e-06,
463
- "loss": 0.7309,
464
  "step": 67
465
  },
466
  {
467
  "epoch": 1.27,
468
- "learning_rate": 5.0561797752809e-06,
469
- "loss": 0.8261,
470
  "step": 68
471
  },
472
  {
473
  "epoch": 1.28,
474
- "learning_rate": 4.943820224719101e-06,
475
- "loss": 0.8053,
476
  "step": 69
477
  },
478
  {
479
  "epoch": 1.29,
480
- "learning_rate": 4.831460674157304e-06,
481
- "loss": 0.7672,
482
  "step": 70
483
  },
484
  {
485
  "epoch": 1.29,
486
- "learning_rate": 4.719101123595506e-06,
487
- "loss": 0.8692,
488
  "step": 71
489
  },
490
  {
491
  "epoch": 1.3,
492
- "learning_rate": 4.606741573033709e-06,
493
- "loss": 0.8588,
494
  "step": 72
495
  },
496
  {
497
  "epoch": 1.31,
498
- "learning_rate": 4.494382022471911e-06,
499
- "loss": 0.8277,
500
  "step": 73
501
  },
502
  {
503
  "epoch": 1.32,
504
- "learning_rate": 4.382022471910113e-06,
505
- "loss": 0.8387,
506
  "step": 74
507
  },
508
  {
509
  "epoch": 1.33,
510
- "learning_rate": 4.269662921348315e-06,
511
- "loss": 0.801,
512
  "step": 75
513
  },
514
  {
515
  "epoch": 1.34,
516
- "learning_rate": 4.157303370786518e-06,
517
- "loss": 0.7801,
518
  "step": 76
519
  },
520
  {
521
  "epoch": 2.01,
522
- "learning_rate": 4.04494382022472e-06,
523
- "loss": 0.7844,
524
  "step": 77
525
  },
526
  {
527
  "epoch": 2.01,
528
- "eval_loss": 1.0585265159606934,
529
- "eval_runtime": 87.6428,
530
- "eval_samples_per_second": 3.765,
531
- "eval_steps_per_second": 0.126,
532
- "eval_wer": 163.2923519080313,
533
  "step": 77
534
  },
535
  {
536
  "epoch": 2.02,
537
- "learning_rate": 3.932584269662922e-06,
538
- "loss": 0.8227,
539
  "step": 78
540
  },
541
  {
542
  "epoch": 2.03,
543
- "learning_rate": 3.820224719101124e-06,
544
- "loss": 0.757,
545
  "step": 79
546
  },
547
  {
548
  "epoch": 2.04,
549
- "learning_rate": 3.707865168539326e-06,
550
- "loss": 0.7713,
551
  "step": 80
552
  },
553
  {
554
  "epoch": 2.04,
555
- "learning_rate": 3.595505617977528e-06,
556
- "loss": 0.7782,
557
  "step": 81
558
  },
559
  {
560
  "epoch": 2.05,
561
- "learning_rate": 3.4831460674157306e-06,
562
- "loss": 0.7468,
563
  "step": 82
564
  },
565
  {
566
  "epoch": 2.06,
567
- "learning_rate": 3.3707865168539327e-06,
568
- "loss": 0.7457,
569
  "step": 83
570
  },
571
  {
572
  "epoch": 2.07,
573
- "learning_rate": 3.258426966292135e-06,
574
- "loss": 0.7126,
575
  "step": 84
576
  },
577
  {
578
  "epoch": 2.08,
579
- "learning_rate": 3.146067415730337e-06,
580
- "loss": 0.7476,
581
  "step": 85
582
  },
583
  {
584
  "epoch": 2.09,
585
- "learning_rate": 3.03370786516854e-06,
586
- "loss": 0.76,
587
  "step": 86
588
  },
589
  {
590
  "epoch": 2.1,
591
- "learning_rate": 2.9213483146067416e-06,
592
- "loss": 0.7673,
593
  "step": 87
594
  },
595
  {
596
  "epoch": 2.11,
597
- "learning_rate": 2.8089887640449444e-06,
598
- "loss": 0.7208,
599
  "step": 88
600
  },
601
  {
602
  "epoch": 2.11,
603
- "eval_loss": 1.0490810871124268,
604
- "eval_runtime": 106.8845,
605
- "eval_samples_per_second": 3.087,
606
- "eval_steps_per_second": 0.103,
607
- "eval_wer": 158.1031454574485,
608
  "step": 88
609
  },
610
  {
611
  "epoch": 2.12,
612
- "learning_rate": 2.696629213483146e-06,
613
- "loss": 0.7045,
614
  "step": 89
615
  },
616
  {
617
  "epoch": 2.12,
618
- "learning_rate": 2.584269662921349e-06,
619
- "loss": 0.7887,
620
  "step": 90
621
  },
622
  {
623
  "epoch": 2.13,
624
- "learning_rate": 2.4719101123595505e-06,
625
- "loss": 0.7821,
626
  "step": 91
627
  },
628
  {
629
  "epoch": 2.14,
630
- "learning_rate": 2.359550561797753e-06,
631
- "loss": 0.7487,
632
  "step": 92
633
  },
634
  {
635
  "epoch": 2.15,
636
- "learning_rate": 2.2471910112359554e-06,
637
- "loss": 0.7281,
638
  "step": 93
639
  },
640
  {
641
  "epoch": 2.16,
642
- "learning_rate": 2.1348314606741574e-06,
643
- "loss": 0.7084,
644
  "step": 94
645
  },
646
  {
647
  "epoch": 2.17,
648
- "learning_rate": 2.02247191011236e-06,
649
- "loss": 0.6971,
650
  "step": 95
651
  },
652
  {
653
  "epoch": 2.18,
654
- "learning_rate": 1.910112359550562e-06,
655
- "loss": 0.6869,
656
  "step": 96
657
  },
658
  {
659
  "epoch": 2.19,
660
- "learning_rate": 1.797752808988764e-06,
661
- "loss": 0.6411,
662
  "step": 97
663
  },
664
  {
665
  "epoch": 2.2,
666
- "learning_rate": 1.6853932584269663e-06,
667
- "loss": 0.6658,
668
  "step": 98
669
  },
670
  {
671
  "epoch": 2.21,
672
- "learning_rate": 1.5730337078651686e-06,
673
- "loss": 0.6481,
674
  "step": 99
675
  },
676
  {
677
  "epoch": 2.21,
678
- "eval_loss": 1.046801209449768,
679
- "eval_runtime": 89.0792,
680
- "eval_samples_per_second": 3.705,
681
- "eval_steps_per_second": 0.123,
682
- "eval_wer": 158.51828197349514,
683
  "step": 99
684
  },
685
  {
686
  "epoch": 2.21,
687
- "learning_rate": 1.4606741573033708e-06,
688
- "loss": 0.7064,
689
  "step": 100
690
  },
691
  {
692
  "epoch": 2.22,
693
- "learning_rate": 1.348314606741573e-06,
694
- "loss": 0.6699,
695
  "step": 101
696
  },
697
  {
698
  "epoch": 2.23,
699
- "learning_rate": 1.2359550561797752e-06,
700
- "loss": 0.6974,
701
  "step": 102
702
  },
703
  {
704
  "epoch": 2.24,
705
- "learning_rate": 1.1235955056179777e-06,
706
- "loss": 0.7523,
707
  "step": 103
708
  },
709
  {
710
  "epoch": 2.25,
711
- "learning_rate": 1.01123595505618e-06,
712
- "loss": 0.7621,
713
  "step": 104
714
  },
715
  {
716
  "epoch": 2.26,
717
- "learning_rate": 8.98876404494382e-07,
718
- "loss": 0.657,
719
  "step": 105
720
  },
721
  {
722
  "epoch": 2.27,
723
- "learning_rate": 7.865168539325843e-07,
724
- "loss": 0.7443,
725
  "step": 106
726
  },
727
  {
728
  "epoch": 2.28,
729
- "learning_rate": 6.741573033707865e-07,
730
- "loss": 0.729,
731
  "step": 107
732
  },
733
  {
734
  "epoch": 2.29,
735
- "learning_rate": 5.617977528089888e-07,
736
- "loss": 0.6983,
737
  "step": 108
738
  },
739
  {
740
  "epoch": 2.29,
741
- "learning_rate": 4.49438202247191e-07,
742
- "loss": 0.7963,
743
  "step": 109
744
  },
745
  {
746
  "epoch": 2.3,
747
- "learning_rate": 3.3707865168539325e-07,
748
- "loss": 0.7912,
749
  "step": 110
750
  },
751
  {
752
  "epoch": 2.3,
753
- "eval_loss": 1.045613408088684,
754
- "eval_runtime": 91.8976,
755
- "eval_samples_per_second": 3.591,
756
- "eval_steps_per_second": 0.12,
757
- "eval_wer": 168.6092926712438,
758
  "step": 110
759
  },
760
  {
761
  "epoch": 2.31,
762
- "learning_rate": 2.247191011235955e-07,
763
- "loss": 0.7626,
764
  "step": 111
765
  },
766
  {
767
  "epoch": 2.32,
768
- "learning_rate": 1.1235955056179776e-07,
769
- "loss": 0.7754,
770
  "step": 112
771
  },
772
  {
773
  "epoch": 2.32,
774
  "step": 112,
775
  "total_flos": 1.7415399333888e+17,
776
- "train_loss": 0.9721650715385165,
777
- "train_runtime": 1491.7987,
778
- "train_samples_per_second": 4.805,
779
- "train_steps_per_second": 0.075
780
  }
781
  ],
782
  "max_steps": 112,
 
1
  {
2
+ "best_metric": 161.45617116397892,
3
  "best_model_checkpoint": "./checkpoint-88",
4
  "epoch": 2.3214285714285716,
5
  "global_step": 112,
 
15
  },
16
  {
17
  "epoch": 0.02,
18
+ "learning_rate": 2.2058823529411765e-07,
19
  "loss": 1.6564,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.03,
24
+ "learning_rate": 4.411764705882353e-07,
25
  "loss": 1.6518,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.04,
30
+ "learning_rate": 6.61764705882353e-07,
31
+ "loss": 1.6237,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
+ "learning_rate": 8.823529411764706e-07,
37
+ "loss": 1.7141,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
+ "learning_rate": 1.1029411764705884e-06,
43
+ "loss": 1.655,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.06,
48
+ "learning_rate": 1.323529411764706e-06,
49
+ "loss": 1.5904,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.07,
54
+ "learning_rate": 1.5441176470588234e-06,
55
+ "loss": 1.6232,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.08,
60
+ "learning_rate": 1.7647058823529412e-06,
61
+ "loss": 1.6403,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.09,
66
+ "learning_rate": 1.985294117647059e-06,
67
+ "loss": 1.6065,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.1,
72
+ "learning_rate": 2.2058823529411767e-06,
73
+ "loss": 1.5947,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.1,
78
+ "eval_loss": 1.629066824913025,
79
+ "eval_runtime": 125.637,
80
+ "eval_samples_per_second": 2.627,
81
+ "eval_steps_per_second": 0.088,
82
+ "eval_wer": 227.94188088775348,
83
  "step": 11
84
  },
85
  {
86
  "epoch": 0.11,
87
+ "learning_rate": 2.4264705882352943e-06,
88
+ "loss": 1.52,
89
  "step": 12
90
  },
91
  {
92
  "epoch": 0.12,
93
+ "learning_rate": 2.647058823529412e-06,
94
+ "loss": 1.4456,
95
  "step": 13
96
  },
97
  {
98
  "epoch": 0.12,
99
+ "learning_rate": 2.867647058823529e-06,
100
+ "loss": 1.58,
101
  "step": 14
102
  },
103
  {
104
  "epoch": 0.13,
105
+ "learning_rate": 3.088235294117647e-06,
106
+ "loss": 1.5958,
107
  "step": 15
108
  },
109
  {
110
  "epoch": 0.14,
111
+ "learning_rate": 3.308823529411765e-06,
112
+ "loss": 1.4695,
113
  "step": 16
114
  },
115
  {
116
  "epoch": 0.15,
117
+ "learning_rate": 3.5294117647058825e-06,
118
+ "loss": 1.4718,
119
  "step": 17
120
  },
121
  {
122
  "epoch": 0.16,
123
+ "learning_rate": 3.75e-06,
124
+ "loss": 1.4406,
125
  "step": 18
126
  },
127
  {
128
  "epoch": 0.17,
129
+ "learning_rate": 3.970588235294118e-06,
130
+ "loss": 1.4339,
131
  "step": 19
132
  },
133
  {
134
  "epoch": 0.18,
135
+ "learning_rate": 4.191176470588236e-06,
136
+ "loss": 1.313,
137
  "step": 20
138
  },
139
  {
140
  "epoch": 0.19,
141
+ "learning_rate": 4.411764705882353e-06,
142
+ "loss": 1.2825,
143
  "step": 21
144
  },
145
  {
146
  "epoch": 0.2,
147
+ "learning_rate": 4.632352941176471e-06,
148
+ "loss": 1.3194,
149
  "step": 22
150
  },
151
  {
152
  "epoch": 0.2,
153
+ "eval_loss": 1.460469126701355,
154
+ "eval_runtime": 118.4642,
155
+ "eval_samples_per_second": 2.786,
156
+ "eval_steps_per_second": 0.093,
157
+ "eval_wer": 235.94124221619032,
158
  "step": 22
159
  },
160
  {
161
  "epoch": 0.21,
162
+ "learning_rate": 4.852941176470589e-06,
163
+ "loss": 1.2301,
164
  "step": 23
165
  },
166
  {
167
  "epoch": 0.21,
168
+ "learning_rate": 5.073529411764706e-06,
169
+ "loss": 1.3231,
170
  "step": 24
171
  },
172
  {
173
  "epoch": 0.22,
174
+ "learning_rate": 5.294117647058824e-06,
175
+ "loss": 1.2689,
176
  "step": 25
177
  },
178
  {
179
  "epoch": 0.23,
180
+ "learning_rate": 5.5147058823529415e-06,
181
+ "loss": 1.2768,
182
  "step": 26
183
  },
184
  {
185
  "epoch": 0.24,
186
+ "learning_rate": 5.735294117647058e-06,
187
+ "loss": 1.2927,
188
  "step": 27
189
  },
190
  {
191
  "epoch": 0.25,
192
+ "learning_rate": 5.955882352941176e-06,
193
+ "loss": 1.3419,
194
  "step": 28
195
  },
196
  {
197
  "epoch": 0.26,
198
+ "learning_rate": 6.176470588235294e-06,
199
+ "loss": 1.141,
200
  "step": 29
201
  },
202
  {
203
  "epoch": 0.27,
204
+ "learning_rate": 6.397058823529412e-06,
205
+ "loss": 1.2907,
206
  "step": 30
207
  },
208
  {
209
  "epoch": 0.28,
210
+ "learning_rate": 6.61764705882353e-06,
211
+ "loss": 1.2248,
212
  "step": 31
213
  },
214
  {
215
  "epoch": 0.29,
216
+ "learning_rate": 6.838235294117647e-06,
217
+ "loss": 1.149,
218
  "step": 32
219
  },
220
  {
221
  "epoch": 0.29,
222
+ "learning_rate": 7.058823529411765e-06,
223
+ "loss": 1.2636,
224
  "step": 33
225
  },
226
  {
227
  "epoch": 0.29,
228
+ "eval_loss": 1.3149287700653076,
229
+ "eval_runtime": 115.745,
230
+ "eval_samples_per_second": 2.851,
231
+ "eval_steps_per_second": 0.095,
232
+ "eval_wer": 177.19942519559316,
233
  "step": 33
234
  },
235
  {
236
  "epoch": 0.3,
237
+ "learning_rate": 7.2794117647058826e-06,
238
+ "loss": 1.2351,
239
  "step": 34
240
  },
241
  {
242
  "epoch": 0.31,
243
+ "learning_rate": 7.5e-06,
244
+ "loss": 1.2055,
245
  "step": 35
246
  },
247
  {
248
  "epoch": 0.32,
249
+ "learning_rate": 7.403846153846155e-06,
250
+ "loss": 1.1864,
251
  "step": 36
252
  },
253
  {
254
  "epoch": 0.33,
255
+ "learning_rate": 7.307692307692308e-06,
256
+ "loss": 1.148,
257
  "step": 37
258
  },
259
  {
260
  "epoch": 0.34,
261
+ "learning_rate": 7.211538461538462e-06,
262
+ "loss": 1.1505,
263
  "step": 38
264
  },
265
  {
266
  "epoch": 1.01,
267
+ "learning_rate": 7.115384615384615e-06,
268
+ "loss": 1.0859,
269
  "step": 39
270
  },
271
  {
272
  "epoch": 1.02,
273
+ "learning_rate": 7.01923076923077e-06,
274
+ "loss": 1.119,
275
  "step": 40
276
  },
277
  {
278
  "epoch": 1.03,
279
+ "learning_rate": 6.923076923076923e-06,
280
+ "loss": 1.0238,
281
  "step": 41
282
  },
283
  {
284
  "epoch": 1.04,
285
+ "learning_rate": 6.826923076923077e-06,
286
+ "loss": 1.0468,
287
  "step": 42
288
  },
289
  {
290
  "epoch": 1.04,
291
+ "learning_rate": 6.730769230769231e-06,
292
+ "loss": 1.061,
293
  "step": 43
294
  },
295
  {
296
  "epoch": 1.05,
297
+ "learning_rate": 6.6346153846153846e-06,
298
+ "loss": 1.0238,
299
  "step": 44
300
  },
301
  {
302
  "epoch": 1.05,
303
+ "eval_loss": 1.1836069822311401,
304
+ "eval_runtime": 109.1507,
305
+ "eval_samples_per_second": 3.023,
306
+ "eval_steps_per_second": 0.101,
307
+ "eval_wer": 199.80839853105542,
308
  "step": 44
309
  },
310
  {
311
  "epoch": 1.06,
312
+ "learning_rate": 6.538461538461539e-06,
313
+ "loss": 1.0016,
314
  "step": 45
315
  },
316
  {
317
  "epoch": 1.07,
318
+ "learning_rate": 6.442307692307692e-06,
319
+ "loss": 0.9635,
320
  "step": 46
321
  },
322
  {
323
  "epoch": 1.08,
324
+ "learning_rate": 6.3461538461538466e-06,
325
+ "loss": 0.9953,
326
  "step": 47
327
  },
328
  {
329
  "epoch": 1.09,
330
+ "learning_rate": 6.25e-06,
331
+ "loss": 1.0061,
332
  "step": 48
333
  },
334
  {
335
  "epoch": 1.1,
336
+ "learning_rate": 6.153846153846154e-06,
337
+ "loss": 0.9933,
338
  "step": 49
339
  },
340
  {
341
  "epoch": 1.11,
342
+ "learning_rate": 6.057692307692308e-06,
343
+ "loss": 0.9479,
344
  "step": 50
345
  },
346
  {
347
  "epoch": 1.12,
348
+ "learning_rate": 5.9615384615384615e-06,
349
+ "loss": 0.9132,
350
  "step": 51
351
  },
352
  {
353
  "epoch": 1.12,
354
+ "learning_rate": 5.865384615384615e-06,
355
+ "loss": 1.0232,
356
  "step": 52
357
  },
358
  {
359
  "epoch": 1.13,
360
+ "learning_rate": 5.76923076923077e-06,
361
+ "loss": 1.0153,
362
  "step": 53
363
  },
364
  {
365
  "epoch": 1.14,
366
+ "learning_rate": 5.673076923076923e-06,
367
+ "loss": 0.9548,
368
  "step": 54
369
  },
370
  {
371
  "epoch": 1.15,
372
+ "learning_rate": 5.576923076923077e-06,
373
+ "loss": 0.9306,
374
  "step": 55
375
  },
376
  {
377
  "epoch": 1.15,
378
+ "eval_loss": 1.1363615989685059,
379
+ "eval_runtime": 115.8861,
380
+ "eval_samples_per_second": 2.848,
381
+ "eval_steps_per_second": 0.095,
382
+ "eval_wer": 210.21874501037843,
383
  "step": 55
384
  },
385
  {
386
  "epoch": 1.16,
387
+ "learning_rate": 5.480769230769231e-06,
388
+ "loss": 0.9088,
389
  "step": 56
390
  },
391
  {
392
  "epoch": 1.17,
393
+ "learning_rate": 5.384615384615385e-06,
394
+ "loss": 0.8902,
395
  "step": 57
396
  },
397
  {
398
  "epoch": 1.18,
399
+ "learning_rate": 5.288461538461539e-06,
400
+ "loss": 0.861,
401
  "step": 58
402
  },
403
  {
404
  "epoch": 1.19,
405
+ "learning_rate": 5.192307692307692e-06,
406
+ "loss": 0.8047,
407
  "step": 59
408
  },
409
  {
410
  "epoch": 1.2,
411
+ "learning_rate": 5.096153846153847e-06,
412
+ "loss": 0.8459,
413
  "step": 60
414
  },
415
  {
416
  "epoch": 1.21,
417
+ "learning_rate": 4.9999999999999996e-06,
418
+ "loss": 0.8103,
419
  "step": 61
420
  },
421
  {
422
  "epoch": 1.21,
423
+ "learning_rate": 4.903846153846154e-06,
424
+ "loss": 0.8752,
425
  "step": 62
426
  },
427
  {
428
  "epoch": 1.22,
429
+ "learning_rate": 4.807692307692308e-06,
430
+ "loss": 0.8398,
431
  "step": 63
432
  },
433
  {
434
  "epoch": 1.23,
435
+ "learning_rate": 4.711538461538462e-06,
436
+ "loss": 0.8571,
437
  "step": 64
438
  },
439
  {
440
  "epoch": 1.24,
441
+ "learning_rate": 4.615384615384616e-06,
442
+ "loss": 0.8998,
443
  "step": 65
444
  },
445
  {
446
  "epoch": 1.25,
447
+ "learning_rate": 4.519230769230769e-06,
448
+ "loss": 0.9233,
449
  "step": 66
450
  },
451
  {
452
  "epoch": 1.25,
453
+ "eval_loss": 1.1090655326843262,
454
+ "eval_runtime": 104.9772,
455
+ "eval_samples_per_second": 3.144,
456
+ "eval_steps_per_second": 0.105,
457
+ "eval_wer": 175.20357656075362,
458
  "step": 66
459
  },
460
  {
461
  "epoch": 1.26,
462
+ "learning_rate": 4.423076923076924e-06,
463
+ "loss": 0.7878,
464
  "step": 67
465
  },
466
  {
467
  "epoch": 1.27,
468
+ "learning_rate": 4.3269230769230765e-06,
469
+ "loss": 0.8948,
470
  "step": 68
471
  },
472
  {
473
  "epoch": 1.28,
474
+ "learning_rate": 4.230769230769231e-06,
475
+ "loss": 0.8711,
476
  "step": 69
477
  },
478
  {
479
  "epoch": 1.29,
480
+ "learning_rate": 4.134615384615385e-06,
481
+ "loss": 0.8292,
482
  "step": 70
483
  },
484
  {
485
  "epoch": 1.29,
486
+ "learning_rate": 4.0384615384615385e-06,
487
+ "loss": 0.9318,
488
  "step": 71
489
  },
490
  {
491
  "epoch": 1.3,
492
+ "learning_rate": 3.942307692307692e-06,
493
+ "loss": 0.9272,
494
  "step": 72
495
  },
496
  {
497
  "epoch": 1.31,
498
+ "learning_rate": 3.846153846153846e-06,
499
+ "loss": 0.8923,
500
  "step": 73
501
  },
502
  {
503
  "epoch": 1.32,
504
+ "learning_rate": 3.75e-06,
505
+ "loss": 0.9013,
506
  "step": 74
507
  },
508
  {
509
  "epoch": 1.33,
510
+ "learning_rate": 3.653846153846154e-06,
511
+ "loss": 0.8659,
512
  "step": 75
513
  },
514
  {
515
  "epoch": 1.34,
516
+ "learning_rate": 3.5576923076923075e-06,
517
+ "loss": 0.8422,
518
  "step": 76
519
  },
520
  {
521
  "epoch": 2.01,
522
+ "learning_rate": 3.4615384615384617e-06,
523
+ "loss": 0.8482,
524
  "step": 77
525
  },
526
  {
527
  "epoch": 2.01,
528
+ "eval_loss": 1.0900639295578003,
529
+ "eval_runtime": 99.705,
530
+ "eval_samples_per_second": 3.31,
531
+ "eval_steps_per_second": 0.11,
532
+ "eval_wer": 161.85534089094685,
533
  "step": 77
534
  },
535
  {
536
  "epoch": 2.02,
537
+ "learning_rate": 3.3653846153846154e-06,
538
+ "loss": 0.8925,
539
  "step": 78
540
  },
541
  {
542
  "epoch": 2.03,
543
+ "learning_rate": 3.2692307692307696e-06,
544
+ "loss": 0.8202,
545
  "step": 79
546
  },
547
  {
548
  "epoch": 2.04,
549
+ "learning_rate": 3.1730769230769233e-06,
550
+ "loss": 0.8384,
551
  "step": 80
552
  },
553
  {
554
  "epoch": 2.04,
555
+ "learning_rate": 3.076923076923077e-06,
556
+ "loss": 0.8469,
557
  "step": 81
558
  },
559
  {
560
  "epoch": 2.05,
561
+ "learning_rate": 2.9807692307692307e-06,
562
+ "loss": 0.8172,
563
  "step": 82
564
  },
565
  {
566
  "epoch": 2.06,
567
+ "learning_rate": 2.884615384615385e-06,
568
+ "loss": 0.8119,
569
  "step": 83
570
  },
571
  {
572
  "epoch": 2.07,
573
+ "learning_rate": 2.7884615384615386e-06,
574
+ "loss": 0.7811,
575
  "step": 84
576
  },
577
  {
578
  "epoch": 2.08,
579
+ "learning_rate": 2.6923076923076923e-06,
580
+ "loss": 0.819,
581
  "step": 85
582
  },
583
  {
584
  "epoch": 2.09,
585
+ "learning_rate": 2.596153846153846e-06,
586
+ "loss": 0.8324,
587
  "step": 86
588
  },
589
  {
590
  "epoch": 2.1,
591
+ "learning_rate": 2.4999999999999998e-06,
592
+ "loss": 0.8336,
593
  "step": 87
594
  },
595
  {
596
  "epoch": 2.11,
597
+ "learning_rate": 2.403846153846154e-06,
598
+ "loss": 0.7929,
599
  "step": 88
600
  },
601
  {
602
  "epoch": 2.11,
603
+ "eval_loss": 1.0782241821289062,
604
+ "eval_runtime": 111.1236,
605
+ "eval_samples_per_second": 2.97,
606
+ "eval_steps_per_second": 0.099,
607
+ "eval_wer": 161.45617116397892,
608
  "step": 88
609
  },
610
  {
611
  "epoch": 2.12,
612
+ "learning_rate": 2.307692307692308e-06,
613
+ "loss": 0.774,
614
  "step": 89
615
  },
616
  {
617
  "epoch": 2.12,
618
+ "learning_rate": 2.211538461538462e-06,
619
+ "loss": 0.8693,
620
  "step": 90
621
  },
622
  {
623
  "epoch": 2.13,
624
+ "learning_rate": 2.1153846153846155e-06,
625
+ "loss": 0.8628,
626
  "step": 91
627
  },
628
  {
629
  "epoch": 2.14,
630
+ "learning_rate": 2.0192307692307692e-06,
631
+ "loss": 0.8199,
632
  "step": 92
633
  },
634
  {
635
  "epoch": 2.15,
636
+ "learning_rate": 1.923076923076923e-06,
637
+ "loss": 0.7995,
638
  "step": 93
639
  },
640
  {
641
  "epoch": 2.16,
642
+ "learning_rate": 1.826923076923077e-06,
643
+ "loss": 0.7824,
644
  "step": 94
645
  },
646
  {
647
  "epoch": 2.17,
648
+ "learning_rate": 1.7307692307692308e-06,
649
+ "loss": 0.7694,
650
  "step": 95
651
  },
652
  {
653
  "epoch": 2.18,
654
+ "learning_rate": 1.6346153846153848e-06,
655
+ "loss": 0.7526,
656
  "step": 96
657
  },
658
  {
659
  "epoch": 2.19,
660
+ "learning_rate": 1.5384615384615385e-06,
661
+ "loss": 0.7037,
662
  "step": 97
663
  },
664
  {
665
  "epoch": 2.2,
666
+ "learning_rate": 1.4423076923076924e-06,
667
+ "loss": 0.7366,
668
  "step": 98
669
  },
670
  {
671
  "epoch": 2.21,
672
+ "learning_rate": 1.3461538461538462e-06,
673
+ "loss": 0.7134,
674
  "step": 99
675
  },
676
  {
677
  "epoch": 2.21,
678
+ "eval_loss": 1.0737253427505493,
679
+ "eval_runtime": 96.9018,
680
+ "eval_samples_per_second": 3.406,
681
+ "eval_steps_per_second": 0.114,
682
+ "eval_wer": 177.1195912501996,
683
  "step": 99
684
  },
685
  {
686
  "epoch": 2.21,
687
+ "learning_rate": 1.2499999999999999e-06,
688
+ "loss": 0.7738,
689
  "step": 100
690
  },
691
  {
692
  "epoch": 2.22,
693
+ "learning_rate": 1.153846153846154e-06,
694
+ "loss": 0.7373,
695
  "step": 101
696
  },
697
  {
698
  "epoch": 2.23,
699
+ "learning_rate": 1.0576923076923078e-06,
700
+ "loss": 0.7623,
701
  "step": 102
702
  },
703
  {
704
  "epoch": 2.24,
705
+ "learning_rate": 9.615384615384615e-07,
706
+ "loss": 0.815,
707
  "step": 103
708
  },
709
  {
710
  "epoch": 2.25,
711
+ "learning_rate": 8.653846153846154e-07,
712
+ "loss": 0.829,
713
  "step": 104
714
  },
715
  {
716
  "epoch": 2.26,
717
+ "learning_rate": 7.692307692307693e-07,
718
+ "loss": 0.7113,
719
  "step": 105
720
  },
721
  {
722
  "epoch": 2.27,
723
+ "learning_rate": 6.730769230769231e-07,
724
+ "loss": 0.8106,
725
  "step": 106
726
  },
727
  {
728
  "epoch": 2.28,
729
+ "learning_rate": 5.76923076923077e-07,
730
+ "loss": 0.7909,
731
  "step": 107
732
  },
733
  {
734
  "epoch": 2.29,
735
+ "learning_rate": 4.807692307692307e-07,
736
+ "loss": 0.7564,
737
  "step": 108
738
  },
739
  {
740
  "epoch": 2.29,
741
+ "learning_rate": 3.8461538461538463e-07,
742
+ "loss": 0.8574,
743
  "step": 109
744
  },
745
  {
746
  "epoch": 2.3,
747
+ "learning_rate": 2.884615384615385e-07,
748
+ "loss": 0.8543,
749
  "step": 110
750
  },
751
  {
752
  "epoch": 2.3,
753
+ "eval_loss": 1.0717276334762573,
754
+ "eval_runtime": 95.923,
755
+ "eval_samples_per_second": 3.44,
756
+ "eval_steps_per_second": 0.115,
757
+ "eval_wer": 187.5459045186013,
758
  "step": 110
759
  },
760
  {
761
  "epoch": 2.31,
762
+ "learning_rate": 1.9230769230769231e-07,
763
+ "loss": 0.824,
764
  "step": 111
765
  },
766
  {
767
  "epoch": 2.32,
768
+ "learning_rate": 9.615384615384616e-08,
769
+ "loss": 0.8352,
770
  "step": 112
771
  },
772
  {
773
  "epoch": 2.32,
774
  "step": 112,
775
  "total_flos": 1.7415399333888e+17,
776
+ "train_loss": 1.0522634956453527,
777
+ "train_runtime": 1735.2585,
778
+ "train_samples_per_second": 4.131,
779
+ "train_steps_per_second": 0.065
780
  }
781
  ],
782
  "max_steps": 112,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23666f2916cd5411d616f8c83fed6cb219a0ba72c70f8943bef9ed5bb45800ed
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4578ee205fe35fdbb3b98224d5f855f806bd9eedb29095f0f731a29261db2d94
3
  size 3579