csikasote committed · Commit a953690 · verified · 1 Parent(s): bd7e5f5

End of training
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
  license: cc-by-nc-4.0
  base_model: facebook/mms-1b-all
  tags:
+ - automatic-speech-recognition
+ - bemgen
+ - mms
  - generated_from_trainer
  metrics:
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->

  # mms-1b-bemgen-male-model-test

- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BEMGEN - BEM dataset.
  It achieves the following results on the evaluation set:
  - Loss: 0.3060
- - Wer: 0.4447
+ - Wer: 0.4449

  ## Model description

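For readers of the updated model card, a minimal inference sketch follows. It assumes the standard transformers MMS adapter API (`Wav2Vec2ForCTC` with `target_lang`) and a hub repo id derived from the model name; neither is stated in this commit, so adjust both to the actual checkpoint location.

```python
# Minimal ASR sketch, assuming the checkpoint is published under the (hypothetical)
# repo id below and that the Bemba adapter is registered as "bem".
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

model_id = "csikasote/mms-1b-bemgen-male-model-test"  # assumed repo id

processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(
    model_id,
    target_lang="bem",              # matches the committed adapter.bem.safetensors
    ignore_mismatched_sizes=True,   # harmless if shapes already match; needed when the CTC head is re-initialized
)

def transcribe(waveform, sampling_rate=16_000):
    """Transcribe a mono 16 kHz waveform (1-D float array) to Bemba text."""
    inputs = processor(waveform, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    pred_ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(pred_ids)
```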
adapter.bem.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dace1066339cb4d1a602cbf1eb359c6b201ad7f1a5296c6fbf9a18373278352b
+ size 8798532
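The file above is committed as a Git LFS pointer; the actual adapter weights are about 8.8 MB. Once pulled from LFS, the tensors it contains can be listed with the safetensors library. The local filename below is assumed to match the committed one.

```python
# Sketch: list the tensors stored in the committed adapter file.
# Assumes the file has been pulled from LFS into the working directory.
from safetensors import safe_open

with safe_open("adapter.bem.safetensors", framework="pt") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(f"{name}: shape={tuple(tensor.shape)}, dtype={tensor.dtype}")
```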
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "epoch": 2.688728024819028,
+   "eval_loss": 0.3059963285923004,
+   "eval_runtime": 31.5421,
+   "eval_samples": 491,
+   "eval_samples_per_second": 15.567,
+   "eval_steps_per_second": 3.9,
+   "eval_wer": 0.4448742746615087,
+   "total_flos": 6.22653785519667e+18,
+   "train_loss": 0.718606931246244,
+   "train_runtime": 2586.1395,
+   "train_samples": 3866,
+   "train_samples_per_second": 44.847,
+   "train_steps_per_second": 11.217
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 2.688728024819028,
+   "eval_loss": 0.3059963285923004,
+   "eval_runtime": 31.5421,
+   "eval_samples": 491,
+   "eval_samples_per_second": 15.567,
+   "eval_steps_per_second": 3.9,
+   "eval_wer": 0.4448742746615087
+ }
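The `eval_wer` field in these result files is a word error rate. For reference, the same metric can be computed for any hypothesis/reference transcript pairs with the `evaluate` library; the strings below are purely illustrative, not data from this evaluation set.

```python
# Sketch: compute a word error rate the same way the eval_wer fields report it.
# The transcript pairs here are made up for illustration only.
import evaluate

wer = evaluate.load("wer")
predictions = ["this is a sample transcript", "another decoded utterance"]
references = ["this is the sample transcript", "another decoded utterance"]
print(wer.compute(predictions=predictions, references=references))  # -> 0.125 (1 error / 8 reference words)
```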
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 2.688728024819028,
+   "total_flos": 6.22653785519667e+18,
+   "train_loss": 0.718606931246244,
+   "train_runtime": 2586.1395,
+   "train_samples": 3866,
+   "train_samples_per_second": 44.847,
+   "train_steps_per_second": 11.217
+ }
trainer_state.json ADDED
@@ -0,0 +1,467 @@
+ {
+   "best_metric": 0.30534103512763977,
+   "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-male-model-test/checkpoint-2300",
+   "epoch": 2.688728024819028,
+   "eval_steps": 100,
+   "global_step": 2600,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.10341261633919338,
+       "grad_norm": 3.1888983249664307,
+       "learning_rate": 0.00028799999999999995,
+       "loss": 6.9809,
+       "step": 100
+     },
+     {
+       "epoch": 0.10341261633919338,
+       "eval_loss": 1.3138622045516968,
+       "eval_runtime": 31.0692,
+       "eval_samples_per_second": 15.803,
+       "eval_steps_per_second": 3.959,
+       "eval_wer": 0.9957016978293574,
+       "step": 100
+     },
+     {
+       "epoch": 0.20682523267838676,
+       "grad_norm": 2.4408092498779297,
+       "learning_rate": 0.0002990038049117952,
+       "loss": 0.745,
+       "step": 200
+     },
+     {
+       "epoch": 0.20682523267838676,
+       "eval_loss": 0.4297034740447998,
+       "eval_runtime": 31.0581,
+       "eval_samples_per_second": 15.809,
+       "eval_steps_per_second": 3.96,
+       "eval_wer": 0.5882226520524393,
+       "step": 200
+     },
+     {
+       "epoch": 0.31023784901758017,
+       "grad_norm": 1.8350099325180054,
+       "learning_rate": 0.0002979661016949152,
+       "loss": 0.5423,
+       "step": 300
+     },
+     {
+       "epoch": 0.31023784901758017,
+       "eval_loss": 0.3886265754699707,
+       "eval_runtime": 30.9037,
+       "eval_samples_per_second": 15.888,
+       "eval_steps_per_second": 3.98,
+       "eval_wer": 0.5643670750053729,
+       "step": 300
+     },
+     {
+       "epoch": 0.4136504653567735,
+       "grad_norm": 4.089741230010986,
+       "learning_rate": 0.00029692839847803525,
+       "loss": 0.539,
+       "step": 400
+     },
+     {
+       "epoch": 0.4136504653567735,
+       "eval_loss": 0.36830800771713257,
+       "eval_runtime": 30.9142,
+       "eval_samples_per_second": 15.883,
+       "eval_steps_per_second": 3.979,
+       "eval_wer": 0.5448098001289491,
+       "step": 400
+     },
+     {
+       "epoch": 0.5170630816959669,
+       "grad_norm": 2.093287706375122,
+       "learning_rate": 0.0002958906952611553,
+       "loss": 0.5277,
+       "step": 500
+     },
+     {
+       "epoch": 0.5170630816959669,
+       "eval_loss": 0.3528771698474884,
+       "eval_runtime": 31.284,
+       "eval_samples_per_second": 15.695,
+       "eval_steps_per_second": 3.932,
+       "eval_wer": 0.508274231678487,
+       "step": 500
+     },
+     {
+       "epoch": 0.6204756980351603,
+       "grad_norm": 1.6442524194717407,
+       "learning_rate": 0.0002948529920442753,
+       "loss": 0.4708,
+       "step": 600
+     },
+     {
+       "epoch": 0.6204756980351603,
+       "eval_loss": 0.3492579162120819,
+       "eval_runtime": 31.108,
+       "eval_samples_per_second": 15.784,
+       "eval_steps_per_second": 3.954,
+       "eval_wer": 0.4996776273372018,
+       "step": 600
+     },
+     {
+       "epoch": 0.7238883143743536,
+       "grad_norm": 2.5362725257873535,
+       "learning_rate": 0.00029381528882739533,
+       "loss": 0.4889,
+       "step": 700
+     },
+     {
+       "epoch": 0.7238883143743536,
+       "eval_loss": 0.3467372953891754,
+       "eval_runtime": 31.1038,
+       "eval_samples_per_second": 15.786,
+       "eval_steps_per_second": 3.954,
+       "eval_wer": 0.5095637223296797,
+       "step": 700
+     },
+     {
+       "epoch": 0.827300930713547,
+       "grad_norm": 4.026537895202637,
+       "learning_rate": 0.00029277758561051535,
+       "loss": 0.4793,
+       "step": 800
+     },
+     {
+       "epoch": 0.827300930713547,
+       "eval_loss": 0.34066715836524963,
+       "eval_runtime": 30.9432,
+       "eval_samples_per_second": 15.868,
+       "eval_steps_per_second": 3.975,
+       "eval_wer": 0.48183967332903505,
+       "step": 800
+     },
+     {
+       "epoch": 0.9307135470527405,
+       "grad_norm": 4.126669406890869,
+       "learning_rate": 0.0002917398823936354,
+       "loss": 0.469,
+       "step": 900
+     },
+     {
+       "epoch": 0.9307135470527405,
+       "eval_loss": 0.3455369174480438,
+       "eval_runtime": 31.0926,
+       "eval_samples_per_second": 15.792,
+       "eval_steps_per_second": 3.956,
+       "eval_wer": 0.49580915538362347,
+       "step": 900
+     },
+     {
+       "epoch": 1.0341261633919339,
+       "grad_norm": 1.1861900091171265,
+       "learning_rate": 0.0002907021791767554,
+       "loss": 0.4407,
+       "step": 1000
+     },
+     {
+       "epoch": 1.0341261633919339,
+       "eval_loss": 0.3328763544559479,
+       "eval_runtime": 31.2336,
+       "eval_samples_per_second": 15.72,
+       "eval_steps_per_second": 3.938,
+       "eval_wer": 0.473457984096282,
+       "step": 1000
+     },
+     {
+       "epoch": 1.1375387797311272,
+       "grad_norm": 1.2025549411773682,
+       "learning_rate": 0.00028967485299204426,
+       "loss": 0.4524,
+       "step": 1100
+     },
+     {
+       "epoch": 1.1375387797311272,
+       "eval_loss": 0.32889580726623535,
+       "eval_runtime": 31.3882,
+       "eval_samples_per_second": 15.643,
+       "eval_steps_per_second": 3.919,
+       "eval_wer": 0.4878572963679347,
+       "step": 1100
+     },
+     {
+       "epoch": 1.2409513960703205,
+       "grad_norm": 1.4938141107559204,
+       "learning_rate": 0.0002886371497751643,
+       "loss": 0.4416,
+       "step": 1200
+     },
+     {
+       "epoch": 1.2409513960703205,
+       "eval_loss": 0.327963262796402,
+       "eval_runtime": 30.9988,
+       "eval_samples_per_second": 15.839,
+       "eval_steps_per_second": 3.968,
+       "eval_wer": 0.4910810229959166,
+       "step": 1200
+     },
+     {
+       "epoch": 1.344364012409514,
+       "grad_norm": 1.22685706615448,
+       "learning_rate": 0.0002875994465582843,
+       "loss": 0.4599,
+       "step": 1300
+     },
+     {
+       "epoch": 1.344364012409514,
+       "eval_loss": 0.3284839391708374,
+       "eval_runtime": 31.2403,
+       "eval_samples_per_second": 15.717,
+       "eval_steps_per_second": 3.937,
+       "eval_wer": 0.4764667956157318,
+       "step": 1300
+     },
+     {
+       "epoch": 1.4477766287487073,
+       "grad_norm": 1.3746376037597656,
+       "learning_rate": 0.00028656174334140434,
+       "loss": 0.4739,
+       "step": 1400
+     },
+     {
+       "epoch": 1.4477766287487073,
+       "eval_loss": 0.3221452236175537,
+       "eval_runtime": 31.2054,
+       "eval_samples_per_second": 15.734,
+       "eval_steps_per_second": 3.942,
+       "eval_wer": 0.4693745970341715,
+       "step": 1400
+     },
+     {
+       "epoch": 1.5511892450879006,
+       "grad_norm": 1.6073061227798462,
+       "learning_rate": 0.00028552404012452436,
+       "loss": 0.4466,
+       "step": 1500
+     },
+     {
+       "epoch": 1.5511892450879006,
+       "eval_loss": 0.3196486234664917,
+       "eval_runtime": 31.0094,
+       "eval_samples_per_second": 15.834,
+       "eval_steps_per_second": 3.967,
+       "eval_wer": 0.4588437567160971,
+       "step": 1500
+     },
+     {
+       "epoch": 1.654601861427094,
+       "grad_norm": 1.1827540397644043,
+       "learning_rate": 0.0002844967139398132,
+       "loss": 0.4483,
+       "step": 1600
+     },
+     {
+       "epoch": 1.654601861427094,
+       "eval_loss": 0.3144252896308899,
+       "eval_runtime": 31.0445,
+       "eval_samples_per_second": 15.816,
+       "eval_steps_per_second": 3.962,
+       "eval_wer": 0.4526112185686654,
+       "step": 1600
+     },
+     {
+       "epoch": 1.7580144777662876,
+       "grad_norm": 1.335554599761963,
+       "learning_rate": 0.0002834590107229332,
+       "loss": 0.4543,
+       "step": 1700
+     },
+     {
+       "epoch": 1.7580144777662876,
+       "eval_loss": 0.31697967648506165,
+       "eval_runtime": 31.4988,
+       "eval_samples_per_second": 15.588,
+       "eval_steps_per_second": 3.905,
+       "eval_wer": 0.4528261336771975,
+       "step": 1700
+     },
+     {
+       "epoch": 1.861427094105481,
+       "grad_norm": 2.555410385131836,
+       "learning_rate": 0.00028242130750605327,
+       "loss": 0.4537,
+       "step": 1800
+     },
+     {
+       "epoch": 1.861427094105481,
+       "eval_loss": 0.3140595555305481,
+       "eval_runtime": 31.3179,
+       "eval_samples_per_second": 15.678,
+       "eval_steps_per_second": 3.927,
+       "eval_wer": 0.4521813883516011,
+       "step": 1800
+     },
+     {
+       "epoch": 1.9648397104446742,
+       "grad_norm": 1.150947093963623,
+       "learning_rate": 0.0002813836042891733,
+       "loss": 0.4293,
+       "step": 1900
+     },
+     {
+       "epoch": 1.9648397104446742,
+       "eval_loss": 0.31063735485076904,
+       "eval_runtime": 31.1712,
+       "eval_samples_per_second": 15.752,
+       "eval_steps_per_second": 3.946,
+       "eval_wer": 0.44530410487857297,
+       "step": 1900
+     },
+     {
+       "epoch": 2.0682523267838677,
+       "grad_norm": 0.674537181854248,
+       "learning_rate": 0.00028034590107229326,
+       "loss": 0.4457,
+       "step": 2000
+     },
+     {
+       "epoch": 2.0682523267838677,
+       "eval_loss": 0.3134273290634155,
+       "eval_runtime": 31.1408,
+       "eval_samples_per_second": 15.767,
+       "eval_steps_per_second": 3.95,
+       "eval_wer": 0.4650762948635289,
+       "step": 2000
+     },
+     {
+       "epoch": 2.1716649431230612,
+       "grad_norm": 0.9923437237739563,
+       "learning_rate": 0.00027930819785541334,
+       "loss": 0.4214,
+       "step": 2100
+     },
+     {
+       "epoch": 2.1716649431230612,
+       "eval_loss": 0.3119480609893799,
+       "eval_runtime": 31.4395,
+       "eval_samples_per_second": 15.617,
+       "eval_steps_per_second": 3.912,
+       "eval_wer": 0.4543305394369224,
+       "step": 2100
+     },
+     {
+       "epoch": 2.2750775594622543,
+       "grad_norm": 0.9173412919044495,
+       "learning_rate": 0.00027827049463853337,
+       "loss": 0.4103,
+       "step": 2200
+     },
+     {
+       "epoch": 2.2750775594622543,
+       "eval_loss": 0.3089054822921753,
+       "eval_runtime": 31.248,
+       "eval_samples_per_second": 15.713,
+       "eval_steps_per_second": 3.936,
+       "eval_wer": 0.43907156673114117,
+       "step": 2200
+     },
+     {
+       "epoch": 2.378490175801448,
+       "grad_norm": 3.1410484313964844,
+       "learning_rate": 0.0002772327914216534,
+       "loss": 0.407,
+       "step": 2300
+     },
+     {
+       "epoch": 2.378490175801448,
+       "eval_loss": 0.30534103512763977,
+       "eval_runtime": 31.5782,
+       "eval_samples_per_second": 15.549,
+       "eval_steps_per_second": 3.895,
+       "eval_wer": 0.43305394369224154,
+       "step": 2300
+     },
+     {
+       "epoch": 2.481902792140641,
+       "grad_norm": 1.2900283336639404,
+       "learning_rate": 0.0002761950882047734,
+       "loss": 0.4314,
+       "step": 2400
+     },
+     {
+       "epoch": 2.481902792140641,
+       "eval_loss": 0.30588600039482117,
+       "eval_runtime": 31.1067,
+       "eval_samples_per_second": 15.784,
+       "eval_steps_per_second": 3.954,
+       "eval_wer": 0.433698689017838,
+       "step": 2400
+     },
+     {
+       "epoch": 2.5853154084798344,
+       "grad_norm": 1.6208014488220215,
+       "learning_rate": 0.00027515738498789345,
+       "loss": 0.4144,
+       "step": 2500
+     },
+     {
+       "epoch": 2.5853154084798344,
+       "eval_loss": 0.30535265803337097,
+       "eval_runtime": 31.4136,
+       "eval_samples_per_second": 15.63,
+       "eval_steps_per_second": 3.915,
+       "eval_wer": 0.4382119062970127,
+       "step": 2500
+     },
+     {
+       "epoch": 2.688728024819028,
+       "grad_norm": 1.7971055507659912,
+       "learning_rate": 0.0002741196817710135,
+       "loss": 0.4099,
+       "step": 2600
+     },
+     {
+       "epoch": 2.688728024819028,
+       "eval_loss": 0.3059903085231781,
+       "eval_runtime": 31.3346,
+       "eval_samples_per_second": 15.67,
+       "eval_steps_per_second": 3.925,
+       "eval_wer": 0.4446593595529766,
+       "step": 2600
+     },
+     {
+       "epoch": 2.688728024819028,
+       "step": 2600,
+       "total_flos": 6.22653785519667e+18,
+       "train_loss": 0.718606931246244,
+       "train_runtime": 2586.1395,
+       "train_samples_per_second": 44.847,
+       "train_steps_per_second": 11.217
+     }
+   ],
+   "logging_steps": 100,
+   "max_steps": 29010,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 30,
+   "save_steps": 400,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 3,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 1
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 6.22653785519667e+18,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
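The state above records the training schedule: logging and evaluation every 100 steps, checkpoints every 400 steps, batch size 4, 30 epochs requested (29010 max steps, with training stopped at step 2600), and early stopping with patience 3. A TrainingArguments configuration consistent with that schedule is sketched below; the output path, peak learning rate, and best-model metric are assumptions, not values read from this commit.

```python
# Sketch of a Trainer setup consistent with trainer_state.json
# (transformers >= 4.41 argument names). The model and datasets are omitted;
# learning_rate and metric_for_best_model are assumptions.
from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="mms-1b-bemgen-male-model-test",  # assumed output path
    per_device_train_batch_size=4,               # "train_batch_size": 4
    num_train_epochs=30,                         # "num_train_epochs": 30
    learning_rate=3e-4,                          # assumed; the log starts near 2.88e-4
    eval_strategy="steps",
    eval_steps=100,                              # "eval_steps": 100
    logging_steps=100,                           # "logging_steps": 100
    save_steps=400,                              # "save_steps": 400
    load_best_model_at_end=True,                 # implied by "best_model_checkpoint"
    metric_for_best_model="loss",                # assumed; "best_metric" matches an eval_loss value
    greater_is_better=False,
)

# Early stopping with patience 3, as recorded under "stateful_callbacks".
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.0,
)
```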