hanasim committed on
Commit
7bc4bab
1 Parent(s): 4f1efb4

End of training

Browse files
README.md CHANGED
@@ -1,7 +1,12 @@
1
  ---
 
 
2
  license: cc-by-nc-4.0
3
  base_model: facebook/mms-1b-all
4
  tags:
 
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - common_voice_16_0
@@ -14,15 +19,15 @@ model-index:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: common_voice_16_0
18
  type: common_voice_16_0
19
  config: hi
20
  split: test
21
- args: hi
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 0.2522266734082688
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +35,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # wav2vec2-common_voice-hi-mms-demo
32
 
33
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the common_voice_16_0 dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.2673
36
- - Wer: 0.2522
37
 
38
  ## Model description
39
 
 
1
  ---
2
+ language:
3
+ - hi
4
  license: cc-by-nc-4.0
5
  base_model: facebook/mms-1b-all
6
  tags:
7
+ - automatic-speech-recognition
8
+ - mozilla-foundation/common_voice_16_0
9
+ - mms
10
  - generated_from_trainer
11
  datasets:
12
  - common_voice_16_0
 
19
  name: Automatic Speech Recognition
20
  type: automatic-speech-recognition
21
  dataset:
22
+ name: MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - HI
23
  type: common_voice_16_0
24
  config: hi
25
  split: test
26
+ args: 'Config: hi, Training split: train+validation, Eval split: test'
27
  metrics:
28
  - name: Wer
29
  type: wer
30
+ value: 0.2516432655283731
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
35
 
36
  # wav2vec2-common_voice-hi-mms-demo
37
 
38
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - HI dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 0.2672
41
+ - Wer: 0.2516
42
 
43
  ## Model description
44
 
adapter.hin.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e23f3d274deb915f80719d66791a0026a6214af145cea237ac7a044f816a2593
3
+ size 9162368
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_loss": 0.26723530888557434,
4
+ "eval_runtime": 757.5485,
5
+ "eval_samples": 3107,
6
+ "eval_samples_per_second": 4.101,
7
+ "eval_steps_per_second": 0.513,
8
+ "eval_wer": 0.2516432655283731,
9
+ "train_loss": 0.471761938411549,
10
+ "train_runtime": 38765.3056,
11
+ "train_samples": 7084,
12
+ "train_samples_per_second": 0.731,
13
+ "train_steps_per_second": 0.091
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_loss": 0.26723530888557434,
4
+ "eval_runtime": 757.5485,
5
+ "eval_samples": 3107,
6
+ "eval_samples_per_second": 4.101,
7
+ "eval_steps_per_second": 0.513,
8
+ "eval_wer": 0.2516432655283731
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.471761938411549,
4
+ "train_runtime": 38765.3056,
5
+ "train_samples": 7084,
6
+ "train_samples_per_second": 0.731,
7
+ "train_steps_per_second": 0.091
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "eval_steps": 100,
6
+ "global_step": 3544,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11,
13
+ "eval_loss": 0.4486841857433319,
14
+ "eval_runtime": 759.5602,
15
+ "eval_samples_per_second": 4.091,
16
+ "eval_steps_per_second": 0.512,
17
+ "eval_wer": 0.35654000233363153,
18
+ "step": 100
19
+ },
20
+ {
21
+ "epoch": 0.23,
22
+ "eval_loss": 0.3544192612171173,
23
+ "eval_runtime": 754.7374,
24
+ "eval_samples_per_second": 4.117,
25
+ "eval_steps_per_second": 0.515,
26
+ "eval_wer": 0.33168682665007193,
27
+ "step": 200
28
+ },
29
+ {
30
+ "epoch": 0.34,
31
+ "eval_loss": 0.36927035450935364,
32
+ "eval_runtime": 750.419,
33
+ "eval_samples_per_second": 4.14,
34
+ "eval_steps_per_second": 0.518,
35
+ "eval_wer": 0.308817237758158,
36
+ "step": 300
37
+ },
38
+ {
39
+ "epoch": 0.45,
40
+ "eval_loss": 0.3404374122619629,
41
+ "eval_runtime": 754.9059,
42
+ "eval_samples_per_second": 4.116,
43
+ "eval_steps_per_second": 0.515,
44
+ "eval_wer": 0.3040332931430127,
45
+ "step": 400
46
+ },
47
+ {
48
+ "epoch": 0.56,
49
+ "learning_rate": 0.0008853077816492451,
50
+ "loss": 1.5084,
51
+ "step": 500
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "eval_loss": 0.33462777733802795,
56
+ "eval_runtime": 756.1347,
57
+ "eval_samples_per_second": 4.109,
58
+ "eval_steps_per_second": 0.514,
59
+ "eval_wer": 0.29952160553848545,
60
+ "step": 500
61
+ },
62
+ {
63
+ "epoch": 0.68,
64
+ "eval_loss": 0.34105485677719116,
65
+ "eval_runtime": 756.3042,
66
+ "eval_samples_per_second": 4.108,
67
+ "eval_steps_per_second": 0.514,
68
+ "eval_wer": 0.2935708451635487,
69
+ "step": 600
70
+ },
71
+ {
72
+ "epoch": 0.79,
73
+ "eval_loss": 0.31745076179504395,
74
+ "eval_runtime": 748.8702,
75
+ "eval_samples_per_second": 4.149,
76
+ "eval_steps_per_second": 0.519,
77
+ "eval_wer": 0.2887480066897437,
78
+ "step": 700
79
+ },
80
+ {
81
+ "epoch": 0.9,
82
+ "eval_loss": 0.3159240484237671,
83
+ "eval_runtime": 765.2091,
84
+ "eval_samples_per_second": 4.06,
85
+ "eval_steps_per_second": 0.508,
86
+ "eval_wer": 0.2898370347322158,
87
+ "step": 800
88
+ },
89
+ {
90
+ "epoch": 1.02,
91
+ "eval_loss": 0.3138948380947113,
92
+ "eval_runtime": 752.8617,
93
+ "eval_samples_per_second": 4.127,
94
+ "eval_steps_per_second": 0.517,
95
+ "eval_wer": 0.304538913305589,
96
+ "step": 900
97
+ },
98
+ {
99
+ "epoch": 1.13,
100
+ "learning_rate": 0.000740418118466899,
101
+ "loss": 0.3485,
102
+ "step": 1000
103
+ },
104
+ {
105
+ "epoch": 1.13,
106
+ "eval_loss": 0.3067200779914856,
107
+ "eval_runtime": 749.2127,
108
+ "eval_samples_per_second": 4.147,
109
+ "eval_steps_per_second": 0.519,
110
+ "eval_wer": 0.2957877951071526,
111
+ "step": 1000
112
+ },
113
+ {
114
+ "epoch": 1.24,
115
+ "eval_loss": 0.29693612456321716,
116
+ "eval_runtime": 752.9596,
117
+ "eval_samples_per_second": 4.126,
118
+ "eval_steps_per_second": 0.517,
119
+ "eval_wer": 0.2766909105052312,
120
+ "step": 1100
121
+ },
122
+ {
123
+ "epoch": 1.35,
124
+ "eval_loss": 0.29161983728408813,
125
+ "eval_runtime": 757.6219,
126
+ "eval_samples_per_second": 4.101,
127
+ "eval_steps_per_second": 0.513,
128
+ "eval_wer": 0.2714013457275096,
129
+ "step": 1200
130
+ },
131
+ {
132
+ "epoch": 1.47,
133
+ "eval_loss": 0.2893225848674774,
134
+ "eval_runtime": 751.196,
135
+ "eval_samples_per_second": 4.136,
136
+ "eval_steps_per_second": 0.518,
137
+ "eval_wer": 0.2663451441017463,
138
+ "step": 1300
139
+ },
140
+ {
141
+ "epoch": 1.58,
142
+ "eval_loss": 0.3183298408985138,
143
+ "eval_runtime": 752.5817,
144
+ "eval_samples_per_second": 4.128,
145
+ "eval_steps_per_second": 0.517,
146
+ "eval_wer": 0.29854925907199253,
147
+ "step": 1400
148
+ },
149
+ {
150
+ "epoch": 1.69,
151
+ "learning_rate": 0.0005952380952380953,
152
+ "loss": 0.3152,
153
+ "step": 1500
154
+ },
155
+ {
156
+ "epoch": 1.69,
157
+ "eval_loss": 0.296146422624588,
158
+ "eval_runtime": 747.739,
159
+ "eval_samples_per_second": 4.155,
160
+ "eval_steps_per_second": 0.52,
161
+ "eval_wer": 0.26875656333864884,
162
+ "step": 1500
163
+ },
164
+ {
165
+ "epoch": 1.81,
166
+ "eval_loss": 0.2847990095615387,
167
+ "eval_runtime": 745.157,
168
+ "eval_samples_per_second": 4.17,
169
+ "eval_steps_per_second": 0.522,
170
+ "eval_wer": 0.26653961339504495,
171
+ "step": 1600
172
+ },
173
+ {
174
+ "epoch": 1.92,
175
+ "eval_loss": 0.28440287709236145,
176
+ "eval_runtime": 753.6747,
177
+ "eval_samples_per_second": 4.122,
178
+ "eval_steps_per_second": 0.516,
179
+ "eval_wer": 0.2656450546458714,
180
+ "step": 1700
181
+ },
182
+ {
183
+ "epoch": 2.03,
184
+ "eval_loss": 0.2854562997817993,
185
+ "eval_runtime": 748.224,
186
+ "eval_samples_per_second": 4.153,
187
+ "eval_steps_per_second": 0.52,
188
+ "eval_wer": 0.2707401501302944,
189
+ "step": 1800
190
+ },
191
+ {
192
+ "epoch": 2.14,
193
+ "eval_loss": 0.2886996865272522,
194
+ "eval_runtime": 750.8758,
195
+ "eval_samples_per_second": 4.138,
196
+ "eval_steps_per_second": 0.518,
197
+ "eval_wer": 0.26860098790400994,
198
+ "step": 1900
199
+ },
200
+ {
201
+ "epoch": 2.26,
202
+ "learning_rate": 0.0004500580720092916,
203
+ "loss": 0.3058,
204
+ "step": 2000
205
+ },
206
+ {
207
+ "epoch": 2.26,
208
+ "eval_loss": 0.28578099608421326,
209
+ "eval_runtime": 748.0813,
210
+ "eval_samples_per_second": 4.153,
211
+ "eval_steps_per_second": 0.52,
212
+ "eval_wer": 0.26568394850453114,
213
+ "step": 2000
214
+ },
215
+ {
216
+ "epoch": 2.37,
217
+ "eval_loss": 0.2813587188720703,
218
+ "eval_runtime": 756.7359,
219
+ "eval_samples_per_second": 4.106,
220
+ "eval_steps_per_second": 0.514,
221
+ "eval_wer": 0.2629224845396912,
222
+ "step": 2100
223
+ },
224
+ {
225
+ "epoch": 2.48,
226
+ "eval_loss": 0.2809281349182129,
227
+ "eval_runtime": 759.1954,
228
+ "eval_samples_per_second": 4.092,
229
+ "eval_steps_per_second": 0.512,
230
+ "eval_wer": 0.26331142312628836,
231
+ "step": 2200
232
+ },
233
+ {
234
+ "epoch": 2.6,
235
+ "eval_loss": 0.2778892517089844,
236
+ "eval_runtime": 756.4055,
237
+ "eval_samples_per_second": 4.108,
238
+ "eval_steps_per_second": 0.514,
239
+ "eval_wer": 0.26128894247598305,
240
+ "step": 2300
241
+ },
242
+ {
243
+ "epoch": 2.71,
244
+ "eval_loss": 0.2744755446910858,
245
+ "eval_runtime": 758.3998,
246
+ "eval_samples_per_second": 4.097,
247
+ "eval_steps_per_second": 0.513,
248
+ "eval_wer": 0.25806075220722646,
249
+ "step": 2400
250
+ },
251
+ {
252
+ "epoch": 2.82,
253
+ "learning_rate": 0.0003051684088269454,
254
+ "loss": 0.2861,
255
+ "step": 2500
256
+ },
257
+ {
258
+ "epoch": 2.82,
259
+ "eval_loss": 0.2769048810005188,
260
+ "eval_runtime": 757.4274,
261
+ "eval_samples_per_second": 4.102,
262
+ "eval_steps_per_second": 0.514,
263
+ "eval_wer": 0.26175566877989964,
264
+ "step": 2500
265
+ },
266
+ {
267
+ "epoch": 2.93,
268
+ "eval_loss": 0.2742438316345215,
269
+ "eval_runtime": 761.8109,
270
+ "eval_samples_per_second": 4.078,
271
+ "eval_steps_per_second": 0.511,
272
+ "eval_wer": 0.2575940259033099,
273
+ "step": 2600
274
+ },
275
+ {
276
+ "epoch": 3.05,
277
+ "eval_loss": 0.27301648259162903,
278
+ "eval_runtime": 765.5965,
279
+ "eval_samples_per_second": 4.058,
280
+ "eval_steps_per_second": 0.508,
281
+ "eval_wer": 0.2574773443273307,
282
+ "step": 2700
283
+ },
284
+ {
285
+ "epoch": 3.16,
286
+ "eval_loss": 0.27274471521377563,
287
+ "eval_runtime": 756.2486,
288
+ "eval_samples_per_second": 4.108,
289
+ "eval_steps_per_second": 0.514,
290
+ "eval_wer": 0.25638831628485864,
291
+ "step": 2800
292
+ },
293
+ {
294
+ "epoch": 3.27,
295
+ "eval_loss": 0.27257227897644043,
296
+ "eval_runtime": 762.5719,
297
+ "eval_samples_per_second": 4.074,
298
+ "eval_steps_per_second": 0.51,
299
+ "eval_wer": 0.2562716347088795,
300
+ "step": 2900
301
+ },
302
+ {
303
+ "epoch": 3.39,
304
+ "learning_rate": 0.0001599883855981417,
305
+ "loss": 0.2839,
306
+ "step": 3000
307
+ },
308
+ {
309
+ "epoch": 3.39,
310
+ "eval_loss": 0.2713397443294525,
311
+ "eval_runtime": 762.4867,
312
+ "eval_samples_per_second": 4.075,
313
+ "eval_steps_per_second": 0.51,
314
+ "eval_wer": 0.25755513204465014,
315
+ "step": 3000
316
+ },
317
+ {
318
+ "epoch": 3.5,
319
+ "eval_loss": 0.26895028352737427,
320
+ "eval_runtime": 763.0484,
321
+ "eval_samples_per_second": 4.072,
322
+ "eval_steps_per_second": 0.51,
323
+ "eval_wer": 0.2536657461786784,
324
+ "step": 3100
325
+ },
326
+ {
327
+ "epoch": 3.61,
328
+ "eval_loss": 0.2706141769886017,
329
+ "eval_runtime": 754.9996,
330
+ "eval_samples_per_second": 4.115,
331
+ "eval_steps_per_second": 0.515,
332
+ "eval_wer": 0.2539768970479561,
333
+ "step": 3200
334
+ },
335
+ {
336
+ "epoch": 3.72,
337
+ "eval_loss": 0.26873159408569336,
338
+ "eval_runtime": 760.416,
339
+ "eval_samples_per_second": 4.086,
340
+ "eval_steps_per_second": 0.512,
341
+ "eval_wer": 0.25421026019991444,
342
+ "step": 3300
343
+ },
344
+ {
345
+ "epoch": 3.84,
346
+ "eval_loss": 0.26714619994163513,
347
+ "eval_runtime": 754.0393,
348
+ "eval_samples_per_second": 4.12,
349
+ "eval_steps_per_second": 0.516,
350
+ "eval_wer": 0.2521488856909494,
351
+ "step": 3400
352
+ },
353
+ {
354
+ "epoch": 3.95,
355
+ "learning_rate": 1.5098722415795587e-05,
356
+ "loss": 0.2706,
357
+ "step": 3500
358
+ },
359
+ {
360
+ "epoch": 3.95,
361
+ "eval_loss": 0.267267644405365,
362
+ "eval_runtime": 758.6405,
363
+ "eval_samples_per_second": 4.095,
364
+ "eval_steps_per_second": 0.513,
365
+ "eval_wer": 0.2522266734082688,
366
+ "step": 3500
367
+ },
368
+ {
369
+ "epoch": 4.0,
370
+ "step": 3544,
371
+ "total_flos": 1.1766650360089125e+19,
372
+ "train_loss": 0.471761938411549,
373
+ "train_runtime": 38765.3056,
374
+ "train_samples_per_second": 0.731,
375
+ "train_steps_per_second": 0.091
376
+ }
377
+ ],
378
+ "logging_steps": 500,
379
+ "max_steps": 3544,
380
+ "num_input_tokens_seen": 0,
381
+ "num_train_epochs": 4,
382
+ "save_steps": 200,
383
+ "total_flos": 1.1766650360089125e+19,
384
+ "train_batch_size": 8,
385
+ "trial_name": null,
386
+ "trial_params": null
387
+ }