m3hrdadfi committed on
Commit
02f6139
1 Parent(s): ed5ba57

Add more info

Browse files
Files changed (4) hide show
  1. README.md +78 -19
  2. predictions.csv +0 -0
  3. sample1608.flac +0 -0
  4. sample3860.flac +0 -0
README.md CHANGED
@@ -9,10 +9,10 @@ tags:
9
  - xlsr-fine-tuning-week
10
  license: apache-2.0
11
  widget:
12
- - label: Malromur sample 11
13
- src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample11.flac
14
- - label: Malromur sample 74
15
- src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample74.flac
16
  model-index:
17
  - name: XLSR Wav2Vec2 Icelandic by Mehrdad Farahani
18
  results:
@@ -26,7 +26,7 @@ model-index:
26
  metrics:
27
  - name: Test WER
28
  type: wer
29
- value: 12.00
30
 
31
  ---
32
 
@@ -108,7 +108,7 @@ def predict(batch):
108
 
109
  pred_ids = torch.argmax(logits, dim=-1)
110
 
111
- batch["predicted"] = processor.batch_decode(pred_ids)[0]
112
  return batch
113
 
114
 
@@ -119,16 +119,16 @@ model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-icelandic"
119
  dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
120
  dataset = dataset.map(
121
  normalizer,
122
- fn_kwargs={"remove_extra_space": True},
123
- remove_columns=list(set(dataset.column_names) - set(['sentence', 'path']))
124
  )
125
 
126
  dataset = dataset.map(speech_file_to_array_fn)
127
- result = dataset.map(predict)
128
 
129
  max_items = np.random.randint(0, len(result), 20).tolist()
130
  for i in max_items:
131
- reference, predicted = result["sentence"][i], result["predicted"][i]
132
  print("reference:", reference)
133
  print("predicted:", predicted)
134
  print('---')
@@ -136,13 +136,72 @@ for i in max_items:
136
 
137
  **Output:**
138
  ```text
139
- SOON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  ```
141
 
142
 
143
  ## Evaluation
144
 
145
- The model can be evaluated as follows on the test data of Common Voice.
146
 
147
  ```python
148
  import librosa
@@ -180,7 +239,7 @@ def predict(batch):
180
 
181
  pred_ids = torch.argmax(logits, dim=-1)
182
 
183
- batch["predicted"] = processor.batch_decode(pred_ids)[0]
184
  return batch
185
 
186
 
@@ -191,21 +250,21 @@ model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-icelandic"
191
  dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
192
  dataset = dataset.map(
193
  normalizer,
194
- fn_kwargs={"remove_extra_space": True},
195
- remove_columns=list(set(dataset.column_names) - set(['sentence', 'path']))
196
  )
197
 
198
  dataset = dataset.map(speech_file_to_array_fn)
199
- result = dataset.map(predict)
200
 
201
  wer = load_metric("wer")
202
 
203
- print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["sentence"])))
204
  ```
205
- ]
206
 
207
  **Test Result**:
208
- - WER: 12.00%
209
 
210
 
211
  ## Training & Report
9
  - xlsr-fine-tuning-week
10
  license: apache-2.0
11
  widget:
12
+ - label: Malromur sample 1608
13
+ src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample1608.flac
14
+ - label: Malromur sample 3860
15
+ src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample3860.flac
16
  model-index:
17
  - name: XLSR Wav2Vec2 Icelandic by Mehrdad Farahani
18
  results:
26
  metrics:
27
  - name: Test WER
28
  type: wer
29
+ value: 10.74
30
 
31
  ---
32
 
108
 
109
  pred_ids = torch.argmax(logits, dim=-1)
110
 
111
+ batch["predicted"] = processor.batch_decode(pred_ids)
112
  return batch
113
 
114
 
119
  dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
120
  dataset = dataset.map(
121
  normalizer,
122
+ fn_kwargs={"do_lastspace_removing": True, "text_key_name": "cleaned_sentence"},
123
+ remove_columns=list(set(dataset.column_names) - set(['cleaned_sentence', 'path']))
124
  )
125
 
126
  dataset = dataset.map(speech_file_to_array_fn)
127
+ result = dataset.map(predict, batched=True, batch_size=8)
128
 
129
  max_items = np.random.randint(0, len(result), 20).tolist()
130
  for i in max_items:
131
+ reference, predicted = result["cleaned_sentence"][i], result["predicted"][i]
132
  print("reference:", reference)
133
  print("predicted:", predicted)
134
  print('---')
136
 
137
  **Output:**
138
  ```text
139
+ reference: lögregla rakti sporin í snjónum
140
+ predicted: lögregla rakti sporinn í snjónum
141
+ ---
142
+ reference: vaðlatúni
143
+ predicted: vaðlatúni
144
+ ---
145
+ reference: mykjunesi
146
+ predicted: mikjunesi
147
+ ---
148
+ reference: miðey
149
+ predicted: miðey
150
+ ---
151
+ reference: tveir mótmæla við stjórnarráðsbygginguna
152
+ predicted: tveir mótmæla við stjórnarráðsbegginguna
153
+ ---
154
+ reference: furðustrandir mest selda bók ársins
155
+ predicted: furðustrandir mest seldabók ársins
156
+ ---
157
+ reference: flekar brenndir í kvöld
158
+ predicted: flekar brenndir í kvöld
159
+ ---
160
+ reference: ástæðan er sögð eldgosið í grímsvötnum
161
+ predicted: ástæðan er sögð eldgosið í grímsvötnum
162
+ ---
163
+ reference: birtingur
164
+ predicted: birtingur
165
+ ---
166
+ reference: tvöþúsund og átján
167
+ predicted: tvöþúsund og átján
168
+ ---
169
+ reference: einfríður
170
+ predicted: einfríður
171
+ ---
172
+ reference: dalhúsum
173
+ predicted: dalhúsum
174
+ ---
175
+ reference: sex stútar á ferð
176
+ predicted: sex stútar á ferð
177
+ ---
178
+ reference: eyjamenn áfram í toppbaráttu
179
+ predicted: eyjamenn áfram í toppbaráttu
180
+ ---
181
+ reference: þetta októberkvöld sýndi sitt rétta andlit með hráslagakulda frá vatninu
182
+ predicted: þetta októberkvöld sýnsint réttla andlit með hráslagakulda frá vatninu
183
+ ---
184
+ reference: jes
185
+ predicted: js
186
+ ---
187
+ reference: hersveitirnar benda hvor á aðra
188
+ predicted: hersveitirnar benda hvor á aðra
189
+ ---
190
+ reference: þetta er hráskinnsleikur stórvelda eins og hver maður vissi
191
+ predicted: þetta er hráskinnsleikur stórvelda eins og hver maður vissi
192
+ ---
193
+ reference: umferð efstu deildar hófst
194
+ predicted: umferð efstu deildar hófst
195
+ ---
196
+ reference: freisting is
197
+ predicted: freisting is
198
+ ---
199
  ```
200
 
201
 
202
  ## Evaluation
203
 
204
+ The model can be evaluated as follows on the test data of Malromur.
205
 
206
  ```python
207
  import librosa
239
 
240
  pred_ids = torch.argmax(logits, dim=-1)
241
 
242
+ batch["predicted"] = processor.batch_decode(pred_ids)
243
  return batch
244
 
245
 
250
  dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
251
  dataset = dataset.map(
252
  normalizer,
253
+ fn_kwargs={"do_lastspace_removing": True, "text_key_name": "cleaned_sentence"},
254
+ remove_columns=list(set(dataset.column_names) - set(['cleaned_sentence', 'path']))
255
  )
256
 
257
  dataset = dataset.map(speech_file_to_array_fn)
258
+ result = dataset.map(predict, batched=True, batch_size=8)
259
 
260
  wer = load_metric("wer")
261
 
262
+ print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["cleaned_sentence"])))
263
  ```
264
+
265
 
266
  **Test Result**:
267
+ - WER: 10.74%
268
 
269
 
270
  ## Training & Report
predictions.csv ADDED
The diff for this file is too large to render. See raw diff
sample1608.flac ADDED
Binary file (109 kB). View file
sample3860.flac ADDED
Binary file (75.7 kB). View file