m3hrdadfi committed on
Commit
c7851c5
1 Parent(s): e4e81e1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -13
README.md CHANGED
@@ -26,7 +26,7 @@ model-index:
26
  metrics:
27
  - name: Test WER
28
  type: wer
29
- value: 32.18
30
 
31
  ---
32
 
@@ -81,7 +81,7 @@ chars_to_mapping = {
81
  "ﻟ": "ل", "ﻡ": "م", "ﻢ": "م", "ﻪ": "ه", "ﻮ": "و", "ئ": "ی", 'ﺍ': "ا", 'ة': "ه",
82
  'ﯾ': "ی", 'ﯿ': "ی", 'ﺒ': "ب", 'ﺖ': "ت", 'ﺪ': "د", 'ﺮ': "ر", 'ﺴ': "س", 'ﺷ': "ش",
83
  'ﺸ': "ش", 'ﻋ': "ع", 'ﻤ': "م", 'ﻥ': "ن", 'ﻧ': "ن", 'ﻭ': "و", 'ﺭ': "ر", "ﮔ": "گ",
84
- "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
85
  }
86
 
87
  def multiple_replace(text, chars_to_mapping):
@@ -217,11 +217,6 @@ predicted: من سفر کردم را دوست دارم
217
 
218
  The model can be evaluated as follows on the Persian (Farsi) test data of Common Voice.
219
 
220
- ```bash
221
- !mkdir cer
222
- !wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
223
- ```
224
-
225
  ```python
226
  import librosa
227
  import torch
@@ -250,7 +245,7 @@ chars_to_mapping = {
250
  "ﻟ": "ل", "ﻡ": "م", "ﻢ": "م", "ﻪ": "ه", "ﻮ": "و", "ئ": "ی", 'ﺍ': "ا", 'ة': "ه",
251
  'ﯾ': "ی", 'ﯿ': "ی", 'ﺒ': "ب", 'ﺖ': "ت", 'ﺪ': "د", 'ﺮ': "ر", 'ﺴ': "س", 'ﺷ': "ش",
252
  'ﺸ': "ش", 'ﻋ': "ع", 'ﻤ': "م", 'ﻥ': "ن", 'ﻧ': "ن", 'ﻭ': "و", 'ﺭ': "ر", "ﮔ": "گ",
253
- "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
254
  }
255
 
256
  def multiple_replace(text, chars_to_mapping):
@@ -311,15 +306,11 @@ dataset = dataset.map(speech_file_to_array_fn)
311
  result = dataset.map(predict)
312
 
313
  wer = load_metric("wer")
314
- cer = load_metric("./cer")
315
-
316
  print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["sentence"])))
317
- print("CER: {:.2f}".format(100 * cer.compute(predictions=result["predicted"], references=result["sentence"])))
318
  ```
319
 
320
  **Test Result:**
321
- - WER: 32.18%
322
- - CER: 8.27%
323
 
324
 
325
  ## Training
 
26
  metrics:
27
  - name: Test WER
28
  type: wer
29
+ value: 32.20
30
 
31
  ---
32
 
 
81
  "ﻟ": "ل", "ﻡ": "م", "ﻢ": "م", "ﻪ": "ه", "ﻮ": "و", "ئ": "ی", 'ﺍ': "ا", 'ة': "ه",
82
  'ﯾ': "ی", 'ﯿ': "ی", 'ﺒ': "ب", 'ﺖ': "ت", 'ﺪ': "د", 'ﺮ': "ر", 'ﺴ': "س", 'ﺷ': "ش",
83
  'ﺸ': "ش", 'ﻋ': "ع", 'ﻤ': "م", 'ﻥ': "ن", 'ﻧ': "ن", 'ﻭ': "و", 'ﺭ': "ر", "ﮔ": "گ",
84
+ "\\u200c": " ", "\\u200d": " ", "\\u200e": " ", "\\u200f": " ", "\\ufeff": " ",
85
  }
86
 
87
  def multiple_replace(text, chars_to_mapping):
 
217
 
218
  The model can be evaluated as follows on the Persian (Farsi) test data of Common Voice.
219
 
 
 
 
 
 
220
  ```python
221
  import librosa
222
  import torch
 
245
  "ﻟ": "ل", "ﻡ": "م", "ﻢ": "م", "ﻪ": "ه", "ﻮ": "و", "ئ": "ی", 'ﺍ': "ا", 'ة': "ه",
246
  'ﯾ': "ی", 'ﯿ': "ی", 'ﺒ': "ب", 'ﺖ': "ت", 'ﺪ': "د", 'ﺮ': "ر", 'ﺴ': "س", 'ﺷ': "ش",
247
  'ﺸ': "ش", 'ﻋ': "ع", 'ﻤ': "م", 'ﻥ': "ن", 'ﻧ': "ن", 'ﻭ': "و", 'ﺭ': "ر", "ﮔ": "گ",
248
+ "\\u200c": " ", "\\u200d": " ", "\\u200e": " ", "\\u200f": " ", "\\ufeff": " ",
249
  }
250
 
251
  def multiple_replace(text, chars_to_mapping):
 
306
  result = dataset.map(predict)
307
 
308
  wer = load_metric("wer")
 
 
309
  print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["sentence"])))
 
310
  ```
311
 
312
  **Test Result:**
313
+ - WER: 32.20%
 
314
 
315
 
316
  ## Training