jonatasgrosman committed on
Commit
4f81e25
1 Parent(s): b6aa0d6

update README

Browse files

Files changed (1) hide show
  1. README.md +9 -7
README.md CHANGED
@@ -27,7 +27,7 @@ model-index:
27
  value: 16.79
28
  - name: Test CER
29
  type: cer
30
- value: 40.74
31
 
32
  ---
33
 
@@ -124,9 +124,11 @@ model.to(DEVICE)
124
  # Preprocessing the datasets.
125
  # We need to read the audio files as arrays
126
  def speech_file_to_array_fn(batch):
127
- batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
128
- speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
 
129
  batch["speech"] = speech_array
 
130
  return batch
131
 
132
  test_dataset = test_dataset.map(speech_file_to_array_fn)
@@ -143,13 +145,13 @@ def evaluate(batch):
143
  batch["pred_strings"] = processor.batch_decode(pred_ids)
144
  return batch
145
 
146
- result = test_dataset.map(evaluate, batched=True, batch_size=32)
147
 
148
- print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
149
- print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
150
  ```
151
 
152
  **Test Result**:
153
 
154
  - WER: 16.79%
155
- - CER: 40.74%
27
  value: 16.79
28
  - name: Test CER
29
  type: cer
30
+ value: 3.68
31
 
32
  ---
33
 
124
  # Preprocessing the datasets.
125
  # We need to read the audio files as arrays
126
  def speech_file_to_array_fn(batch):
127
+ with warnings.catch_warnings():
128
+ warnings.simplefilter("ignore")
129
+ speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
130
  batch["speech"] = speech_array
131
+ batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
132
  return batch
133
 
134
  test_dataset = test_dataset.map(speech_file_to_array_fn)
145
  batch["pred_strings"] = processor.batch_decode(pred_ids)
146
  return batch
147
 
148
+ result = test_dataset.map(evaluate, batched=True, batch_size=8)
149
 
150
+ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
151
+ print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
152
  ```
153
 
154
  **Test Result**:
155
 
156
  - WER: 16.79%
157
+ - CER: 3.68%