update README

Files changed (1) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ model-index:
          value: 16.79
        - name: Test CER
          type: cer
-         value: 40.74
 ---
@@ -124,9 +124,11 @@ model.to(DEVICE)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
-    speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
     batch["speech"] = speech_array
     return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
@@ -143,13 +145,13 @@ def evaluate(batch):
 	batch["pred_strings"] = processor.batch_decode(pred_ids)
 	return batch
-result = test_dataset.map(evaluate, batched=True, batch_size=32)
-print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
-print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
 ```
 **Test Result**:
 - WER: 16.79%
-- CER: 40.74%

          value: 16.79
        - name: Test CER
          type: cer
+         value: 3.68
 ---
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
     batch["speech"] = speech_array
+    batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
     return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 	batch["pred_strings"] = processor.batch_decode(pred_ids)
 	return batch
+result = test_dataset.map(evaluate, batched=True, batch_size=8)
+print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
+print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
 ```
 **Test Result**:
 - WER: 16.79%
+- CER: 3.68%