jonatasgrosman
committed on
Commit
•
4f81e25
1
Parent(s):
b6aa0d6
update README
Browse files
README.md
CHANGED
@@ -27,7 +27,7 @@ model-index:
|
|
27 |
value: 16.79
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
-
value:
|
31 |
|
32 |
---
|
33 |
|
@@ -124,9 +124,11 @@ model.to(DEVICE)
|
|
124 |
# Preprocessing the datasets.
|
125 |
# We need to read the audio files as arrays
|
126 |
def speech_file_to_array_fn(batch):
|
127 |
-
|
128 |
-
|
|
|
129 |
batch["speech"] = speech_array
|
|
|
130 |
return batch
|
131 |
|
132 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
@@ -143,13 +145,13 @@ def evaluate(batch):
|
|
143 |
batch["pred_strings"] = processor.batch_decode(pred_ids)
|
144 |
return batch
|
145 |
|
146 |
-
result = test_dataset.map(evaluate, batched=True, batch_size=
|
147 |
|
148 |
-
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=
|
149 |
-
print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=
|
150 |
```
|
151 |
|
152 |
**Test Result**:
|
153 |
|
154 |
- WER: 16.79%
|
155 |
-
- CER:
|
27 |
value: 16.79
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
+
value: 3.68
|
31 |
|
32 |
---
|
33 |
|
124 |
# Preprocessing the datasets.
|
125 |
# We need to read the audio files as arrays
|
126 |
def speech_file_to_array_fn(batch):
|
127 |
+
with warnings.catch_warnings():
|
128 |
+
warnings.simplefilter("ignore")
|
129 |
+
speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
|
130 |
batch["speech"] = speech_array
|
131 |
+
batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
|
132 |
return batch
|
133 |
|
134 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
145 |
batch["pred_strings"] = processor.batch_decode(pred_ids)
|
146 |
return batch
|
147 |
|
148 |
+
result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
149 |
|
150 |
+
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
|
151 |
+
print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
|
152 |
```
|
153 |
|
154 |
**Test Result**:
|
155 |
|
156 |
- WER: 16.79%
|
157 |
+
- CER: 3.68%
|