Update README.md
Browse files
README.md
CHANGED
@@ -23,7 +23,7 @@ model-index:
|
|
23 |
metrics:
|
24 |
- name: Test CER
|
25 |
type: cer
|
26 |
-
value:
|
27 |
---
|
28 |
|
29 |
# Wav2Vec2-Large-XLSR-53-Chinese-zh-cn-gpt
|
@@ -114,7 +114,7 @@ processor = Wav2Vec2Processor.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-ch
|
|
114 |
model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
|
115 |
model.to("cuda")
|
116 |
|
117 |
-
chars_to_ignore_regex = '[
|
118 |
|
119 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
120 |
|
@@ -145,7 +145,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
|
145 |
print("CER: {:2f}".format(100 * chunked_cer(predictions=result["pred_strings"], targets=result["sentence"], chunk_size=1000)))
|
146 |
```
|
147 |
|
148 |
-
**Test Result**:
|
149 |
|
150 |
|
151 |
## Training
|
|
|
23 |
metrics:
|
24 |
- name: Test CER
|
25 |
type: cer
|
26 |
+
value: 41.99
|
27 |
---
|
28 |
|
29 |
# Wav2Vec2-Large-XLSR-53-Chinese-zh-cn-gpt
|
|
|
114 |
model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
|
115 |
model.to("cuda")
|
116 |
|
117 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:"\\“\\%\\‘\\”\\�\\.\\⋯\\!\\-\\:\\–\\。\\》\\,\\)\\,\\?\\;\\~\\~\\…\\︰\\,\\(\\」\\‧\\《\\﹔\\、\\—\\/\\,\\「\\﹖\\·\\×\\̃\\̌\\ε\\λ\\μ\\и\\т\\─\\□\\〈\\〉\\『\\』\\ア\\オ\\カ\\チ\\ド\\ベ\\ャ\\ヤ\\ン\\・\\丶\\a\\b\\f\\g\\i\\n\\p\\t' + "\\']"
|
118 |
|
119 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
120 |
|
|
|
145 |
print("CER: {:2f}".format(100 * chunked_cer(predictions=result["pred_strings"], targets=result["sentence"], chunk_size=1000)))
|
146 |
```
|
147 |
|
148 |
+
**Test Result**: 41.987498 %
|
149 |
|
150 |
|
151 |
## Training
|