patrickvonplaten
committed on
Commit
•
6f98791
1
Parent(s):
c6b7538
Update README.md
Browse files
README.md
CHANGED
@@ -82,11 +82,13 @@ import re
|
|
82 |
|
83 |
test_dataset = load_dataset("common_voice", "rm-sursilv", split="test")
|
84 |
|
|
|
|
|
85 |
processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-rm-sursilv")
|
86 |
model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-rm-sursilv")
|
87 |
model.to("cuda")
|
88 |
|
89 |
-
chars_to_ignore_regex = '[
|
90 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
91 |
|
92 |
# Preprocessing the datasets.
|
82 |
|
83 |
test_dataset = load_dataset("common_voice", "rm-sursilv", split="test")
|
84 |
|
85 |
+
wer = load_metric("wer")
|
86 |
+
|
87 |
processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-rm-sursilv")
|
88 |
model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-rm-sursilv")
|
89 |
model.to("cuda")
|
90 |
|
91 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\%\\‘\\”\\�\\…\\«\\»\\–]'
|
92 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
93 |
|
94 |
# Preprocessing the datasets.
|