Update README.md
Browse files
README.md
CHANGED
@@ -44,8 +44,8 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
44 |
|
45 |
test_dataset = load_dataset("common_voice", "vi", split="test") #TODO: replace {lang_id} with your language code here. Make sure the code is one of the *ISO codes* listed on [this](https://huggingface.co/languages) site.
|
46 |
|
47 |
-
processor = Wav2Vec2Processor.from_pretrained("
|
48 |
-
model = Wav2Vec2ForCTC.from_pretrained("
|
49 |
|
50 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
51 |
|
@@ -88,7 +88,7 @@ processor = Wav2Vec2Processor.from_pretrained("not-tanh/wav2vec2-large-xlsr-53-v
|
|
88 |
model = Wav2Vec2ForCTC.from_pretrained("not-tanh/wav2vec2-large-xlsr-53-vietnamese")
|
89 |
model.to("cuda")
|
90 |
|
91 |
-
chars_to_ignore_regex = '[
|
92 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
93 |
|
94 |
# Preprocessing the datasets.
|
|
|
44 |
|
45 |
test_dataset = load_dataset("common_voice", "vi", split="test") #TODO: replace {lang_id} with your language code here. Make sure the code is one of the *ISO codes* listed on [this](https://huggingface.co/languages) site.
|
46 |
|
47 |
+
processor = Wav2Vec2Processor.from_pretrained("not-tanh/wav2vec2-large-xlsr-53-vietnamese")
|
48 |
+
model = Wav2Vec2ForCTC.from_pretrained("not-tanh/wav2vec2-large-xlsr-53-vietnamese")
|
49 |
|
50 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
51 |
|
|
|
88 |
model = Wav2Vec2ForCTC.from_pretrained("not-tanh/wav2vec2-large-xlsr-53-vietnamese")
|
89 |
model.to("cuda")
|
90 |
|
91 |
+
chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“%\\\\\\\'�]'
|
92 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
93 |
|
94 |
# Preprocessing the datasets.
|