fix chars
Browse files
README.md
CHANGED
@@ -92,7 +92,7 @@ processor = Wav2Vec2Processor.from_pretrained("vasilis/wav2vec2-large-xlsr-53-sw
|
|
92 |
model = Wav2Vec2ForCTC.from_pretrained("vasilis/wav2vec2-large-xlsr-53-swedish") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
|
93 |
model.to("cuda")
|
94 |
|
95 |
-
chars_to_ignore_regex = "[\,\?\.\!\-\;\:\"
|
96 |
|
97 |
resampler = {
|
98 |
48_000: torchaudio.transforms.Resample(48_000, 16_000),
|
|
|
92 |
model = Wav2Vec2ForCTC.from_pretrained("vasilis/wav2vec2-large-xlsr-53-swedish") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
|
93 |
model.to("cuda")
|
94 |
|
95 |
+
# Regex character class listing the special characters to ignore (see TODO).
# Written as a raw string so every backslash reaches the regex engine verbatim;
# in a normal string, sequences such as "\," or "\?" are invalid Python escapes
# and trigger a warning on recent Python versions. The set of matched
# characters is the same as with the non-raw spelling.
chars_to_ignore_regex = r"[\,\?\.\!\-\;\:\"\“\%\‘\”\�\']"  # TODO: adapt this list to include all special characters you removed from the data
|
96 |
|
97 |
resampler = {
|
98 |
48_000: torchaudio.transforms.Resample(48_000, 16_000),
|