patrickvonplaten
committed on
Commit
•
81a3094
1
Parent(s):
c748b0c
Update README.md
Browse files
README.md
CHANGED
@@ -34,8 +34,8 @@ import torchaudio
|
|
34 |
from datasets import load_dataset
|
35 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
36 |
test_dataset = load_dataset("common_voice", "ga-IE", split="test[:2%]").
|
37 |
-
processor = Wav2Vec2Processor.from_pretrained("manandey/wav2vec2-large-xlsr-
|
38 |
-
model = Wav2Vec2ForCTC.from_pretrained("manandey/wav2vec2-large-xlsr-
|
39 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
40 |
# Preprocessing the datasets.
|
41 |
# We need to read the audio files as arrays
|
@@ -61,10 +61,10 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
61 |
import re
|
62 |
test_dataset = load_dataset("common_voice", "ga-IE", split="test")
|
63 |
wer = load_metric("wer")
|
64 |
-
processor = Wav2Vec2Processor.from_pretrained("manandey/wav2vec2-large-xlsr-
|
65 |
-
model = Wav2Vec2ForCTC.from_pretrained("manandey/wav2vec2-large-xlsr-
|
66 |
model.to("cuda")
|
67 |
-
chars_to_ignore_regex = '[
|
68 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
69 |
# Preprocessing the datasets.
|
70 |
# We need to read the audio files as arrays
|
|
|
34 |
from datasets import load_dataset
|
35 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
36 |
test_dataset = load_dataset("common_voice", "ga-IE", split="test[:2%]").
|
37 |
+
processor = Wav2Vec2Processor.from_pretrained("manandey/wav2vec2-large-xlsr-_irish")
|
38 |
+
model = Wav2Vec2ForCTC.from_pretrained("manandey/wav2vec2-large-xlsr-_irish")
|
39 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
40 |
# Preprocessing the datasets.
|
41 |
# We need to read the audio files as arrays
|
|
|
61 |
import re
|
62 |
test_dataset = load_dataset("common_voice", "ga-IE", split="test")
|
63 |
wer = load_metric("wer")
|
64 |
+
processor = Wav2Vec2Processor.from_pretrained("manandey/wav2vec2-large-xlsr-_irish")
|
65 |
+
model = Wav2Vec2ForCTC.from_pretrained("manandey/wav2vec2-large-xlsr-_irish")
|
66 |
model.to("cuda")
|
67 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\β\\%\\β\\β\\οΏ½\\β\\β\\(\\)]'
|
68 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
69 |
# Preprocessing the datasets.
|
70 |
# We need to read the audio files as arrays
|