Add tokenizer_config.json
Browse files
- README.md +19 -11
- tokenizer_config.json +1 -0
README.md
CHANGED
@@ -36,29 +36,37 @@ Evaluation is performed on the dev and test portions of the JuzneVesti dataset
|
|
36 |
Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
|
37 |
|
38 |
```python
|
39 |
-
from transformers import
|
40 |
import soundfile as sf
|
41 |
import torch
|
42 |
import os
|
|
|
43 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
|
44 |
# load model and tokenizer
|
45 |
-
processor =
|
46 |
-
"
|
47 |
-
model = Wav2Vec2ForCTC.from_pretrained("
|
|
|
|
|
48 |
# download the example wav files:
|
49 |
-
os.system("wget https://huggingface.co/classla/wav2vec2-
|
|
|
50 |
# read the wav file
|
51 |
speech, sample_rate = sf.read("00020570a.flac.wav")
|
52 |
-
input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.
|
53 |
-
inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
|
54 |
-
with torch.no_grad():
|
55 |
-
logits = model(**inputs).logits
|
56 |
-
transcription = processor.batch_decode(logits.numpy()).text[0]
|
57 |
|
58 |
# remove the raw wav file
|
59 |
os.system("rm 00020570a.flac.wav")
|
60 |
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
```
|
63 |
|
64 |
|
|
|
36 |
Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
|
37 |
|
38 |
```python
|
39 |
+
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
40 |
import soundfile as sf
|
41 |
import torch
|
42 |
import os
|
43 |
+
|
44 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
45 |
+
|
46 |
# load model and tokenizer
|
47 |
+
processor = Wav2Vec2Processor.from_pretrained(
|
48 |
+
"5roop/wav2vec2-xls-r-juznevesti-sr")
|
49 |
+
model = Wav2Vec2ForCTC.from_pretrained("5roop/wav2vec2-xls-r-juznevesti-sr")
|
50 |
+
|
51 |
+
|
52 |
# download the example wav files:
|
53 |
+
os.system("wget https://huggingface.co/classla/wav2vec2-xls-r-parlaspeech-hr/raw/main/00020570a.flac.wav")
|
54 |
+
|
55 |
# read the wav file
|
56 |
speech, sample_rate = sf.read("00020570a.flac.wav")
|
57 |
+
input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.to(device)
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# remove the raw wav file
|
60 |
os.system("rm 00020570a.flac.wav")
|
61 |
|
62 |
+
# retrieve logits
|
63 |
+
logits = model.to(device)(input_values).logits
|
64 |
+
|
65 |
+
# take argmax and decode
|
66 |
+
predicted_ids = torch.argmax(logits, dim=-1)
|
67 |
+
transcription = processor.decode(predicted_ids[0])
|
68 |
+
|
69 |
+
transcription # 'velik broj poslovnih subjekata posluje sa minosom velik deo'
|
70 |
```
|
71 |
|
72 |
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "pad_token": "[PAD]", "do_lower_case": true, "return_attention_mask": true, "do_normalize": true}
|