Add tokenizer_config.json

Files changed (2) hide show

README.md CHANGED Viewed

@@ -36,29 +36,37 @@ Evaluation is performed on the dev and test portions of the JuzneVesti dataset
 Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
 ```python
-from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
 import soundfile as sf
 import torch
 import os
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 # load model and tokenizer
-processor = Wav2Vec2ProcessorWithLM.from_pretrained(
-    "classla/wav2vec2-large-slavic-parlaspeech-hr-lm")
-model = Wav2Vec2ForCTC.from_pretrained("classla/wav2vec2-large-slavic-parlaspeech-hr-lm")
 # download the example wav files:
-os.system("wget https://huggingface.co/classla/wav2vec2-large-slavic-parlaspeech-hr-lm/raw/main/00020570a.flac.wav")
 # read the wav file
 speech, sample_rate = sf.read("00020570a.flac.wav")
-input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.cuda()
-inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
-with torch.no_grad():
-    logits = model(**inputs).logits
-transcription = processor.batch_decode(logits.numpy()).text[0]
 # remove the raw wav file
 os.system("rm 00020570a.flac.wav")
-transcription # 'velik broj poslovnih subjekata poslao je sa minusom velik dio'
 ```

 Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
 ```python
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import soundfile as sf
 import torch
 import os
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 # load model and processor
+processor = Wav2Vec2Processor.from_pretrained(
+    "5roop/wav2vec2-xls-r-juznevesti-sr")
+model = Wav2Vec2ForCTC.from_pretrained("5roop/wav2vec2-xls-r-juznevesti-sr")
 # download the example wav file:
+os.system("wget https://huggingface.co/classla/wav2vec2-xls-r-parlaspeech-hr/raw/main/00020570a.flac.wav")
 # read the wav file
 speech, sample_rate = sf.read("00020570a.flac.wav")
+input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.to(device)
 # remove the raw wav file
 os.system("rm 00020570a.flac.wav")
+# retrieve logits
+logits = model.to(device)(input_values).logits
+# take argmax and decode
+predicted_ids = torch.argmax(logits, dim=-1)
+transcription = processor.decode(predicted_ids[0])
+transcription # 'velik broj poslovnih subjekata posluje sa minosom velik deo'
 ```

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "pad_token": "[PAD]", "do_lower_case": true, "return_attention_mask": true, "do_normalize": true}