5roop committed on
Commit
0414250
1 Parent(s): cfd8171

Add tokenizer_config.json

Browse files
Files changed (2) hide show
  1. README.md +19 -11
  2. tokenizer_config.json +1 -0
README.md CHANGED
@@ -36,29 +36,37 @@ Evaluation is performed on the dev and test portions of the JuzneVesti dataset
36
  Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
37
 
38
  ```python
39
- from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
40
  import soundfile as sf
41
  import torch
42
  import os
 
43
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
44
  # load model and tokenizer
45
- processor = Wav2Vec2ProcessorWithLM.from_pretrained(
46
- "classla/wav2vec2-large-slavic-parlaspeech-hr-lm")
47
- model = Wav2Vec2ForCTC.from_pretrained("classla/wav2vec2-large-slavic-parlaspeech-hr-lm")
 
 
48
  # download the example wav files:
49
- os.system("wget https://huggingface.co/classla/wav2vec2-large-slavic-parlaspeech-hr-lm/raw/main/00020570a.flac.wav")
 
50
  # read the wav file
51
  speech, sample_rate = sf.read("00020570a.flac.wav")
52
- input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.cuda()
53
- inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
54
- with torch.no_grad():
55
- logits = model(**inputs).logits
56
- transcription = processor.batch_decode(logits.numpy()).text[0]
57
 
58
  # remove the raw wav file
59
  os.system("rm 00020570a.flac.wav")
60
 
61
- transcription # 'velik broj poslovnih subjekata poslao je sa minusom velik dio'
 
 
 
 
 
 
 
62
  ```
63
 
64
 
 
36
  Tested with `transformers==4.18.0`, `torch==1.11.0`, and `SoundFile==0.10.3.post1`.
37
 
38
  ```python
39
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
40
  import soundfile as sf
41
  import torch
42
  import os
43
+
44
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
45
+
46
  # load model and tokenizer
47
+ processor = Wav2Vec2Processor.from_pretrained(
48
+ "5roop/wav2vec2-xls-r-juznevesti-sr")
49
+ model = Wav2Vec2ForCTC.from_pretrained("5roop/wav2vec2-xls-r-juznevesti-sr")
50
+
51
+
52
  # download the example wav files:
53
+ os.system("wget https://huggingface.co/classla/wav2vec2-xls-r-parlaspeech-hr/raw/main/00020570a.flac.wav")
54
+
55
  # read the wav file
56
  speech, sample_rate = sf.read("00020570a.flac.wav")
57
+ input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.to(device)
 
 
 
 
58
 
59
  # remove the raw wav file
60
  os.system("rm 00020570a.flac.wav")
61
 
62
+ # retrieve logits
63
+ logits = model.to(device)(input_values).logits
64
+
65
+ # take argmax and decode
66
+ predicted_ids = torch.argmax(logits, dim=-1)
67
+ transcription = processor.decode(predicted_ids[0])
68
+
69
+ transcription # 'velik broj poslovnih subjekata posluje sa minosom velik deo'
70
  ```
71
 
72
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "pad_token": "[PAD]", "do_lower_case": true, "return_attention_mask": true, "do_normalize": true}