FrexG committed on
Commit
b82438a
1 Parent(s): 86ff979

Create asr.py

Browse files
Files changed (1) hide show
  1. asr.py +27 -0
asr.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ import torchaudio.functional as AF
4
+ from transformers import Wav2Vec2ForCTC, AutoProcessor
5
+ from pydub import AudioSegment
6
+ from pydub.silence import split_on_silence
7
+
8
+
9
class Transcribe:
    """Speech-to-text wrapper around the ``facebook/mms-1b-fl102`` CTC checkpoint.

    The processor and model are loaded once at construction time. Calling the
    instance switches the tokenizer vocabulary and the model adapter to the
    requested language (only when it differs from the one already active),
    runs the model and greedily decodes the first item of the batch.
    """

    # Language whose tokenizer vocabulary and adapter weights are currently
    # active on this instance; ``None`` until the first successful call.
    _active_lang: "str | None" = None

    def __init__(self, freq: float = 16000.0) -> None:
        """Load the processor and the CTC model.

        Args:
            freq: Sampling rate stored on the instance as ``self.freq``. It is
                not read anywhere else in this class.
        """
        self.freq = freq
        self.model_id = "facebook/mms-1b-fl102"
        self.processor = AutoProcessor.from_pretrained(self.model_id)
        self.model = Wav2Vec2ForCTC.from_pretrained(self.model_id)
        self._active_lang = None

    @torch.inference_mode()
    def __call__(self, audio_tensor: torch.Tensor, lang: str = "amh") -> str:
        """Transcribe ``audio_tensor`` in the language ``lang``.

        Args:
            audio_tensor: Input passed straight to the model. Presumably a
                ``(batch, samples)`` waveform sampled at ``self.freq`` Hz --
                TODO confirm against the caller. Only the first batch item is
                decoded.
            lang: Language code handed to the tokenizer and to the model's
                adapter loader.

        Returns:
            Whatever ``self.processor.decode`` produces for the greedy
            (argmax) token ids of the first batch item.
        """
        # Kept from the original implementation: echoes the requested language.
        print(lang)

        # Switching language rebuilds the tokenizer vocabulary and reloads the
        # adapter weights, so only do it when the language actually changes.
        # The marker is updated last: if either step raises, the next call
        # retries the whole switch instead of trusting a half-applied state.
        if lang != self._active_lang:
            self.processor.tokenizer.set_target_lang(lang)
            self.model.load_adapter(lang)
            self._active_lang = lang

        # NOTE(review): the tensor goes to the model without passing through
        # self.processor first -- confirm callers pre-process the audio the
        # way the checkpoint expects.
        logits = self.model(audio_tensor).logits

        # Greedy CTC decoding: most likely token per frame, first batch item.
        token_ids = torch.argmax(logits, dim=-1)[0]
        return self.processor.decode(token_ids)