Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,8 @@ import soundfile as sf
|
|
3 |
from scipy import signal
|
4 |
import numpy as np
|
5 |
import torch, torchaudio
|
6 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
7 |
-
from faster_whisper import WhisperModel
|
8 |
|
9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
@@ -22,9 +22,10 @@ pipe_is = pipeline(model=MODEL_IS)
|
|
22 |
pipe_fo = pipeline(model=MODEL_FO)
|
23 |
|
24 |
|
25 |
-
wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
26 |
-
whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
27 |
-
|
|
|
28 |
|
29 |
|
30 |
def readwav(a_f):
|
@@ -46,12 +47,17 @@ def recc(audio_file,model,processor):
|
|
46 |
xcp = processor.batch_decode(pred_ids)
|
47 |
return xcp[0]
|
48 |
|
49 |
-
def whrecc(audio_file,lang,wmodel):
    """Transcribe an audio file with a faster-whisper style model.

    Args:
        audio_file: Audio input handed unchanged to ``wmodel.transcribe``
            as its ``audio`` argument.
        lang: Language code passed to the model as ``language``.
        wmodel: Object exposing ``transcribe(audio=..., language=...,
            no_repeat_ngram_size=...)`` that returns ``(segments, info)``,
            where each segment has a ``.text`` attribute.

    Returns:
        The texts of all segments joined by single spaces ("" when the
        model yields no segments).
    """
    # The model receives the file itself, so no separate waveform read is
    # needed here.
    segments, _info = wmodel.transcribe(
        audio=audio_file, language=lang, no_repeat_ngram_size=5
    )
    # Iterate with a single, consistently named loop variable; the earlier
    # comprehension read `.text` from a name that was never bound.
    return ' '.join(segment.text for segment in segments)
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
|
57 |
def recis(audio_file):
|
@@ -68,7 +74,7 @@ def recfo(audio_file):
|
|
68 |
|
69 |
|
70 |
def recwhis(audio_file):
    """Return the ``whrecc`` transcription of ``audio_file``.

    Calls ``whrecc`` with the language code "is" and the module-level
    ``whm_is`` model, and hands its result back unchanged.
    """
    return whrecc(audio_file, "is", whm_is)
|
73 |
|
74 |
def pick_asrc(au_src):
|
|
|
3 |
from scipy import signal
|
4 |
import numpy as np
|
5 |
import torch, torchaudio
|
6 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, WhisperForConditionalGeneration, WhisperProcessor
|
7 |
+
#from faster_whisper import WhisperModel
|
8 |
|
9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
|
|
22 |
pipe_fo = pipeline(model=MODEL_FO)
|
23 |
|
24 |
|
25 |
+
#wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
26 |
+
#whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
27 |
+
whisperprocessor = WhisperProcessor.from_pretrained(MODEL_WHIS)
|
28 |
+
whispermodel = WhisperForConditionalGeneration.from_pretrained(MODEL_WHIS)
|
29 |
|
30 |
|
31 |
def readwav(a_f):
|
|
|
47 |
xcp = processor.batch_decode(pred_ids)
|
48 |
return xcp[0]
|
49 |
|
50 |
+
def whrecc(audio_file,whisperprocessor,whispermodel):
    """Transcribe an audio file with a Hugging Face Whisper model as Icelandic.

    Args:
        audio_file: Audio input handed to ``readwav``.
        whisperprocessor: ``WhisperProcessor`` used both to extract input
            features and to decode the generated token ids.
        whispermodel: ``WhisperForConditionalGeneration`` used to generate
            token ids from the input features.

    Returns:
        The decoded text of the first (only) item in the batch, with
        special tokens removed.
    """
    # NOTE(review): the features are extracted with sampling_rate=16000, so
    # this assumes readwav returns 16 kHz samples -- confirm against readwav.
    wav = readwav(audio_file)
    input_features = whisperprocessor(
        wav, sampling_rate=16000, return_tensors="pt"
    ).input_features
    # Whisper's language is selected when generating (it becomes part of the
    # decoder prompt); it is not an option of decoding, so it is set here
    # instead of being passed to batch_decode.
    predicted_ids = whispermodel.generate(
        input_features, language="is", task="transcribe"
    )
    dec = whisperprocessor.batch_decode(predicted_ids, skip_special_tokens=True)
    return dec[0]
|
61 |
|
62 |
|
63 |
def recis(audio_file):
|
|
|
74 |
|
75 |
|
76 |
def recwhis(audio_file):
    """Return the ``whrecc`` transcription of ``audio_file``.

    Calls ``whrecc`` with the module-level ``whisperprocessor`` and
    ``whispermodel`` and hands its result back unchanged.
    """
    return whrecc(audio_file, whisperprocessor, whispermodel)
|
79 |
|
80 |
def pick_asrc(au_src):
|