clr committed
Commit
2c4f91a
1 Parent(s): cb91338

Update app.py

Files changed (1)
  1. app.py +17 -11
app.py CHANGED
@@ -3,8 +3,8 @@ import soundfile as sf
 from scipy import signal
 import numpy as np
 import torch, torchaudio
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
-from faster_whisper import WhisperModel
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, WhisperForConditionalGeneration, WhisperProcessor
+#from faster_whisper import WhisperModel
 
 MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
 MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
@@ -22,9 +22,10 @@ pipe_is = pipeline(model=MODEL_IS)
 pipe_fo = pipeline(model=MODEL_FO)
 
 
-wdevice = "cuda" if torch.cuda.is_available() else "cpu"
-whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
-
+#wdevice = "cuda" if torch.cuda.is_available() else "cpu"
+#whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
+whisperprocessor = WhisperProcessor.from_pretrained(MODEL_WHIS)
+whispermodel = WhisperForConditionalGeneration.from_pretrained(MODEL_WHIS)
 
 
 def readwav(a_f):
@@ -46,12 +47,17 @@ def recc(audio_file,model,processor):
     xcp = processor.batch_decode(pred_ids)
     return xcp[0]
 
-def whrecc(audio_file,lang,wmodel):
+def whrecc(audio_file,whisperprocessor,whispermodel):#lang,wmodel):
     wav = readwav(audio_file)
-    xcps, info = wmodel.transcribe(audio = audio_file, language = lang, no_repeat_ngram_size = 5)
-    txts = [xtp.text for xcp in xcps]
-    txt = ' '.join(txts)
-    return txt
+    #xcps, info = wmodel.transcribe(audio = audio_file, language = lang, no_repeat_ngram_size = 5)
+    #txts = [xtp.text for xcp in xcps]
+    #txt = ' '.join(txts)
+    #return txt
+    input_features = whisperprocessor(wav, sampling_rate=16000, return_tensors="pt").input_features
+    predicted_ids = whispermodel.generate(input_features)
+    dec = whisperprocessor.batch_decode(predicted_ids, skip_special_tokens=True,language_id='is')
+    xcp = dec[0]
+    return(xcp)
 
 
 def recis(audio_file):
@@ -68,7 +74,7 @@ def recfo(audio_file):
 
 
 def recwhis(audio_file):
-    wh_output = whrecc(audio_file,"is",whm_is)
+    wh_output = whrecc(audio_file,whisperprocessor,whispermodel)#"is",whm_is)
     return(wh_output)
 
 def pick_asrc(au_src):
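
For reference, the transcription path this commit switches to (transformers Whisper instead of faster-whisper) can be exercised on its own roughly as below. This is a minimal sketch, not the app itself: MODEL_WHIS is defined elsewhere in app.py and left as a placeholder here, readwav() is replaced by a plain soundfile load with an explicit 16 kHz check, and the language forcing via get_decoder_prompt_ids/forced_decoder_ids is an assumption on my part. The commit itself only passes language_id='is' to batch_decode, which runs after generation and therefore cannot steer the decoded language.

# Minimal sketch (assumptions noted above) of the whrecc() flow added in this commit.
import soundfile as sf
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

MODEL_WHIS = "..."  # same checkpoint as MODEL_WHIS in app.py (defined outside the shown hunks)

whisperprocessor = WhisperProcessor.from_pretrained(MODEL_WHIS)
whispermodel = WhisperForConditionalGeneration.from_pretrained(MODEL_WHIS)

def whrecc_sketch(audio_file):
    # app.py's readwav() resamples to 16 kHz mono; here we just load and check.
    wav, sr = sf.read(audio_file, dtype="float32")
    assert sr == 16000, "Whisper's feature extractor expects 16 kHz input"
    input_features = whisperprocessor(wav, sampling_rate=16000, return_tensors="pt").input_features
    # Assumption: pin the language at generation time (only meaningful if the
    # checkpoint keeps Whisper's multilingual language tokens).
    forced_ids = whisperprocessor.get_decoder_prompt_ids(language="icelandic", task="transcribe")
    with torch.no_grad():
        predicted_ids = whispermodel.generate(input_features, forced_decoder_ids=forced_ids)
    return whisperprocessor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

If the checkpoint was fine-tuned on Icelandic only, calling generate() without forced ids, as the commit does, is also reasonable; the forced decoder ids are only needed to pin the language token explicitly.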