cotxetj committed on
Commit
37b0e3a
1 Parent(s): a426fc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -2,9 +2,25 @@ import torch
2
  from transformers import pipeline, VitsModel, VitsTokenizer
3
  import numpy as np
4
  import gradio as gr
 
5
 
6
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  # Load Whisper-small
9
  pipe = pipeline("automatic-speech-recognition",
10
  model="openai/whisper-small",
@@ -20,6 +36,7 @@ tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-fra")
20
 
21
  # Define a function to translate an audio, in english here
22
  def translate(audio):
 
23
  outputs = pipe(audio, max_new_tokens=256,
24
  generate_kwargs={"task": "transcribe", "language": "english"})
25
  return outputs["text"]
 
2
  from transformers import pipeline, VitsModel, VitsTokenizer
3
  import numpy as np
4
  import gradio as gr
5
+ import whisper
6
 
7
+ model = whisper.load_model("small")
8
 
9
+ def inference(audio):
10
+ audio = whisper.load_audio(audio)
11
+ audio = whisper.pad_or_trim(audio)
12
+
13
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
14
+
15
+ _, probs = model.detect_language(mel)
16
+
17
+ options = whisper.DecodingOptions(fp16 = False)
18
+ result = whisper.decode(model, mel, options)
19
+
20
+ print(result.text)
21
+ return result.text
22
+
23
+
24
  # Load Whisper-small
25
  pipe = pipeline("automatic-speech-recognition",
26
  model="openai/whisper-small",
 
36
 
37
  # Define a function to translate an audio, in english here
38
  def translate(audio):
39
+ return inference(audio)
40
  outputs = pipe(audio, max_new_tokens=256,
41
  generate_kwargs={"task": "transcribe", "language": "english"})
42
  return outputs["text"]