iqbalc committed on
Commit
9576ce8
1 Parent(s): 42c5999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -3,23 +3,35 @@ os.system("pip install git+https://github.com/openai/whisper.git")
3
 
4
  import gradio as gr
5
  import whisper
 
6
 
 
7
 
8
def speech_to_text(tmp_filename, model_size):
    """Transcribe the audio file at *tmp_filename* with a Whisper model.

    Parameters
    ----------
    tmp_filename : str
        Path to the recorded audio clip (Gradio ``type="filepath"``).
    model_size : str
        Whisper checkpoint name, e.g. "tiny", "base", "small", "medium", "large".

    Returns
    -------
    str
        The transcribed text.
    """
    # Load the requested checkpoint on every call; fp16=False keeps CPU safe.
    asr_model = whisper.load_model(model_size)
    transcription = asr_model.transcribe(tmp_filename, fp16=False)
    return transcription["text"]
 
 
 
 
 
 
 
 
 
 
 
14
 
15
# Build the live microphone demo and start serving it.
demo = gr.Interface(
    fn=speech_to_text,
    title='Whisper: Speech to text Model by OpenAI',
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"]),
    ],
    outputs=["text"],
    live=True,
)
demo.launch()
 
3
 
4
  import gradio as gr
5
  import whisper
6
# Load the large Whisper checkpoint once at import time so every request
# reuses the same model instead of reloading it per call.
model = whisper.load_model("large")

import time  # NOTE(review): `time` is never used in this file — confirm before removing.
9
 
10
def transcribe(audio):
    """Transcribe up to 30 seconds of recorded audio and return the text.

    Parameters
    ----------
    audio : str | None
        Path to the recorded audio file (Gradio ``type="filepath"``). With
        ``live=True`` Gradio can invoke this callback before any recording
        exists, in which case ``audio`` is None.

    Returns
    -------
    str
        The decoded transcription, or "" when no audio is available yet.
    """
    # Guard: live mode fires before a recording exists; whisper.load_audio
    # would crash on None.
    if audio is None:
        return ""

    # Decode the file, then pad/trim the waveform to Whisper's fixed
    # 30-second input window.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language from the spectrogram.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode; fp16=False avoids half-precision issues on CPU.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text
27
 
28
# Wire the transcriber into a live microphone demo and start the server.
gr.Interface(
    title='Speech to Text with OpenAI (large)',
    fn=transcribe,
    inputs=[
        # gr.Audio (top-level), not the deprecated gr.inputs namespace
        # removed in Gradio 3.x; also matches the usage elsewhere in this file.
        gr.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        "textbox"
    ],
    live=True).launch()