iqbalc committed
Commit
42c5999
1 Parent(s): 3e4d5ab

Update app.py

Files changed (1)
  1. app.py +12 -24
app.py CHANGED
@@ -3,35 +3,23 @@ os.system("pip install git+https://github.com/openai/whisper.git")
 
 import gradio as gr
 import whisper
-model = whisper.load_model("large")
 
-import time
 
-def transcribe(audio):
-    # load audio and pad/trim it to fit 30 seconds
-    audio = whisper.load_audio(audio)
-    audio = whisper.pad_or_trim(audio)
-
-    # make a log-Mel spectrogram and move it to the same device as the model
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-
-    # detect the spoken language
-    _, probs = model.detect_language(mel)
-    print(f"Detected language: {max(probs, key=probs.get)}")
-
-    # decode the audio
-    options = whisper.DecodingOptions(fp16=False)
-    result = whisper.decode(model, mel, options)
-    print(result.text)
-    return result.text
+def speech_to_text(tmp_filename, model_size):
+    # load the selected Whisper checkpoint and transcribe the recorded file
+    model = whisper.load_model(model_size)
+    result = model.transcribe(tmp_filename, fp16=False)
+
+    return result["text"]
 
 gr.Interface(
-    title='Speech to text with OpenAI (Large)',
-    fn=transcribe,
+    title='Whisper: Speech to Text Model by OpenAI',
+    fn=speech_to_text,
     inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath")
+        gr.Audio(source="microphone", type="filepath"),
+        gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"]),
     ],
     outputs=[
-        "textbox"
+        "text"
     ],
-    live=True).launch()
+    live=True).launch()
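
The new handler leans entirely on Whisper's high-level transcribe() API, which performs the pad/trim, log-Mel spectrogram, language detection, and decoding steps that the old transcribe() function did by hand; fp16=False keeps decoding in FP32, which avoids the "FP16 is not supported on CPU" warning on CPU-only hardware. A minimal sketch of exercising the new helper outside Gradio (the file name clip.wav and the "base" model size are illustrative, not part of the commit):

import whisper

def speech_to_text(tmp_filename, model_size):
    # transcribe() handles audio loading, 30-second chunking, language
    # detection, and decoding internally; fp16=False forces FP32 decoding
    model = whisper.load_model(model_size)
    result = model.transcribe(tmp_filename, fp16=False)
    return result["text"]

print(speech_to_text("clip.wav", "base"))  # hypothetical local recording

On the Gradio side, gr.inputs.Audio is the deprecated pre-3.x namespace and gr.Audio the current top-level component; type="filepath" is what makes the handler receive a temporary file path it can pass straight to transcribe(), and the new gr.Dropdown supplies the model_size argument.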