aka7774 committed on
Commit
8c3ebd8
1 Parent(s): 3fe3866

Update app.py

Files changed (1)
  app.py +12 -12
app.py CHANGED
@@ -1,11 +1,11 @@
  import gradio as gr
- import whisper
- #from faster_whisper import WhisperModel
+ #import whisper
+ from faster_whisper import WhisperModel
 
- model_size = 'large-v3'
- model = whisper.load_model(model_size)
+ model_size = 'aka7774/whisper-large-v3-ct2'
+ #model = whisper.load_model(model_size)
  #model = WhisperModel(model_size, device="cuda", compute_type="float16")
- #model = WhisperModel(model_size, compute_type="float16")
+ model = WhisperModel(model_size, compute_type="float16")
 
  # or run on GPU with INT8
  # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
@@ -16,19 +16,19 @@ def speech_to_text(audio_file, _model_size):
      global model_size, model
      if model_size != _model_size:
          model_size = _model_size
-         model = whisper.load_model(model_size)
-         #model = WhisperModel(model_size, compute_type="float16")
+         #model = whisper.load_model(model_size)
+         model = WhisperModel(model_size, compute_type="float16")
 
-     result = model.transcribe(audio_file)
-     #segments, info = model.transcribe(audio_file, beam_size=5)
+     #result = model.transcribe(audio_file)
+     segments, info = model.transcribe(audio_file, beam_size=5)
 
-     return result["text"]
-     #return "".join([segment.text for segment in segments])
+     #return result["text"]
+     return "".join([segment.text for segment in segments])
 
  gr.Interface(
      fn=speech_to_text,
      inputs=[
          gr.Audio(source="upload", type="filepath"),
-         gr.Dropdown(value=model_size, choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]),
+         gr.Dropdown(value=model_size, choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3", "aka7774/whisper-large-v3-ct2"]),
      ],
      outputs="text").launch()
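For quick reference, a minimal standalone sketch of the faster-whisper call path this commit switches to, outside the Gradio app. It assumes faster-whisper is installed, that aka7774/whisper-large-v3-ct2 is a CTranslate2 conversion of Whisper large-v3 available on the Hub, and that "sample.wav" is a placeholder audio path; on CPU-only hardware, compute_type="float16" may be downcast to a supported type.

    # Sketch of the new transcription path introduced by this commit.
    from faster_whisper import WhisperModel

    # CTranslate2-converted Whisper model; downloaded from the Hub on first use.
    model = WhisperModel("aka7774/whisper-large-v3-ct2", compute_type="float16")

    # transcribe() returns a lazy generator of segments plus a TranscriptionInfo.
    segments, info = model.transcribe("sample.wav", beam_size=5)

    # Joining segment.text mirrors the app's return value; segments carry their
    # own leading whitespace, so plain concatenation is enough.
    text = "".join(segment.text for segment in segments)
    print(info.language, text)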