aka7774 committed
Commit: b1927c9
Parent: 007739a

Update app.py

Files changed (1)
  1. app.py +11 -7
app.py CHANGED
@@ -2,18 +2,19 @@ import gradio as gr
 from faster_whisper import WhisperModel
 
 model_size = 'large-v3'
-model = WhisperModel(model_size, device="auto", compute_type="float16")
 
-# or run on GPU with INT8
-# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
-# or run on CPU with INT8
-# model = WhisperModel(model_size, device="cpu", compute_type="int8")
+def load_model(model_size):
+    if torch.cuda.is_available():
+        model = WhisperModel(model_size, device="cuda", compute_type="float16")
+        # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
+    else:
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
 def speech_to_text(audio_file, _model_size):
     global model_size, model
     if model_size != _model_size:
         model_size = _model_size
-        model = WhisperModel(model_size, device="auto", compute_type="float16")
+        model = load_model(model_size)
 
     with torch.no_grad():
         segments, info = model.transcribe(
@@ -29,10 +30,13 @@ def speech_to_text(audio_file, _model_size):
     for segment in segments:
         text += "{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"
 
+
+load_model(model_size)
+
 gr.Interface(
     fn=speech_to_text,
     inputs=[
-        gr.Audio(source="upload", type="filepath"),
+        gr.Audio(sources="upload", type="filepath"),
         gr.Dropdown(value=model_size, choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]),
     ],
     outputs="text").launch()
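Review note: the new load_model() assigns to a local variable and has no return statement, so the top-level load_model(model_size) call discards the model and model = load_model(model_size) inside speech_to_text() rebinds the global to None. The hunks shown also never import torch, which torch.cuda.is_available() requires. A minimal sketch of the loader as presumably intended, assuming the rest of the file matches these hunks:

import torch  # needed for torch.cuda.is_available(); no import is visible in these hunks
from faster_whisper import WhisperModel

def load_model(model_size):
    if torch.cuda.is_available():
        # GPU path: FP16; int8_float16 would trade some accuracy for less VRAM
        return WhisperModel(model_size, device="cuda", compute_type="float16")
    # CPU path: INT8 keeps memory use and latency manageable
    return WhisperModel(model_size, device="cpu", compute_type="int8")

model = load_model(model_size)  # bind the result; the committed call discards it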
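The transcript loop in the second hunk is missing the f-string prefix, so it appends the literal text "{segment.start:.2f}..." instead of the segment times. The with torch.no_grad(): block should also be unnecessary here, since faster-whisper runs on CTranslate2 rather than PyTorch and there are no gradients to disable. A corrected sketch of the loop:

text = ""
for segment in segments:
    # the f prefix makes the placeholders interpolate; the committed line lacks it
    text += f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"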
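The source-to-sources rename tracks the Gradio 4 API, where gr.Audio documents sources as a list rather than 3.x's single string. Whether the bare string "upload" is normalized depends on the Gradio version, so the list form is the safer spelling:

gr.Audio(sources=["upload"], type="filepath")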