aka7774 committed on
Commit
62e6f5b
1 Parent(s): bdf10a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -1,28 +1,29 @@
1
  import gradio as gr
2
- #import whisper
3
- from faster_whisper import WhisperModel
4
 
5
- model_size = 'aka7774/whisper-large-v3-ct2'
6
- #model = whisper.load_model(model_size)
 
7
 
8
- #model = WhisperModel(model_size, device="cuda", compute_type="float16")
9
  # or run on GPU with INT8
10
  # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
11
  # or run on CPU with INT8
12
- model = WhisperModel(model_size, device="cpu", compute_type="int8")
13
 
14
  def speech_to_text(audio_file, _model_size):
15
  global model_size, model
16
  if model_size != _model_size:
17
  model_size = _model_size
18
- #model = whisper.load_model(model_size)
19
- model = WhisperModel(model_size, compute_type="float16")
20
 
21
- #result = model.transcribe(audio_file)
22
- segments, info = model.transcribe(audio_file, beam_size=5)
23
 
24
- #return result["text"]
25
- return "".join([segment.text for segment in segments])
26
 
27
  gr.Interface(
28
  fn=speech_to_text,
 
1
  import gradio as gr
2
+ import whisper
3
+ #from faster_whisper import WhisperModel
4
 
5
+ #model_size = 'aka7774/whisper-large-v3-ct2'
6
+ model_size = 'large-v3'
7
+ model = whisper.load_model(model_size)
8
 
9
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16")
10
  # or run on GPU with INT8
11
  # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
12
  # or run on CPU with INT8
13
+ # model = WhisperModel(model_size, device="cpu", compute_type="int8")
14
 
15
  def speech_to_text(audio_file, _model_size):
16
  global model_size, model
17
  if model_size != _model_size:
18
  model_size = _model_size
19
+ model = whisper.load_model(model_size)
20
+ #model = WhisperModel(model_size, compute_type="float16")
21
 
22
+ result = model.transcribe(audio_file)
23
+ #segments, info = model.transcribe(audio_file, beam_size=5)
24
 
25
+ return result["text"]
26
+ #return "".join([segment.text for segment in segments])
27
 
28
  gr.Interface(
29
  fn=speech_to_text,