Support manual multi-language detection

#1
by abielzulio - opened
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -4,10 +4,10 @@ from pytube import YouTube
4
 
5
  loaded_model = whisper.load_model("base")
6
  current_size = 'base'
7
- def inference(link):
8
  yt = YouTube(link)
9
  path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
10
- options = whisper.DecodingOptions(without_timestamps=True)
11
  results = loaded_model.transcribe(path)
12
  return results['text']
13
 
@@ -42,6 +42,8 @@ with block:
42
  with gr.Box():
43
  sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
44
 
 
 
45
  link = gr.Textbox(label="YouTube Link")
46
 
47
  with gr.Row().style(mobile_collapse=False, equal_height=True):
@@ -55,7 +57,7 @@ with block:
55
  btn = gr.Button("Transcribe")
56
 
57
  # Events
58
- btn.click(inference, inputs=[link], outputs=[text])
59
  link.change(populate_metadata, inputs=[link], outputs=[img, title])
60
  sz.change(change_model, inputs=[sz], outputs=[])
61
 
 
4
 
5
  loaded_model = whisper.load_model("base")
6
  current_size = 'base'
7
+ def inference(link, lang):
8
  yt = YouTube(link)
9
  path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
10
+ options = whisper.DecodingOptions(without_timestamps=True, language=lang)
11
  results = loaded_model.transcribe(path)
12
  return results['text']
13
 
 
42
  with gr.Box():
43
  sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
44
 
45
+ lang = gr.Textbox(label="Languange")
46
+
47
  link = gr.Textbox(label="YouTube Link")
48
 
49
  with gr.Row().style(mobile_collapse=False, equal_height=True):
 
57
  btn = gr.Button("Transcribe")
58
 
59
  # Events
60
+ btn.click(inference, inputs=[link, lang], outputs=[text])
61
  link.change(populate_metadata, inputs=[link], outputs=[img, title])
62
  sz.change(change_model, inputs=[sz], outputs=[])
63