Ubuntu committed on
Commit
0566ea1
·
1 Parent(s): 75f8860

add whisper v3 support

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -69,7 +69,7 @@ def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -
69
  print("Error:", response.status_code, response.text)
70
  return None # Return None on failure
71
 
72
- def run_asr(audio_file, youtube_url, with_timestamp):
73
  temp_file = None
74
  try:
75
  if youtube_url:
@@ -80,7 +80,14 @@ def run_asr(audio_file, youtube_url, with_timestamp):
80
  return "Please provide either an audio file or a YouTube URL."
81
 
82
  files = {'file': open(audio_file, 'rb')}
83
- data = {'language': 'en', 'model_name': 'whisper-large-v2-imda', 'with_timestamp': with_timestamp}
 
 
 
 
 
 
 
84
  response = requests.post(f"{API_URL}/asr", data=data, files=files)
85
 
86
  if response.status_code == 200:
@@ -120,6 +127,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
120
  youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
121
  video_player = gr.HTML(visible=False)
122
  timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
 
123
  with gr.Column(scale=3):
124
  result = gr.Textbox(
125
  label="Transcription Result",
@@ -128,7 +136,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
128
  )
129
 
130
  run_button = gr.Button("πŸš€ Transcribe Audio", variant="primary")
131
- run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle], outputs=[result])
132
 
133
  # Update video player and clear transcription and audio input when YouTube URL is entered
134
  youtube_input.change(
 
69
  print("Error:", response.status_code, response.text)
70
  return None # Return None on failure
71
 
72
+ def run_asr(audio_file, youtube_url, with_timestamp, model_choice):
73
  temp_file = None
74
  try:
75
  if youtube_url:
 
80
  return "Please provide either an audio file or a YouTube URL."
81
 
82
  files = {'file': open(audio_file, 'rb')}
83
+
84
+ # Update model_name based on the user's choice
85
+ if model_choice == "whisper_v3":
86
+ model_name = "official_v3"
87
+ else:
88
+ model_name = "whisper-large-v2-imda"
89
+
90
+ data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}
91
  response = requests.post(f"{API_URL}/asr", data=data, files=files)
92
 
93
  if response.status_code == 200:
 
127
  youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
128
  video_player = gr.HTML(visible=False)
129
  timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
130
+ model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
131
  with gr.Column(scale=3):
132
  result = gr.Textbox(
133
  label="Transcription Result",
 
136
  )
137
 
138
  run_button = gr.Button("πŸš€ Transcribe Audio", variant="primary")
139
+ run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle, model_choice], outputs=[result])
140
 
141
  # Update video player and clear transcription and audio input when YouTube URL is entered
142
  youtube_input.change(