vumichien committed on
Commit
c5a0faa
1 Parent(s): 93d1452

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -172,11 +172,11 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
172
  # Read and convert youtube video
173
  _,file_ending = os.path.splitext(f'{video_file_path}')
174
  print(f'file enging is {file_ending}')
 
175
  print("starting conversion to wav")
176
- os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{video_file_path.replace(file_ending, ".wav")}"')
177
 
178
  # Get duration
179
- audio_file = video_file_path.replace(file_ending, ".wav")
180
  with contextlib.closing(wave.open(audio_file,'r')) as f:
181
  frames = f.getnframes()
182
  rate = f.getframerate()
@@ -184,10 +184,9 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
184
  print(f"conversion to wav ready, duration of audio file: {duration}")
185
 
186
  # Transcribe audio
187
- # options = dict(language=selected_source_lang, beam_size=5, best_of=5)
188
- # transcribe_options = dict(task="transcribe", **options)
189
- # result = model.transcribe(audio_file, **transcribe_options)
190
- result = model.transcribe(audio_file, task="transcribe", language=selected_source_lang)
191
  segments = result["segments"]
192
  print("starting whisper done with whisper")
193
  except Exception as e:
@@ -243,6 +242,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
243
 
244
 
245
  # ---- Gradio Layout -----
 
246
  video_in = gr.Video(label="Video file", mirror_webcam=False)
247
  youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
248
  video_out = gr.Video(label="Video Out", mirror_webcam=False)
@@ -305,8 +305,8 @@ with demo:
305
  with gr.Column():
306
  gr.Markdown('''
307
  ##### Here you can start the transcription process.
308
- ##### Please select source language for transcription.
309
- ##### Please select number of speakers for getting better results.
310
  ''')
311
  selected_source_lang.render()
312
  selected_whisper_model.render()
 
172
  # Read and convert youtube video
173
  _,file_ending = os.path.splitext(f'{video_file_path}')
174
  print(f'file enging is {file_ending}')
175
+ audio_file = video_file_path.replace(file_ending, ".wav")
176
  print("starting conversion to wav")
177
+ os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
178
 
179
  # Get duration
 
180
  with contextlib.closing(wave.open(audio_file,'r')) as f:
181
  frames = f.getnframes()
182
  rate = f.getframerate()
 
184
  print(f"conversion to wav ready, duration of audio file: {duration}")
185
 
186
  # Transcribe audio
187
+ options = dict(language=selected_source_lang, beam_size=5, best_of=5)
188
+ transcribe_options = dict(task="transcribe", **options)
189
+ result = model.transcribe(audio_file, **transcribe_options)
 
190
  segments = result["segments"]
191
  print("starting whisper done with whisper")
192
  except Exception as e:
 
242
 
243
 
244
  # ---- Gradio Layout -----
245
+ # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
246
  video_in = gr.Video(label="Video file", mirror_webcam=False)
247
  youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
248
  video_out = gr.Video(label="Video Out", mirror_webcam=False)
 
305
  with gr.Column():
306
  gr.Markdown('''
307
  ##### Here you can start the transcription process.
308
+ ##### Please select the source language for transcription.
309
+ ##### You should select a number of speakers for getting better results.
310
  ''')
311
  selected_source_lang.render()
312
  selected_whisper_model.render()