vumichien committed on
Commit 3c72edb
1 Parent(s): 97ff720

Update app.py

Files changed (1)
  1. app.py +8 -4
app.py CHANGED
@@ -139,7 +139,7 @@ pipe = pipeline(
     chunk_length_s=30,
     device=device,
 )
-
+os.makedirs('output', exist_ok=True)
 pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 
 embedding_model = PretrainedSpeakerEmbedding(
@@ -286,8 +286,10 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
         *Processing time: {time_diff:.5} seconds.*
         *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}MiB.*
         """
-
-        return pd.DataFrame(objects), system_info
+        save_path = "output/transcript_result.csv"
+        df_results = pd.DataFrame(objects)
+        df_results.to_csv(save_path)
+        return df_results, system_info, save_path
 
     except Exception as e:
         raise RuntimeError("Error Running inference with local model", e)
@@ -303,6 +305,7 @@ selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", v
 selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
 number_speakers = gr.Number(precision=0, value=2, label="Selected number of speakers", interactive=True)
 system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
+download_transcript = gr.File(label="Download transcript")
 transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
 title = "Whisper speaker diarization"
 demo = gr.Blocks(title=title)
@@ -358,8 +361,9 @@ with demo:
            selected_source_lang.render()
            selected_whisper_model.render()
            number_speakers.render()
+           download_transcript.render()
            transcribe_btn = gr.Button("Transcribe audio and diarization")
-           transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model, number_speakers], [transcription_df, system_info])
+           transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model, number_speakers], [transcription_df, system_info, download_transcript])
 
 
        with gr.Row():
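
For context, this commit does two things: speech_to_text() now also writes the transcript DataFrame to output/transcript_result.csv and returns that path as a third value, and the UI gains a gr.File component wired as a third output of transcribe_btn.click() so the CSV can be downloaded. The sketch below is a minimal, self-contained illustration of that handler/output wiring, not the Space's full app: fake_speech_to_text() and its dummy transcript are stand-ins for the real pipeline, and any name not appearing in the diff is hypothetical.

# Minimal sketch of the pattern introduced by this commit (illustrative only).
import os
import pandas as pd
import gradio as gr

# Same idea as the added os.makedirs call: make sure the output folder exists.
os.makedirs("output", exist_ok=True)

def fake_speech_to_text(num_speakers):
    # Stand-in for the real speech_to_text(): build a transcript-like DataFrame.
    df_results = pd.DataFrame(
        {"Speaker": [f"SPEAKER {i % num_speakers}" for i in range(4)],
         "Text": ["..."] * 4}
    )
    # Persist the transcript and return the path so Gradio can offer it for download.
    save_path = "output/transcript_result.csv"
    df_results.to_csv(save_path)
    system_info = "*Processing time: 0.0 seconds.*"
    return df_results, system_info, save_path

with gr.Blocks() as demo:
    number_speakers = gr.Number(precision=0, value=2, label="Number of speakers")
    transcribe_btn = gr.Button("Transcribe")
    transcription_df = gr.DataFrame(label="Transcription dataframe")
    system_info = gr.Markdown()
    download_transcript = gr.File(label="Download transcript")
    # One output component per returned value, in the same order:
    # DataFrame -> gr.DataFrame, markdown string -> gr.Markdown, file path -> gr.File.
    transcribe_btn.click(
        fake_speech_to_text,
        [number_speakers],
        [transcription_df, system_info, download_transcript],
    )

if __name__ == "__main__":
    demo.launch()

Returning the saved path (rather than the DataFrame again) is what lets gr.File render a download link, which is why the click() outputs list grows from two components to three in the diff above.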