Update app.py
app.py
CHANGED
@@ -139,7 +139,7 @@ pipe = pipeline(
     chunk_length_s=30,
     device=device,
 )
-
+os.makedirs('output', exist_ok=True)
 pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 
 embedding_model = PretrainedSpeakerEmbedding(
@@ -286,8 +286,10 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
         *Processing time: {time_diff:.5} seconds.*
         *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}MiB.*
         """
-
-
+        save_path = "output/transcript_result.csv"
+        df_results = pd.DataFrame(objects)
+        df_results.to_csv(save_path)
+        return df_results, system_info, save_path
 
     except Exception as e:
         raise RuntimeError("Error Running inference with local model", e)
@@ -303,6 +305,7 @@ selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", v
 selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
 number_speakers = gr.Number(precision=0, value=2, label="Selected number of speakers", interactive=True)
 system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
+download_transcript = gr.File(label="Download transcript")
 transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
 title = "Whisper speaker diarization"
 demo = gr.Blocks(title=title)
@@ -358,8 +361,9 @@ with demo:
             selected_source_lang.render()
             selected_whisper_model.render()
             number_speakers.render()
+            download_transcript.render()
             transcribe_btn = gr.Button("Transcribe audio and diarization")
-            transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model, number_speakers], [transcription_df, system_info])
+            transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model, number_speakers], [transcription_df, system_info, download_transcript])
 
 
         with gr.Row():
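Taken together, the hunks above wire a downloadable CSV into the Gradio UI: the handler saves its results table under output/, returns the file path as an extra output, and a gr.File component renders it as a download link. Below is a minimal, self-contained sketch of that pattern, assuming gradio and pandas are installed; the fake_transcribe handler and its dummy text input are hypothetical stand-ins for the Space's speech_to_text function and video input, not the actual app code.

import os

import gradio as gr
import pandas as pd

# Ensure the output folder exists before any handler tries to write into it.
os.makedirs("output", exist_ok=True)

def fake_transcribe(text):
    # Hypothetical stand-in for speech_to_text: build a result table,
    # persist it as CSV, and return (table, file path) for the two outputs.
    df = pd.DataFrame({"Start": [0.0], "End": [1.0], "Speaker": ["SPEAKER 1"], "Text": [text]})
    save_path = "output/transcript_result.csv"
    df.to_csv(save_path)
    return df, save_path

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Dummy input")
    table = gr.DataFrame(label="Transcription dataframe")
    download = gr.File(label="Download transcript")  # receives the returned file path
    btn = gr.Button("Run")
    btn.click(fake_transcribe, [inp], [table, download])

if __name__ == "__main__":
    demo.launch()

Returning the path rather than the DataFrame to gr.File is what makes the download work: Gradio serves whatever file the handler's extra return value points to, so the CSV written by to_csv becomes directly downloadable from the UI.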