Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,3 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
import os
|
4 |
-
import datetime
|
5 |
-
import subprocess
|
6 |
# import whisper
|
7 |
from faster_whisper import WhisperModel
|
8 |
import datetime
|
@@ -350,33 +345,70 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
350 |
raise RuntimeError("Error Running inference with local model", e)
|
351 |
|
352 |
|
353 |
-
#
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
|
368 |
-
|
369 |
-
|
370 |
-
|
|
|
|
|
|
|
371 |
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
df_results, system_info, save_path = speech_to_text(video_file, selected_source_lang, selected_whisper_model, number_of_speakers)
|
376 |
-
st.dataframe(df_results)
|
377 |
-
st.markdown(system_info)
|
378 |
-
st.download_button('Download Transcript', data=pd.read_csv(save_path).to_csv(), file_name='transcript.csv')
|
379 |
-
else:
|
380 |
-
st.error("Please upload a video file or download one from YouTube.")
|
381 |
|
382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# import whisper
|
2 |
from faster_whisper import WhisperModel
|
3 |
import datetime
|
|
|
345 |
raise RuntimeError("Error Running inference with local model", e)
|
346 |
|
347 |
|
348 |
+
# ---- Gradio Layout -----
|
349 |
+
# Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
|
350 |
+
video_in = gr.Video(label="Video file", mirror_webcam=False)
|
351 |
+
youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
352 |
+
df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
|
353 |
+
memory = psutil.virtual_memory()
|
354 |
+
selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
|
355 |
+
selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
|
356 |
+
number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
|
357 |
+
system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
|
358 |
+
download_transcript = gr.File(label="Download transcript")
|
359 |
+
transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
|
360 |
+
title = "Whisper speaker diarization"
|
361 |
+
demo = gr.Blocks(title=title)
|
362 |
+
demo.encrypt = False
|
363 |
+
|
364 |
+
|
365 |
+
with demo:
|
366 |
+
with gr.Tab("Med Speech Pro"):
|
367 |
+
gr.Markdown('''
|
368 |
+
<div>
|
369 |
+
<h1 style='text-align: center'>Med Speech Pro : Lightning-Fast</h1>
|
370 |
+
Description: Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription
|
371 |
+
</div>
|
372 |
+
''')
|
373 |
+
with gr.Row():
|
374 |
+
with gr.Column():
|
375 |
+
youtube_url_in.render()
|
376 |
+
download_youtube_btn = gr.Button("Download Youtube video")
|
377 |
+
download_youtube_btn.click(get_youtube, [youtube_url_in], [
|
378 |
+
video_in])
|
379 |
+
print(video_in)
|
380 |
+
|
381 |
+
|
382 |
+
with gr.Row():
|
383 |
+
with gr.Column():
|
384 |
+
video_in.render()
|
385 |
+
with gr.Column():
|
386 |
+
gr.Markdown('''.
|
387 |
+
''')
|
388 |
+
selected_source_lang.render()
|
389 |
+
selected_whisper_model.render()
|
390 |
+
number_speakers.render()
|
391 |
+
transcribe_btn = gr.Button("Transcribe Now")
|
392 |
+
transcribe_btn.click(speech_to_text,
|
393 |
+
[video_in, selected_source_lang, selected_whisper_model, number_speakers],
|
394 |
+
[transcription_df, system_info, download_transcript]
|
395 |
+
)
|
396 |
+
|
397 |
+
with gr.Row():
|
398 |
+
gr.Markdown('''
|
399 |
+
##### Results
|
400 |
+
##### ''')
|
401 |
+
|
402 |
|
403 |
+
with gr.Row():
|
404 |
+
with gr.Column():
|
405 |
+
download_transcript.render()
|
406 |
+
transcription_df.render()
|
407 |
+
system_info.render()
|
408 |
+
gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
|
409 |
|
410 |
+
|
411 |
+
|
412 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
|
414 |
+
demo.launch(debug=True,share=True)
|