0mid committed on
Commit
2674a36
•
1 Parent(s): 16a9a7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -64
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  # import whisper
2
  from faster_whisper import WhisperModel
3
  import datetime
@@ -345,70 +350,33 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
345
  raise RuntimeError("Error Running inference with local model", e)
346
 
347
 
348
- # ---- Gradio Layout -----
349
- # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
350
- video_in = gr.Video(label="Video file", mirror_webcam=False)
351
- youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
352
- df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
353
- memory = psutil.virtual_memory()
354
- selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
355
- selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
356
- number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
357
- system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
358
- download_transcript = gr.File(label="Download transcript")
359
- transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
360
- title = "Whisper speaker diarization"
361
- demo = gr.Blocks(title=title)
362
- demo.encrypt = False
363
-
364
-
365
- with demo:
366
- with gr.Tab("Med Speech Pro"):
367
- gr.Markdown('''
368
- <div>
369
- <h1 style='text-align: center'>Med Speech Pro : Lightning-Fast</h1>
370
- Description: Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription
371
- </div>
372
- ''')
373
- with gr.Row():
374
- with gr.Column():
375
- youtube_url_in.render()
376
- download_youtube_btn = gr.Button("Download Youtube video")
377
- download_youtube_btn.click(get_youtube, [youtube_url_in], [
378
- video_in])
379
- print(video_in)
380
-
381
-
382
- with gr.Row():
383
- with gr.Column():
384
- video_in.render()
385
- with gr.Column():
386
- gr.Markdown('''.
387
- ''')
388
- selected_source_lang.render()
389
- selected_whisper_model.render()
390
- number_speakers.render()
391
- transcribe_btn = gr.Button("Transcribe Now")
392
- transcribe_btn.click(speech_to_text,
393
- [video_in, selected_source_lang, selected_whisper_model, number_speakers],
394
- [transcription_df, system_info, download_transcript]
395
- )
396
-
397
- with gr.Row():
398
- gr.Markdown('''
399
- ##### Results
400
- ##### ''')
401
-
402
 
403
- with gr.Row():
404
- with gr.Column():
405
- download_transcript.render()
406
- transcription_df.render()
407
- system_info.render()
408
- gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
409
 
410
-
411
-
412
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
 
414
- demo.launch(debug=True,share=True)
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import datetime
5
+ import subprocess
6
  # import whisper
7
  from faster_whisper import WhisperModel
8
  import datetime
 
350
  raise RuntimeError("Error Running inference with local model", e)
351
 
352
 
353
+ # Streamlit app layout
354
+ st.title('Med Speech Pro : Lightning-Fast')
355
+ st.markdown('Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
+ # Handling YouTube URL input
358
+ youtube_url = st.text_input("Enter YouTube URL")
 
 
 
 
359
 
360
+ if st.button('Download YouTube Video'):
361
+ # Call your function to handle YouTube video downloading
362
+ video_file_path = get_youtube(youtube_url)
363
+ st.video(video_file_path)
364
+
365
+ # File Uploader for videos
366
+ video_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])
367
+
368
+ selected_source_lang = st.selectbox("Select Spoken Language in Video", source_language_list)
369
+ selected_whisper_model = st.selectbox("Select Whisper Model", whisper_models)
370
+ number_of_speakers = st.number_input("Number of Speakers (0 for automatic detection)", min_value=0, value=0)
371
+
372
+ if st.button('Transcribe Video'):
373
+ if video_file is not None:
374
+ # Process the video file
375
+ df_results, system_info, save_path = speech_to_text(video_file, selected_source_lang, selected_whisper_model, number_of_speakers)
376
+ st.dataframe(df_results)
377
+ st.markdown(system_info)
378
+ st.download_button('Download Transcript', data=pd.read_csv(save_path).to_csv(), file_name='transcript.csv')
379
+ else:
380
+ st.error("Please upload a video file or download one from YouTube.")
381
 
382
+ # Additional components and functionalities can be added here as needed.