DiegoLigtenberg committed on
Commit
2652f0e
1 Parent(s): 5d15781

Add application file

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from models import BagOfModels, SoundToText, TextToSummary
3
+ from settings import MODEL_PARSER
4
# Parsed model settings; expanded into keyword arguments when a model is loaded.
args = MODEL_PARSER

# Page-level options, collected in one mapping and applied in a single call.
_page_options = {
    "page_title": "TTS Applications | Incore Solutions",
    "layout": "wide",
    "menu_items": {
        "About": """This is a simple GUI for OpenAI's Whisper.""",
    },
}
st.set_page_config(**_page_options)
13
+
14
def open_instructions():
    """Render the contents of ``instructions.md`` on the page.

    The file is looked up relative to the current working directory and its
    text is passed to ``st.write``.

    Raises:
        OSError: If ``instructions.md`` cannot be opened.
    """
    # Decode explicitly as UTF-8 so the text does not depend on the platform's
    # locale encoding (which would garble or reject non-ASCII characters).
    with open("instructions.md", "r", encoding="utf-8") as f:
        st.write(f.read())
17
+
18
# Render the input-type selector on the sidebar, followed by the settings form.
input_type = st.sidebar.selectbox("Input Type", ["YouTube", "File"])

with st.sidebar.form("input_form"):
    # Only the widget matching the chosen input type is rendered, so only one
    # of `youtube_url` / `input_file` is bound on any given run.
    if input_type == "YouTube":
        youtube_url = st.text_input("Youtube URL")
    elif input_type == "File":
        input_file = st.file_uploader("File", type=["mp3", "wav"])

    # Offer only the models whose name contains "whisper".
    whisper_names = [
        name for name in BagOfModels.get_model_names() if "whisper" in name
    ]
    # Preselect the second entry when there is one; otherwise fall back to the
    # first so a single-model list does not yield an out-of-range index.
    whisper_model = st.selectbox(
        "Whisper model",
        options=whisper_names,
        index=1 if len(whisper_names) > 1 else 0,
    )

    summary = st.checkbox("summarize")
    if summary:
        # `min_sum` / `max_sum` are only bound when summarising is enabled.
        min_sum = st.number_input("Minimum words in the summary", min_value=1, step=1)
        # Clamp upwards: the maximum may never be smaller than the minimum.
        # (Using min() here would cap the maximum at `min_sum` instead.)
        max_sum = max(
            min_sum,
            st.number_input("Maximum words in the summary", min_value=2, step=1),
        )
    st.form_submit_button(label="Save settings")

# The transcription trigger lives in its own form, separate from the settings.
with st.sidebar.form("save settings"):
    transcribe = st.form_submit_button(label="Transcribe!")
36
+
37
+
38
# When "Transcribe!" was pressed: validate the chosen input, then load the
# selected model and store its speech-to-text result in the session state.
if transcribe:
    source = None  # stays None when validation fails

    if input_type == "YouTube":
        if youtube_url and youtube_url.startswith("http"):
            source = youtube_url
        else:
            st.error("Please enter a valid YouTube URL")
            open_instructions()
    elif input_type == "File":
        if input_file:
            source = input_file
        else:
            st.error("Please upload a file")

    if source is not None:
        model = BagOfModels.load_model(whisper_model, **vars(args))
        st.session_state.transcription = model.predict_stt(
            source=source,
            source_type=input_type,
            model_task="stt",
        )
53
+
54
# Render the stored transcription, if there is one in the session state.
if "transcription" in st.session_state:
    transcription = st.session_state.transcription
    transcription.whisper()

    # Two columns: transcription details on the left, media on the right.
    transcription_col, media_col = st.columns(2, gap="large")

    # Audio player fed with the raw bytes of the audio file.
    transcription_col.markdown("#### Audio")
    with open(transcription.audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
        transcription_col.audio(audio_bytes)
    transcription_col.markdown("---")
    transcription_col.markdown(f"#### Transcription (whisper model - `{whisper_model}`)")
    transcription_col.markdown(f"##### Language: `{transcription.language}`")

    # Plain transcribed text inside its own expander.
    raw_output = transcription_col.expander("Raw output")
    raw_output.markdown(transcription.raw_output["text"])

    if summary:
        summarized_output = transcription_col.expander("summarized output")
        # Only the first 1024 * 4 characters are summarised.
        # TODO: find a way to summarise longer transcriptions.
        summary_input = str(transcription.text[:1024*4])
        summarizer = TextToSummary(summary_input, min_sum, max_sum)
        text_summary = summarizer.get_summary()
        summarized_output.markdown(text_summary[0]["summary_text"])

    # One markdown line per segment, prefixed with its rounded start/end time.
    time_annotated_output = transcription_col.expander("time_annotated_output")
    for segment in transcription.segments:
        time_annotated_output.markdown(
            f"""[{round(segment["start"], 1)} - {round(segment["end"], 1)}] - {segment["text"]}"""
        )

    # Show the source video only for YouTube input; nothing extra for files.
    if input_type == "YouTube":
        media_col.markdown("---")
        media_col.markdown("#### Original YouTube Video")
        media_col.video(transcription.source)
91
+