Harshit committed on
Commit 7007c20
1 Parent(s): 38b55ce
Files changed (1)
  1. app.py +212 -2
app.py CHANGED
@@ -1,4 +1,214 @@
  import streamlit as st
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)

+ import subprocess
+ # Run the pip install commands for runtime dependencies
+ subprocess.check_call(['pip', 'install', 'wordcloud'])
+ subprocess.check_call(['pip', 'install', 'git+https://github.com/openai/whisper.git'])
+ subprocess.check_call(['pip', 'install', 'transformers'])
+ subprocess.check_call(['pip', 'install', 'imageio==2.4.1'])
+ subprocess.check_call(['pip', 'install', 'moviepy'])
+ subprocess.check_call(['pip', 'install', 'keybert'])
+ subprocess.check_call(['pip', 'install', 'pytube'])
+
  import streamlit as st
+ import os
+ from wordcloud import WordCloud
+ from keybert import KeyBERT
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ # //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ from moviepy.editor import *
+ from tqdm import tqdm
+ import math
+ import nltk
+ nltk.download('punkt')
+ import whisper
+ from transformers import pipeline
+ from pytube import YouTube
+
+
+ def process_video(path):
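+     # Download the YouTube video at `path`, split its audio into one-minute chunks,
+     # transcribe each chunk with Whisper, summarize and sentiment-label each chunk,
+     # write the per-chunk results to output2.csv, and return the full transcript,
+     # an overall summary, and extracted keywords.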
+     whisper_model = whisper.load_model("base")
+
+     def SpeechToTextEng(aud_path):
+         result = whisper_model.transcribe(aud_path)
+         return result["text"]
+
+     def run_range(duration):
+         # Number of one-minute windows needed to cover the full duration.
+         time = duration / 60
+         return math.ceil(time)
+
+     time_range = 60
+     clip_run_range = 0
+     clip_duration = 0
+
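+     # audio_generator() below writes one-minute MP3 chunks named vid_to_aud<i>.mp3
+     # into the working directory and returns the number of chunks written.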
+     def audio_generator(path, aud=0, vid=0):
+         n_chunks = 0
+         if vid == 1:
+             clip = VideoFileClip(path)
+             clip_duration = clip.duration
+             n_chunks = run_range(clip_duration)
+             for i in range(n_chunks):
+                 left = i * time_range
+                 right = left + time_range
+                 crop_clip = clip.subclip(left, right)
+                 try:
+                     crop_clip.audio.write_audiofile("vid_to_aud" + str(i) + ".mp3")
+                 except Exception:
+                     # Skip chunks that fail to write (e.g. a window past the end of the clip).
+                     pass
+
+         if aud == 1:
+             audio_clip = AudioFileClip(path)
+             clip_duration = audio_clip.duration
+             print(clip_duration)
+             n_chunks = run_range(clip_duration)
+             print(n_chunks)
+             for i in range(n_chunks):
+                 left = i * time_range
+                 right = left + time_range
+                 crop_clip = audio_clip.subclip(left, right)
+                 try:
+                     crop_clip.write_audiofile("vid_to_aud" + str(i) + ".mp3")
+                 except Exception:
+                     pass
+
+         return n_chunks
+
+     # YouTube video URL
+     video_url = path
+
+     # Create a YouTube object
+     yt = YouTube(video_url)
+
+     # Get the lowest resolution video stream
+     stream = yt.streams.get_lowest_resolution()
+
+     # Download the video
+     stream.download(filename='meeting.mp4')
+
+     clip_run_range = audio_generator("./meeting.mp4", vid=1)
+     transcribed_lit = []
+     label_lit = []
+     translated_lit = []
+
+     # Transcribe each audio chunk with Whisper, then delete the chunk file.
+     for i in tqdm(range(clip_run_range)):
+         transcribed = SpeechToTextEng("./vid_to_aud" + str(i) + ".mp3")
+         transcribed_lit.append(transcribed)
+         os.remove("./vid_to_aud" + str(i) + ".mp3")
+
+     data = pd.DataFrame({'transcriptions': transcribed_lit})
+
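+     # Hugging Face pipelines; with no model specified, each loads its default
+     # checkpoint (downloaded on first run).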
+     summarizer = pipeline("summarization")
+     sentiment_analyzer = pipeline("sentiment-analysis")
+
+     summarized_lit = []
+     sentiment_lit = []
+     for i in tqdm(range(len(data))):
+         summarized = summarizer(data.iloc[i, 0], min_length=75, max_length=300)[0]['summary_text']
+         sentiment = sentiment_analyzer(data.iloc[i, 0])[0]['label']
+         summarized_lit.append(summarized)
+         sentiment_lit.append(sentiment)
+
+     data['summary'] = summarized_lit
+     data['sentiment'] = sentiment_lit
+     data.to_csv('output2.csv', index=False)
+
+     tot_text = ""
+     for i in range(len(data)):
+         tot_text = tot_text + data.iloc[i, 0]
+
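+     # KeyBERT with the distilbert-base-nli-mean-tokens sentence-embedding model
+     # scores candidate keywords against the full transcript; the top 50 are kept.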
+     key_model = KeyBERT('distilbert-base-nli-mean-tokens')
+
+     def extract_keywords(text, top_n=50):
+         keywords = key_model.extract_keywords(text, top_n=top_n)
+         return [keyword[0] for keyword in keywords]
+
+     tot_keywords = extract_keywords(tot_text)
+
+     def get_500_words(text, left, right):
+         words = text.split()
+         first_500_words = ' '.join(words[left:right])
+         return first_500_words
+
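+     # summarize_text() below splits the transcript into 500-word chunks and
+     # concatenates the chunk summaries, so long transcripts can be summarized
+     # piece by piece without exceeding the summarizer's input limit.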
+     def summarize_text(text):
+         chunk_size = 500  # Number of words per chunk
+         total_summary = ""  # Accumulated summary
+
+         words = text.split()  # Split the text into individual words
+         num_chunks = len(words) // chunk_size + 1  # Number of chunks to process
+
+         for i in tqdm(range(num_chunks)):
+             start_index = i * chunk_size
+             end_index = start_index + chunk_size
+             chunk = " ".join(words[start_index:end_index])
+
+             # Pass the chunk to the summarizer
+             chunk_summary = summarizer(chunk, min_length=75, max_length=200)[0]['summary_text']
+             total_summary += chunk_summary
+
+         return total_summary
+
+     tot_summary = summarize_text(tot_text)
+     return tot_text, tot_summary, tot_keywords
+
+
+ # //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ def generate_word_cloud(text):
+     # Create a WordCloud object
+     wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
+
+     # Display the generated word cloud
+     fig, ax = plt.subplots(figsize=(10, 5))
+
+     # Plot the word cloud on the axis
+     ax.imshow(wordcloud, interpolation='bilinear')
+     ax.axis('off')
+     st.pyplot(fig)
+
+
+ def main():
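+     # Streamlit UI: read a YouTube URL, run process_video(), then display the
+     # per-chunk transcriptions and sentiment labels, a word cloud, the full
+     # transcript, the overall summary, and the extracted keywords.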
+     st.title("Meeting Summary Web App")
+
+     # YouTube link input
+     youtube_url = st.text_input("Enter the YouTube video link")
+
+     if st.button("Process Video"):
+         if youtube_url:
+             # Process the YouTube video
+             tot_text, tot_summary, tot_keywords = process_video(youtube_url)
+
+             # Display the output
+             if os.path.exists("output2.csv"):
+                 output_df = pd.read_csv("output2.csv")
+                 st.subheader("Transcriptions:")
+                 st.write(output_df["transcriptions"])
+
+                 st.subheader("Sentiment:")
+                 st.write(output_df["sentiment"])
+
+                 st.subheader("Word Cloud:")
+                 generate_word_cloud(output_df["transcriptions"].str.cat(sep=' '))
+
+                 st.subheader("tot_text:")
+                 st.write(tot_text)
+
+                 st.subheader("tot_summary:")
+                 st.write(tot_summary)
+
+                 st.subheader("tot_keywords:")
+                 st.write(tot_keywords)
+
+             else:
+                 st.write("No output file found.")

+ if __name__ == "__main__":
+     main()