# NOTE(review): installing packages via subprocess at import time is fragile
# (needs network access, blocks startup, may fight the active environment).
# Kept for compatibility with the original script; prefer requirements.txt.
import subprocess

for _pkg in (
    'wordcloud',
    'git+https://github.com/openai/whisper.git',
    'transformers',
    'imageio==2.4.1',
    'moviepy',
    'keybert',
    'pytube',
):
    subprocess.check_call(['pip', 'install', _pkg])

import math
import os

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import streamlit as st
import whisper
from keybert import KeyBERT
from moviepy.editor import AudioFileClip, VideoFileClip
from pytube import YouTube
from tqdm import tqdm
from transformers import pipeline
from wordcloud import WordCloud

nltk.download('punkt')


def process_video(path):
    """Download a YouTube video, transcribe it in 60-second chunks, then
    summarize, sentiment-label and keyword-extract the transcript.

    Parameters
    ----------
    path : str
        YouTube video URL.

    Returns
    -------
    tuple
        (full transcript str, overall summary str, list of top keywords).

    Side effects: writes ./meeting.mp4, temporary ./vid_to_aud<i>.mp3 files
    (removed after transcription) and ./output2.csv (columns:
    transcriptions, summary, sentiment).
    """
    whisper_model = whisper.load_model("base")

    def speech_to_text_eng(aud_path):
        # Transcribe one audio chunk with Whisper.
        return whisper_model.transcribe(aud_path)["text"]

    time_range = 60  # chunk length in seconds

    def run_range(duration):
        # Number of 60-second chunks needed to cover `duration` seconds.
        return math.ceil(duration / time_range)

    def audio_generator(media_path, aud=0, vid=0):
        """Split `media_path` into 60 s mp3 chunks named vid_to_aud<i>.mp3.

        Returns the number of chunks written.
        BUGFIX: the original assigned a *local* `clip_run_range`, so the
        caller's counter stayed 0 and the transcription loop never ran;
        returning the count fixes that.
        """
        if vid == 1:
            clip = VideoFileClip(media_path)
        elif aud == 1:
            clip = AudioFileClip(media_path)
        else:
            return 0
        duration = clip.duration
        n_chunks = run_range(duration)
        for i in range(n_chunks):
            left = i * time_range
            # BUGFIX: clamp the last chunk instead of letting subclip overrun
            # the media and swallowing the error with a bare `except: pass`
            # (which silently dropped the tail of the recording).
            right = min(left + time_range, duration)
            sub = clip.subclip(left, right)
            try:
                if vid == 1:
                    sub.audio.write_audiofile("vid_to_aud" + str(i) + ".mp3")
                else:
                    sub.write_audiofile("vid_to_aud" + str(i) + ".mp3")
            except Exception as exc:
                # Best-effort: report and skip a chunk that fails to render.
                print(f"audio chunk {i} failed: {exc}")
        return n_chunks

    # Download the *lowest* resolution stream — only the audio track is used,
    # so the smallest file is the right choice (the original comment claiming
    # "highest resolution" was wrong).
    yt = YouTube(path)
    stream = yt.streams.get_lowest_resolution()
    stream.download(filename='meeting.mp4')

    clip_run_range = audio_generator("./meeting.mp4", vid=1)

    transcribed_lit = []
    for i in tqdm(range(clip_run_range)):
        chunk_file = "./vid_to_aud" + str(i) + ".mp3"
        transcribed_lit.append(speech_to_text_eng(chunk_file))
        os.remove(chunk_file)  # clean up each temp mp3 as soon as it is used

    data = pd.DataFrame({'transcriptions': transcribed_lit})

    summarizer = pipeline("summarization")
    sentiment_analyzer = pipeline("sentiment-analysis")

    summarized_lit = []
    sentiment_lit = []
    for i in tqdm(range(len(data))):
        text_i = data.iloc[i, 0]
        summarized_lit.append(
            summarizer(text_i, min_length=75, max_length=300)[0]['summary_text'])
        sentiment_lit.append(sentiment_analyzer(text_i)[0]['label'])

    data['summary'] = summarized_lit
    data['sentiment'] = sentiment_lit
    data.to_csv('output2.csv', index=False)

    # Join once instead of quadratic `tot_text += ...` concatenation.
    tot_text = "".join(data['transcriptions'])

    key_model = KeyBERT('distilbert-base-nli-mean-tokens')
    tot_keywords = [kw for kw, _score in
                    key_model.extract_keywords(tot_text, top_n=50)]

    def summarize_text(text):
        # Summarize the transcript in 500-word chunks and concatenate the
        # partial summaries (the model cannot take the whole text at once).
        chunk_size = 500
        words = text.split()
        total_summary = ""
        for i in tqdm(range(len(words) // chunk_size + 1)):
            chunk = " ".join(words[i * chunk_size:(i + 1) * chunk_size])
            if not chunk:
                # Skip an empty trailing chunk (word count a multiple of 500).
                continue
            total_summary += summarizer(
                chunk, min_length=75, max_length=200)[0]['summary_text']
        return total_summary

    tot_summary = summarize_text(tot_text)
    return tot_text, tot_summary, tot_keywords


def generate_word_cloud(text):
    """Render a word cloud of `text` into the current Streamlit page."""
    wordcloud = WordCloud(width=800, height=400,
                          background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)


def main():
    """Streamlit entry point: accept a YouTube URL and display the analysis."""
    st.title("Meeting Summary Web App")
    youtube_url = st.text_input("Enter the YouTube video link")
    if st.button("Process Video"):
        if youtube_url:
            tot_text, tot_summary, tot_keywords = process_video(youtube_url)
            if os.path.exists("output2.csv"):
                output_df = pd.read_csv("output2.csv")
                st.subheader("Transcriptions:")
                st.write(output_df["transcriptions"])
                st.subheader("Labels:")
                # BUGFIX: the CSV has no "labels" column; process_video writes
                # "sentiment", so the original raised KeyError here.
                st.write(output_df["sentiment"])
                st.subheader("Word Cloud:")
                generate_word_cloud(output_df["transcriptions"].str.cat(sep=' '))
                st.subheader("tot_text:")
                st.write(tot_text)
                st.subheader("tot_summary:")
                st.write(tot_summary)
                st.subheader("tot_keywords:")
                st.write(tot_keywords)
            else:
                st.write("No output file found.")


if __name__ == "__main__":
    main()