# Meeting_test / app.py
# Author: Harshit — first commit (90ddd8e)
import subprocess
import sys

# Install runtime dependencies at startup (the hosting environment does not
# pre-install them).  Fix: invoke pip as `sys.executable -m pip` so the
# packages land in the environment this script actually runs under, instead
# of whichever `pip` binary happens to be first on PATH.
_REQUIRED_PACKAGES = [
    'wordcloud',
    'git+https://github.com/openai/whisper.git',
    'transformers',
    'imageio==2.4.1',
    'moviepy',
    'keybert',
    'pytube',
]
for _pkg in _REQUIRED_PACKAGES:
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', _pkg])
import streamlit as st
import os
from wordcloud import WordCloud
from keybert import KeyBERT
import pandas as pd
import matplotlib.pyplot as plt
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
from moviepy.editor import *  # star import: supplies VideoFileClip / AudioFileClip used below
from tqdm import tqdm
import os  # NOTE(review): duplicate of the `import os` above — harmless, could be removed
import math
import nltk
# One-time download of the Punkt tokenizer data.  nltk is not referenced
# elsewhere in this file — presumably needed by a dependency; TODO confirm.
nltk.download('punkt')
import whisper
from transformers import pipeline
from pytube import YouTube
def process_video(path):
    """Download a YouTube video, transcribe it in 60-second chunks with
    Whisper, then summarize, sentiment-label, and keyword-extract the
    transcript.

    Side effects: downloads ``meeting.mp4``, writes/deletes temporary
    ``vid_to_aud<i>.mp3`` chunk files, and writes per-chunk results to
    ``output2.csv`` (columns: transcriptions, summary, sentiment).

    Parameters
    ----------
    path : str
        YouTube video URL.

    Returns
    -------
    tuple
        (full transcript text, concatenated chunk summaries, keyword list)
    """
    whisper_model = whisper.load_model("base")

    def SpeechToTextEng(aud_path):
        # Transcribe one audio file with Whisper; return plain text.
        result = whisper_model.transcribe(aud_path)
        return result["text"]

    time_range = 60  # chunk length in seconds

    def run_range(duration):
        # Number of `time_range`-second chunks needed to cover `duration` seconds.
        return math.ceil(duration / time_range)

    def audio_generator(media_path, aud=0, vid=0):
        """Split the media file into mp3 chunks named ``vid_to_aud<i>.mp3``.

        Returns the number of chunks written.  BUG FIX: the original
        assigned ``clip_run_range`` inside this function, which only created
        a local shadowing the outer ``clip_run_range = 0`` — so the caller's
        transcription loop ran zero times and the pipeline produced nothing.
        Returning the count makes it visible to the caller.
        """
        n_chunks = 0
        if vid == 1:
            clip = VideoFileClip(media_path)
            n_chunks = run_range(clip.duration)
            for i in range(n_chunks):
                left = i * time_range
                crop_clip = clip.subclip(left, left + time_range)
                try:
                    crop_clip.audio.write_audiofile("vid_to_aud" + str(i) + ".mp3")
                except Exception:
                    # Best effort: the final chunk can run past the clip's
                    # real duration; skip chunks that fail to export.
                    pass
        if aud == 1:
            audio_clip = AudioFileClip(media_path)
            n_chunks = run_range(audio_clip.duration)
            for i in range(n_chunks):
                left = i * time_range
                crop_clip = audio_clip.subclip(left, left + time_range)
                try:
                    crop_clip.write_audiofile("vid_to_aud" + str(i) + ".mp3")
                except Exception:
                    pass
        return n_chunks

    # Download the lowest-resolution stream — fastest download, and only the
    # audio track matters for transcription.
    yt = YouTube(path)
    stream = yt.streams.get_lowest_resolution()
    stream.download(filename='meeting.mp4')

    clip_run_range = audio_generator("./meeting.mp4", vid=1)

    # Transcribe each chunk, deleting the temp file once consumed.
    transcribed_lit = []
    for i in tqdm(range(clip_run_range)):
        transcribed = SpeechToTextEng("./vid_to_aud" + str(i) + ".mp3")
        transcribed_lit.append(transcribed)
        os.remove("./vid_to_aud" + str(i) + ".mp3")

    data = pd.DataFrame({'transcriptions': transcribed_lit})

    # Per-chunk summary and sentiment label.
    summarizer = pipeline("summarization")
    sentiment_analyzer = pipeline("sentiment-analysis")
    sumarized_lit = []
    sentiment_lit = []
    for i in tqdm(range(len(data))):
        summarized = summarizer(data.iloc[i, 0], min_length=75, max_length=300)[0]['summary_text']
        sentiment = sentiment_analyzer(data.iloc[i, 0])[0]['label']
        sumarized_lit.append(summarized)
        sentiment_lit.append(sentiment)
    data['summary'] = sumarized_lit
    data['sentiment'] = sentiment_lit
    data.to_csv('output2.csv', index=False)

    # Full transcript across all chunks (join instead of quadratic `+=`).
    tot_text = "".join(transcribed_lit)

    key_model = KeyBERT('distilbert-base-nli-mean-tokens')

    def extract_keywords(text, top_n=50):
        # KeyBERT yields (word, score) pairs; keep just the words.
        keywords = key_model.extract_keywords(text, top_n=top_n)
        return [keyword[0] for keyword in keywords]

    tot_keywords = extract_keywords(tot_text)

    def summarize_text(text):
        # Summarize in 500-word chunks and concatenate the partial summaries.
        chunk_size = 500
        words = text.split()
        # BUG FIX: `len(words) // chunk_size + 1` produced an extra empty
        # chunk whenever the word count was an exact multiple of chunk_size
        # (and one empty chunk for empty text), feeding "" to the summarizer.
        num_chunks = math.ceil(len(words) / chunk_size)
        total_summary = ""
        for i in tqdm(range(num_chunks)):
            chunk = " ".join(words[i * chunk_size:(i + 1) * chunk_size])
            total_summary += summarizer(chunk, min_length=75, max_length=200)[0]['summary_text']
        return total_summary

    tot_summary = summarize_text(tot_text)

    return tot_text, tot_summary, tot_keywords
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
def generate_word_cloud(text):
    """Render a word cloud for *text* inside the Streamlit page."""
    cloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    # Draw the cloud on a matplotlib figure and hand it to Streamlit.
    figure, axis = plt.subplots(figsize=(10, 5))
    axis.imshow(cloud, interpolation='bilinear')
    axis.axis('off')
    st.pyplot(figure)
def main():
    """Streamlit UI: accept a YouTube URL, run the pipeline, show results."""
    st.title("Meeting Summary Web App")
    youtube_url = st.text_input("Enter the YouTube video link")
    if st.button("Process Video"):
        if youtube_url:
            # Process the YouTube video end-to-end.
            tot_text, tot_summary, tot_keywords = process_video(youtube_url)
            if os.path.exists("output2.csv"):
                output_df = pd.read_csv("output2.csv")
                st.subheader("Transcriptions:")
                st.write(output_df["transcriptions"])
                # BUG FIX: process_video writes 'summary' and 'sentiment'
                # columns — there is no 'labels' column, so the original
                # `output_df["labels"]` raised KeyError at runtime.
                st.subheader("Summaries:")
                st.write(output_df["summary"])
                st.subheader("Sentiment:")
                st.write(output_df["sentiment"])
                st.subheader("Word Cloud:")
                generate_word_cloud(output_df["transcriptions"].str.cat(sep=' '))
                st.subheader("tot_text:")
                st.write(tot_text)
                st.subheader("tot_summary:")
                st.write(tot_summary)
                st.subheader("tot_keywords:")
                st.write(tot_keywords)
            else:
                st.write("No output file found.")
        else:
            # Robustness: tell the user why nothing happened.
            st.warning("Please enter a YouTube video link.")


if __name__ == "__main__":
    main()