Spaces:

kajalag
/

Whatsapp_Chat_Analyzer

Build error

App Files Community

Whatsapp_Chat_Analyzer / app.py

kajalag

Upload app.py

7571eef over 1 year ago

raw

history blame

5.49 kB

	import streamlit as st
	from transformers import pipeline
	from transformers import AutoTokenizer
	from transformers import AutoModelForSequenceClassification
	import warnings
	warnings.filterwarnings("ignore")
	import nltk
	nltk.download('all')
	import matplotlib.pyplot as plt
	import helper
	import preprocessor
	from mtranslate import translate
	import pandas as pd
	import os
	from gtts import gTTS
	import base64
	import torch
	import seaborn as sns
	def _render_figure(fig):
	    """Send *fig* to the Streamlit page, then close it.

	    ``plt.subplots()`` registers each figure with pyplot's global state and
	    Streamlit re-executes the script on every interaction, so figures that
	    are never closed accumulate in memory.
	    """
	    st.pyplot(fig)
	    plt.close(fig)


	# NOTE(review): the nesting below was reconstructed from the statement
	# order and blank-line grouping because the original indentation was lost;
	# confirm it against the intended layout.
	st.sidebar.title("Whatsapp Chat analyzer")

	uploaded_file = st.sidebar.file_uploader("Choose a file")

	if uploaded_file is not None:
	    # Decode the uploaded export and hand it to the project parser.
	    bytes_data = uploaded_file.getvalue()
	    data = bytes_data.decode("utf-8")
	    df_new = preprocessor.preprocess(data)

	    # Sidebar selector: every distinct value of the 'users' column, sorted,
	    # with a group-wide option pinned to the top.
	    user_list = df_new['users'].unique().tolist()
	    user_list.sort()
	    user_list.insert(0, "Group analysis")
	    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

	    if st.sidebar.button("Show Analysis"):
	        # ---- Headline counters -------------------------------------------
	        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
	        st.title("Top Statistics")
	        col1, col2, col3 = st.columns(3)

	        with col1:
	            st.header("Total Messages")
	            st.title(num_messages)
	        with col2:
	            st.header("Total Words")
	            st.title(words)
	        with col3:
	            st.header("Links Shared")
	            st.title(num_links)

	        # ---- Timelines ---------------------------------------------------
	        st.title("Timeline")
	        col1, col2 = st.columns(2)

	        with col1:
	            st.header("Monthly ")
	            # The helper result is indexed by 'time' and 'message' below.
	            timeline = helper.monthly_timeline(selected_user, df_new)
	            fig, ax = plt.subplots()
	            ax.plot(timeline['time'], timeline['message'])
	            plt.xticks(rotation='vertical')
	            _render_figure(fig)
	        with col2:
	            st.title("Daily")
	            # The helper result is indexed by 'Date' and 'message' below.
	            daily_timeline = helper.Daily_timeline(selected_user, df_new)
	            fig, ax = plt.subplots()
	            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
	            plt.xticks(rotation='vertical')
	            _render_figure(fig)

	        # ---- Activity maps -----------------------------------------------
	        st.title("Activity Map")
	        col1, col2 = st.columns(2)

	        with col1:
	            st.header("Most busy day")
	            busy_day = helper.week_activity_map(selected_user, df_new)
	            fig, ax = plt.subplots()
	            ax.bar(
	                busy_day.index,
	                busy_day.values,
	                color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'),
	            )
	            plt.xticks(rotation='vertical')
	            _render_figure(fig)
	        with col2:
	            st.header("Most busy Month")
	            busy_month = helper.month_activity_map(selected_user, df_new)
	            fig, ax = plt.subplots()
	            ax.bar(
	                busy_month.index,
	                busy_month.values,
	                color=('indigo', 'blue', 'green', 'red'),
	            )
	            plt.xticks(rotation='vertical')
	            _render_figure(fig)

	        st.title("Weekly Activity HeatMap")
	        heatmap_data = helper.activity_heatmap(selected_user, df_new)
	        fig, ax = plt.subplots()
	        # No ``ax=`` is passed, so seaborn draws on the current axes, which
	        # is the one just created by ``plt.subplots()``.
	        ax = sns.heatmap(heatmap_data, cmap='RdBu', linewidths=1, linecolor='black')
	        _render_figure(fig)

	        # ---- Per-user ranking (group view only) --------------------------
	        if selected_user == "Group analysis":
	            st.title("Most busy user")
	            x, new_df = helper.most_busy_users(df_new)
	            fig, ax = plt.subplots()
	            col1, col2 = st.columns(2)

	            with col1:
	                ax.bar(x.index, x.values, color=('blue', 'red', 'pink', 'orange', 'green'))
	                plt.xticks(rotation='vertical')
	                _render_figure(fig)
	            with col2:
	                st.dataframe(new_df)

	        # ---- Word-level sentiment tables ---------------------------------
	        st.title("Chat Sentiment Analysis")
	        col1, col2, col3 = st.columns(3)

	        with col1:
	            st.header("Positive")
	            pos_words = helper.pos_words(selected_user, df_new)
	            st.dataframe(pos_words)
	        with col2:
	            st.header("Negative")
	            neg_words = helper.neg_words(selected_user, df_new)
	            st.dataframe(neg_words)
	        with col3:
	            st.header("Neutral")
	            neu_words = helper.neu_words(selected_user, df_new)
	            st.dataframe(neu_words)

	        # ---- Word cloud --------------------------------------------------
	        st.title("Word cloud")
	        df_wc = helper.word_cloud(selected_user, df_new)
	        fig, ax = plt.subplots()
	        ax.imshow(df_wc)
	        plt.axis('off')
	        _render_figure(fig)

	        # ---- Most common words -------------------------------------------
	        st.title("Most Common Words")
	        # Column 0 supplies the bar labels and column 1 the bar lengths.
	        most_common_df = helper.most_common_words(selected_user, df_new)
	        fig, ax = plt.subplots()
	        ax.barh(most_common_df[0], most_common_df[1])
	        _render_figure(fig)
	        st.dataframe(most_common_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))

	        # ---- Emoji usage -------------------------------------------------
	        emoji_df = helper.emoji_helper(selected_user, df_new)
	        st.title("Emoji Analysis")
	        st.dataframe(emoji_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))

	# NOTE(review): the original indentation was lost. This section is assumed
	# to live at module level so that the "Analyze" button works independently
	# of the sidebar "Show Analysis" button - confirm.
	st.title("Sentiment Analysis")

	# ``st.cache`` is deprecated in newer Streamlit releases in favour of
	# ``st.cache_resource``; keep the legacy decorator as a fallback so the
	# script still runs on older installations.
	if hasattr(st, "cache_resource"):
	    _cache_resource = st.cache_resource
	else:
	    _cache_resource = st.cache(allow_output_mutation=True)


	@_cache_resource
	def get_model():
	    """Load the tokenizer and sequence classifier once per server process.

	    Returns:
	        tuple: ``(tokenizer, model)`` loaded from the
	        ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint.
	    """
	    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForSequenceClassification.from_pretrained(model_name)
	    return tokenizer, model


	@_cache_resource
	def _get_sentiment_pipeline():
	    """Build the default ``sentiment-analysis`` pipeline once and reuse it.

	    Without caching, the pipeline would be rebuilt on every script rerun,
	    i.e. on every widget interaction.
	    """
	    return pipeline("sentiment-analysis")


	tokenizer, model = get_model()

	user_input = st.text_area('Enter Text to Analyze')
	button = st.button("Analyze")

	sent_pipeline = _get_sentiment_pipeline()
	if user_input and button:
	    test_sample = tokenizer(
	        [user_input],
	        padding=True,
	        truncation=True,
	        max_length=512,
	        return_tensors='pt',
	    )
	    # Inference only: skip autograd bookkeeping for the forward pass.
	    with torch.no_grad():
	        output = model(**test_sample)
	    # NOTE(review): ``output`` is never displayed. The prediction shown
	    # below comes from the default pipeline, not from the model returned by
	    # ``get_model()`` - confirm which of the two is intended.
	    st.write("Prediction: ", sent_pipeline(user_input))

	# NOTE(review): this is a plain Python assignment and nothing visible in
	# this file reads it; if it was meant to set a Streamlit config option, it
	# does not do so from here.
	showWarningOnDirectExecution = False