import calendar
import re
import string
from io import StringIO

import emoji
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import plotly.express as px
import streamlit as st
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from streamlit.runtime.state import session_state
from streamlit_option_menu import option_menu
from wordcloud import WordCloud,STOPWORDS

# Fetch the NLTK resources this script relies on: the English stop-word list
# used by preprocess() and the VADER lexicon used by SentimentIntensityAnalyzer.
nltk.download('stopwords')
nltk.download("vader_lexicon")
def load_data():
    """Load the preprocessed tweet CSV and normalise its ``Date`` column.

    Reads columns 1-9 of ``Preprocessed_tweet.csv`` (column 0 is skipped) and
    rewrites ``Date`` as ``YYYY-MM-DD`` strings so that it can be compared
    lexicographically against ``str(date)`` values.

    Returns:
        pandas.DataFrame: the loaded data with ``Date`` formatted as text.
        Values that cannot be parsed as dates become missing values.
    """
    data = pd.read_csv("Preprocessed_tweet.csv", low_memory=True, usecols=[*range(1, 10)])
    # read_csv yields plain strings for this column; the ``.dt`` accessor only
    # exists on datetime-like data, so parse first and format afterwards.
    parsed = pd.to_datetime(data['Date'], errors='coerce')
    data['Date'] = parsed.dt.strftime('%Y-%m-%d')
    return data
def date_range():
    """Render the "Filter by Date" header with two date pickers.

    Returns:
        tuple: ``(start, end)`` exactly as returned by the two
        ``st.date_input`` widgets, which default to 2014-01-01 and 2014-12-31.
    """
    iso_format = "%Y-%m-%d"
    default_start = pd.to_datetime("2014-01-01", format=iso_format)
    default_end = pd.to_datetime("2014-12-31", format=iso_format)

    st.header("Filter by Date")
    start = st.date_input(
        "Start Date:- (Please input on or after 2008-05-08)",
        default_start,
    )
    end = st.date_input(
        "End Date:- (Please input on or before 2017-12-03)",
        default_end,
    )
    return start, end
def preprocess(text):
    """Clean free text before sentiment scoring / word-cloud generation.

    Steps, in order: lower-case, drop http/https URLs, drop ASCII
    punctuation, drop digits, replace emoji with their textual names, turn
    ``_`` and ``-`` into spaces, and remove English stop words.

    Args:
        text (str): raw input text.

    Returns:
        str: the remaining words joined by single spaces (may be empty).
    """
    text = text.lower()
    # Raw string: '\S' in a normal literal is an invalid escape sequence.
    text = re.sub(r'http://\S+|https://\S+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = ''.join(ch for ch in text if not ch.isdigit())
    text = emoji.demojize(text, delimiters=("", ""))
    # Runs after demojize so that separators inside the inserted emoji names
    # are split into individual words.
    text = text.replace('_', ' ').replace('-', ' ')
    # A set gives O(1) membership tests instead of scanning a list per word.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    return ' '.join(word for word in text.split() if word not in stop_words)
def wordcloud(text,title):
    """Generate a word cloud from ``text`` and render it in the Streamlit page.

    Args:
        text: source text; it is converted with ``str()`` before generation.
        title: caption drawn above the image.

    Note:
        ``WordCloud.generate`` is called unconditionally, so callers should
        pass text that contains at least one word.
    """
    cloud = WordCloud().generate(str(text))
    # Draw on an explicit figure and hand it to st.pyplot; this avoids relying
    # on pyplot's global figure, so the 'deprecation.showPyplotGlobalUse'
    # option no longer needs to be touched.
    fig, ax = plt.subplots()
    ax.imshow(cloud, interpolation='bilinear')
    ax.set_title(title)
    ax.axis('off')
    st.pyplot(fig)
    # Release the figure so repeated script reruns do not accumulate figures.
    plt.close(fig)
def sentimentGenerator(text):
    """Score ``text`` with NLTK's SentimentIntensityAnalyzer and display the result.

    The label shown is the largest of the ``pos`` / ``neu`` / ``neg`` scores.
    On ties, Positive is checked first, then Negative, and Neutral is the
    fallback. The three scores are then written out as percentages.

    Args:
        text (str): text to analyse.
    """
    analyzer = SentimentIntensityAnalyzer()
    result = analyzer.polarity_scores(text)
    pos, neu, neg = result['pos'], result['neu'], result['neg']

    dominant = max(pos, neu, neg)
    if dominant == pos:
        st.success("Sentiment is Positive")
    elif dominant == neg:
        st.error("Sentiment is Negative")
    else:
        st.info("Sentiment is Neutral")

    st.write(f"Positive - {round(pos*100,2)}%")
    st.write(f"Neutral - {round(neu*100,2)}%")
    st.write(f"Negative - {round(neg*100,2)}%")
def DownloadDataFrame(df,fileName):
    """Offer ``df`` as a UTF-8 encoded CSV download named ``fileName``.

    A success message is shown when the download button reports a click.
    """
    csv_bytes = df.to_csv().encode('utf-8')
    clicked = st.download_button(
        label="Download data as CSV",
        data=csv_bytes,
        file_name=fileName,
        mime='text/csv',
    )
    if clicked:
        st.success("DataFrame saved as an .csv file")
# Page-wide settings: browser tab title/icon, wide layout, sidebar open.
st.set_page_config(page_title="Chat Analysis",page_icon="🕵️♂️",layout="wide",initial_sidebar_state="expanded")
#------------------------------------------------------- Main Menu -------------------------------------------------------
# `selected` holds the label of the chosen entry and drives the page
# dispatch below.
# NOTE(review): option_menu requires a menu_title; "Main Menu" is taken from
# the section banner above - confirm the intended title (None hides it).
selected = option_menu(
    menu_title="Main Menu",
    options=["Home","EDA","Sentiment Generator"],
    menu_icon = 'cast',
)
#------------------------------------------------------- Home Page -------------------------------------------------------
# Static landing page: dataset description followed by the headline
# sentiment split rendered as coloured HTML spans.
if selected == "Home":
    dataset_description = """Here is the streamlit dashboard to display sentiment analysis of customer support data on twitter.\n
**Target Data Set** : Customer Support Data on Twitter - 2.8 million of data\n
**Description of data :**\n
**tweet_id** : A unique, anonymized ID for the Tweet. Referenced by response_tweet_id and in_response_to_tweet_id.\n
**Author_id** : A unique, anonymized user ID. @s in the dataset have been replaced with their associated anonymized user ID.\n
**Inbound** : Whether the tweet is "inbound" to a company doing customer support on Twitter. This feature is useful when re-organizing data for training conversational models.\n
**Created_at** : Date and time when the tweet was sent.\n
**Text** : Tweet content. Sensitive information like phone numbers and email addresses are replaced with mask values like email.\n
***Response_tweet_id*** : IDs of tweets that are responses to this tweet, comma-separated.\n
***In_response_to_tweet_id*** : ID of the tweet this tweet is in response to, if any."""
    sentiment_overview = '''<span style="font-family: cursive; font-size: 3rem; color: green">**POSITIVE 28%** </span>
<span style="font-family: cursive; font-size: 3rem; color: blue">**NEUTRAL 39%** </span>
<span style="font-family: cursive; font-size: 3rem; color: red">**NEGATIVE 33%**</span>'''

    st.title("Sentiment Analysis for Customer Support Data on Twitter")
    st.write(dataset_description)
    st.markdown("## Overview of Sentiments for Customer Support Data on Twitter")
    # The overview is raw HTML, so HTML rendering must be enabled explicitly.
    st.write(sentiment_overview, unsafe_allow_html = True)
#------------------------------------------------------- EDA Page -------------------------------------------------------
elif selected == "EDA":
data = load_data()
start,end = date_range()
extract = st.button('Extract data')
if st.session_state.get('button') != True:
st.session_state['button'] = extract
if st.session_state['button'] == True:
date_range_df = data.loc[data["Date"].between(str(start), str(end))]
st.header("Extracted Data set")
ExtractedData = date_range_df
fileName = 'Date Range ([%s] - [%s]).csv'%(str(start), str(end))
#-------------------------------------- Filter by Author --------------------------------------
st.sidebar.header("Filter by Author")
author_list = date_range_df['Author_ID'].value_counts().index.tolist()
author = st.sidebar.selectbox("Select Author :",['All'] + author_list)
if author != 'All':
date_range_df = date_range_df[date_range_df['Author_ID']==author]
st.header("Filterd Data set")
st.subheader(f"Author :- {author}")
fileName = '%s ([%s] - [%s]).csv'%(author,str(start), str(end))
author = "All Author"
#-------------------------------------- WordCloud for Sentiments --------------------------------------
st.sidebar.header("Word Cloud")
word_sentiment ='Display word cloud for what sentiment?', (None,'All','Positive', 'Neutral', 'Negative'),key='1')
if word_sentiment != None:
if word_sentiment == 'All':
st.subheader("Word Cloud for All sentiments")
text = date_range_df['Messege'].tolist()
text = [str(x) for x in text]
text = ' '.join(text)
title = "Word cloud for All sentiments"
st.subheader(f"Word cloud for {author}'s {word_sentiment} sentiments")
text = date_range_df[date_range_df['NLTK_Tag']==word_sentiment]['Messege'].tolist()
text = [str(x) for x in text]
text = ' '.join(text)
title = "Word cloud for %s's %s sentiments" % (author,word_sentiment)
st.error(f"There is no {word_sentiment} sentiment tweets on {author}'s tweets")
#-------------------------------------- Draw a Bar and Pie Chart --------------------------------------
st.sidebar.header("Bar Chart/Pie Chart")
select ='What visualization type do you want to display number of sentiments ?', (None,'Bar Chart', 'Pie Chart'))
if select != None:
sentiment = date_range_df['NLTK_Tag'].value_counts().index.tolist()
sentiment_count = date_range_df['NLTK_Tag'].value_counts().tolist()
percentage = [i*100/sum(sentiment_count) for i in sentiment_count]
percentage = [str(round(i,2))+'%' for i in percentage]
sentiment_count = pd.DataFrame({'Sentiment':sentiment, 'Tweets':sentiment_count})
st.markdown("### Number of tweets by sentiment")
st.subheader(f"Author :- {author}")
if select == 'Bar Chart':
fig =, x='Sentiment', y='Tweets',text = percentage, color='Sentiment')
fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
#-------------------------------------- line chart --------------------------------------
st.sidebar.header("Line Chart")
year = st.sidebar.selectbox("What year do you want to see the sentiment changes monthly ?",[None]+list(range(2008, 2018)))
if year != None:
df = data[['Date','NLTK_Tag']]
df['Date'] = pd.to_datetime(df['Date'],errors='coerce')
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df = df[df['Year']==year]
for month in range(1,13):
df0 = df[df['Month']==month]
pos_cnt,neu_cnt,neg_cnt = 0,0,0
for sntmnt in df0['NLTK_Tag'].tolist():
if sntmnt == 'Positive':
pos_cnt += 1
elif sntmnt == 'Neutral':
neu_cnt += 1
neg_cnt += 1
line_chart_data = pd.DataFrame({'Positive':pos,'Neutral':neu,'Negative':neg},index = list(calendar.month_name)[1:])
st.markdown("### Monthly Changes of Sentiments Customer Support Data over Year")
fig = px.line(line_chart_data,color_discrete_map={"Positive": "green","Neutral": "white","Negative": "red"}).update_layout(
title = {'text':f"Year - {year}",'x':0.5}, xaxis_title="Month", yaxis_title="Number of Tweets",legend_title="Sentiment")
st.plotly_chart(fig, use_container_width=True)
#------------------------------------------------------- Sentiment Generator Page -------------------------------------------------------
elif selected == "Sentiment Generator":
st.title("Sentiment Generator")
option ="Select your input option :",("Type a text","import .txt file"))
#-------------------------------------- Input as a text and .txt file--------------------------------------
text = ''
if option == "Type a text":
text = st.text_input("Please enter your text in ***english*** for analysis :")
file = st.file_uploader("Choose a file : ")
if file != None:
stringio = StringIO(file.getvalue().decode("utf-8"))
text =
#-------------------------------------- Generate sentiment and wordCloud --------------------------------------
generate = st.button('Generate')
if st.session_state.get('generate') != True:
st.session_state['generate'] = generate
if st.session_state['generate']:
if text != '':
text = preprocess(text)
st.markdown("### Do You Want to Draw a WordCloud for Generated Text?")
check = st.checkbox("Draw a wordcloud")
if check:
title = "WordCloud for Generated Text"