Spaces:
Sleeping
Sleeping
File size: 11,374 Bytes
b83c0cc 4ec4a3c 440d662 4ec4a3c 440d662 4ec4a3c b83c0cc c9ab569 b83c0cc 973fa74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
import streamlit as st
import pandas as pd
import string
import nltk
import re
nltk.download('stopwords')
nltk.download("vader_lexicon")
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from io import StringIO
from streamlit.runtime.state import session_state
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt
import plotly.express as px
import emoji
import calendar
from streamlit_option_menu import option_menu
@st.cache_data(persist=True)
def load_data():
data = pd.read_csv("Preprocessed_tweet.csv",low_memory=True, usecols=[*range(1,10)])
data['Date']=pd.to_datetime(data['Date'],errors='coerce')
data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')
return data
def date_range():
st.header("Filter by Date")
start = st.date_input("Start Date:- (Please input on or after 2008-05-08)",pd.to_datetime("2014-01-01",format="%Y-%m-%d"))
end = st.date_input("End Date:- (Please input on or before 2017-12-03)",pd.to_datetime("2014-12-31",format="%Y-%m-%d"))
return start,end;
def preprocess(text):
text = text.lower()
text = re.sub('http://\S+|https://\S+', '', text)
text = text.translate(str.maketrans('', '', string.punctuation))
text = ''.join(i for i in text if not i.isdigit())
text = emoji.demojize(text,delimiters=("",""))
text = text.replace('_',' ').replace('-',' ')
stopWord = nltk.corpus.stopwords.words('english')
text = [word for word in text.split() if word not in stopWord]
text = ' '.join(text)
return text
def wordcloud(text,title):
st.set_option('deprecation.showPyplotGlobalUse', False)
text = WordCloud().generate(str(text))
plt.imshow(text)
plt.axis('off')
plt.title(title)
st.pyplot()
def sentimentGenerator(text):
analyzer = SentimentIntensityAnalyzer()
result = analyzer.polarity_scores(text)
# if result['compound'] > 0.35:
# st.success("Sentiment is Positive")
# elif result['compound'] < (-0.25):
# st.error("Sentiment is Negative")
# else:
# st.info("Sentiment is Neutral")
if max(result['pos'],result['neu'],result['neg']) == result['pos']:
st.success("Sentiment is Positive")
elif max(result['pos'],result['neu'],result['neg']) == result['neg']:
st.error("Sentiment is Negative")
else:
st.info("Sentiment is Neutral")
st.write(f"Positive - {round(result['pos']*100,2)}%")
st.write(f"Neutral - {round(result['neu']*100,2)}%")
st.write(f"Negative - {round(result['neg']*100,2)}%")
def DownloadDataFrame(df,fileName):
download = st.download_button(label="Download data as CSV", data=df.to_csv().encode('utf-8'),
file_name=fileName, mime='text/csv',)
if download:
st.success("DataFrame saved as an .csv file")
st.set_page_config(page_title="Chat Analysis",page_icon="🕵️♂️",layout="wide",initial_sidebar_state="expanded")
#------------------------------------------------------- Main Menu -------------------------------------------------------
selected = option_menu(
menu_title=None,
options=["Home","EDA","Sentiment Generator"],
icons=['house','bar-chart','emoji-heart-eyes'],
menu_icon = 'cast',
orientation='horizontal',
styles={
"icon":{"color":"red","font-size":"25px"},
"nav-link":{"font-size":"25px","--hover-color":"#417C76",},
"nav-link-selected":{"background-color":"#0c7c72"}
},
)
#------------------------------------------------------- Home Page -------------------------------------------------------
if selected == "Home":
st.title("Sentiment Analysis for Customer Support Data on Twitter")
st.write("""Here is the streamlit dashboard to display sentiment analysis of customer support data on twitter.\n
**Target Data Set** : Customer Support Data on Twitter - 2.8 million of data\n
**Description of data :**\n
**tweet_id** : A unique, anonymized ID for the Tweet. Referenced by response_tweet_id and in_response_to_tweet_id.\n
**Author_id** : A unique, anonymized user ID. @s in the dataset have been replaced with their associated anonymized user ID.\n
**Inbound** : Whether the tweet is "inbound" to a company doing customer support on Twitter. This feature is useful when re-organizing data for training conversational models.\n
**Created_at** : Date and time when the tweet was sent.\n
**Text** : Tweet content. Sensitive information like phone numbers and email addresses are replaced with mask values like email.\n
***Response_tweet_id*** : IDs of tweets that are responses to this tweet, comma-separated.\n
***In_response_to_tweet_id*** : ID of the tweet this tweet is in response to, if any.""")
st.markdown("## Overview of Sentiments for Customer Support Data on Twitter")
st.write('''<span style="font-family: cursive; font-size: 3rem; color: green">**POSITIVE 28%** </span>
<span style="font-family: cursive; font-size: 3rem; color: blue">**NEUTRAL 39%** </span>
<span style="font-family: cursive; font-size: 3rem; color: red">**NEGATIVE 33%**</span>''', unsafe_allow_html = True)
#------------------------------------------------------- EDA Page -------------------------------------------------------
elif selected == "EDA":
data = load_data()
start,end = date_range()
extract = st.button('Extract data')
if st.session_state.get('button') != True:
st.session_state['button'] = extract
if st.session_state['button'] == True:
date_range_df = data.loc[data["Date"].between(str(start), str(end))]
st.header("Extracted Data set")
st.dataframe(date_range_df)
ExtractedData = date_range_df
fileName = 'Date Range ([%s] - [%s]).csv'%(str(start), str(end))
DownloadDataFrame(ExtractedData,fileName)
#-------------------------------------- Filter by Author --------------------------------------
st.sidebar.header("Filter by Author")
author_list = date_range_df['Author_ID'].value_counts().index.tolist()
author = st.sidebar.selectbox("Select Author :",['All'] + author_list)
if author != 'All':
date_range_df = date_range_df[date_range_df['Author_ID']==author]
st.header("Filterd Data set")
st.subheader(f"Author :- {author}")
st.dataframe(date_range_df)
fileName = '%s ([%s] - [%s]).csv'%(author,str(start), str(end))
DownloadDataFrame(date_range_df,fileName)
else:
author = "All Author"
#-------------------------------------- WordCloud for Sentiments --------------------------------------
st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?', (None,'All','Positive', 'Neutral', 'Negative'),key='1')
if word_sentiment != None:
try:
if word_sentiment == 'All':
st.subheader("Word Cloud for All sentiments")
text = date_range_df['Messege'].tolist()
text = [str(x) for x in text]
text = ' '.join(text)
title = "Word cloud for All sentiments"
wordcloud(text,title)
else:
st.subheader(f"Word cloud for {author}'s {word_sentiment} sentiments")
text = date_range_df[date_range_df['NLTK_Tag']==word_sentiment]['Messege'].tolist()
text = [str(x) for x in text]
text = ' '.join(text)
title = "Word cloud for %s's %s sentiments" % (author,word_sentiment)
wordcloud(text,title)
except:
st.error(f"There is no {word_sentiment} sentiment tweets on {author}'s tweets")
#-------------------------------------- Draw a Bar and Pie Chart --------------------------------------
st.sidebar.header("Bar Chart/Pie Chart")
select = st.sidebar.radio('What visualization type do you want to display number of sentiments ?', (None,'Bar Chart', 'Pie Chart'))
if select != None:
sentiment = date_range_df['NLTK_Tag'].value_counts().index.tolist()
sentiment_count = date_range_df['NLTK_Tag'].value_counts().tolist()
percentage = [i*100/sum(sentiment_count) for i in sentiment_count]
percentage = [str(round(i,2))+'%' for i in percentage]
sentiment_count = pd.DataFrame({'Sentiment':sentiment, 'Tweets':sentiment_count})
st.markdown("### Number of tweets by sentiment")
st.subheader(f"Author :- {author}")
if select == 'Bar Chart':
fig = px.bar(sentiment_count, x='Sentiment', y='Tweets',text = percentage, color='Sentiment')
st.plotly_chart(fig)
else:
fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
st.plotly_chart(fig)
#-------------------------------------- line chart --------------------------------------
st.sidebar.header("Line Chart")
year = st.sidebar.selectbox("What year do you want to see the sentiment changes monthly ?",[None]+list(range(2008, 2018)))
if year != None:
df = data[['Date','NLTK_Tag']]
df['Date'] = pd.to_datetime(df['Date'],errors='coerce')
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df = df[df['Year']==year]
pos,neg,neu=[],[],[]
for month in range(1,13):
df0 = df[df['Month']==month]
pos_cnt,neu_cnt,neg_cnt = 0,0,0
for sntmnt in df0['NLTK_Tag'].tolist():
if sntmnt == 'Positive':
pos_cnt += 1
elif sntmnt == 'Neutral':
neu_cnt += 1
else:
neg_cnt += 1
pos.append(pos_cnt)
neu.append(neu_cnt)
neg.append(neg_cnt)
line_chart_data = pd.DataFrame({'Positive':pos,'Neutral':neu,'Negative':neg},index = list(calendar.month_name)[1:])
st.markdown("### Monthly Changes of Sentiments Customer Support Data over Year")
fig = px.line(line_chart_data,color_discrete_map={"Positive": "green","Neutral": "white","Negative": "red"}).update_layout(
title = {'text':f"Year - {year}",'x':0.5}, xaxis_title="Month", yaxis_title="Number of Tweets",legend_title="Sentiment")
st.plotly_chart(fig, use_container_width=True)
#------------------------------------------------------- Sentiment Generator Page -------------------------------------------------------
elif selected == "Sentiment Generator":
st.title("Sentiment Generator")
option = st.radio("Select your input option :",("Type a text","import .txt file"))
#-------------------------------------- Input as a text and .txt file--------------------------------------
text = ''
if option == "Type a text":
text = st.text_input("Please enter your text in ***english*** for analysis :")
else:
file = st.file_uploader("Choose a file : ")
if file != None:
stringio = StringIO(file.getvalue().decode("utf-8"))
text = stringio.read()
#-------------------------------------- Generate sentiment and wordCloud --------------------------------------
generate = st.button('Generate')
if st.session_state.get('generate') != True:
st.session_state['generate'] = generate
if st.session_state['generate']:
if text != '':
text = preprocess(text)
sentimentGenerator(text)
st.markdown("### Do You Want to Draw a WordCloud for Generated Text?")
check = st.checkbox("Draw a wordcloud")
if check:
title = "WordCloud for Generated Text"
wordcloud(text,title)
|