Files changed (1) hide show
  1. app.py +0 -159
app.py DELETED
@@ -1,159 +0,0 @@
1
- import streamlit as st
2
- from transformers import pipeline
3
- from transformers import AutoTokenizer
4
- from transformers import AutoModelForSequenceClassification
5
- import warnings
6
- warnings.filterwarnings("ignore")
7
- import nltk
8
- nltk.download('all')
9
- import matplotlib.pyplot as plt
10
- import helper
11
- import preprocessor
12
- from mtranslate import translate
13
- import pandas as pd
14
- import os
15
- from gtts import gTTS
16
- import base64
17
- import torch
18
- import seaborn as sns
19
# NOTE(review): the nesting below was reconstructed from a flattened diff in
# which indentation was lost — confirm it against the original file.


def _show_figure(fig):
    """Render *fig* in the active Streamlit container, then close it.

    pyplot keeps every figure it creates registered until it is closed, and
    this script creates a new figure per chart on each run, so figures that
    are never closed accumulate in the server process.
    """
    st.pyplot(fig)
    plt.close(fig)


def _vertical_x_labels(ax):
    """Rotate the x tick labels of *ax* to vertical.

    Works on the given axes directly instead of pyplot's "current" axes.
    """
    ax.tick_params(axis="x", labelrotation=90)


st.sidebar.title("Whatsapp Chat analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    # Decode the uploaded chat export and hand it to the project's parser.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)

    # Selector entries: every distinct value of the 'users' column, sorted,
    # with the whole-group option placed first.
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # ---- Headline counters -------------------------------------------
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        st.title("Top Statistics")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # ---- Timelines ---------------------------------------------------
        st.title("Timeline")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Monthly ")
            timeline = helper.monthly_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(timeline['time'], timeline['message'])
            _vertical_x_labels(ax)
            _show_figure(fig)
        with col2:
            st.title("Daily")
            daily_timeline = helper.Daily_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
            _vertical_x_labels(ax)
            _show_figure(fig)

        # ---- Activity maps -----------------------------------------------
        st.title("Activity Map")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_day.index,
                busy_day.values,
                color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'),
            )
            _vertical_x_labels(ax)
            _show_figure(fig)
        with col2:
            st.header("Most busy Month")
            busy_month = helper.month_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_month.index,
                busy_month.values,
                color=('indigo', 'blue', 'green', 'red'),
            )
            _vertical_x_labels(ax)
            _show_figure(fig)

        st.title("Weekly Activity HeatMap")
        Activity_heatmap = helper.activity_heatmap(selected_user, df_new)
        fig, ax = plt.subplots()
        # Pass the axes explicitly so the heatmap is drawn on this figure
        # rather than on whatever pyplot considers current.
        sns.heatmap(
            Activity_heatmap,
            cmap='RdBu',
            linewidths=1,
            linecolor='black',
            ax=ax,
        )
        _show_figure(fig)

        # ---- Per-user ranking (whole-group view only) --------------------
        if selected_user == "Group analysis":
            st.title("Most busy user")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color=('blue', 'red', 'pink', 'orange', 'green'))
                _vertical_x_labels(ax)
                _show_figure(fig)
            with col2:
                st.dataframe(new_df)

        # ---- Word tables by sentiment ------------------------------------
        st.title("Chat Sentiment Analysis")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Positive")
            pos_words = helper.pos_words(selected_user, df_new)
            st.dataframe(pos_words)
        with col2:
            st.header("Negative")
            neg_words = helper.neg_words(selected_user, df_new)
            st.dataframe(neg_words)
        with col3:
            st.header("Neutral")
            neu_words = helper.neu_words(selected_user, df_new)
            st.dataframe(neu_words)

        # ---- Word cloud --------------------------------------------------
        st.title("Word cloud")
        df_wc = helper.word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        ax.axis('off')
        _show_figure(fig)

        # ---- Most common words -------------------------------------------
        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        fig, ax = plt.subplots()
        # Column 0 supplies the bar labels and column 1 the bar lengths.
        ax.barh(most_common_df[0], most_common_df[1])
        _show_figure(fig)
        st.dataframe(most_common_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))

        # ---- Emoji usage -------------------------------------------------
        emoji_df = helper.emoji_helper(selected_user, df_new)
        st.title("Emoji Analysis")
        st.dataframe(emoji_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))


st.title("Sentiment Analysis")
140
# ``st.cache`` is deprecated in newer Streamlit releases in favour of
# ``st.cache_resource``. Use the new decorator when this Streamlit version
# provides it and fall back to the legacy one otherwise, so the app runs on
# both old and new installs.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def get_model():
    """Load the tokenizer and sequence-classification model for sentiment.

    The result is cached by the Streamlit decorator above so the checkpoint
    is not reloaded on every script run.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded with ``from_pretrained``
        from the ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model
146
-
147
-
148
# Free-text sentiment check: classify whatever the user types with the model
# returned by get_model().
tokenizer, model = get_model()

user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")

# Build the pipeline around the already-loaded tokenizer/model. Previously a
# second, unspecified default model was instantiated here on every run and
# used for the displayed prediction, while the loaded model's forward pass
# was computed and then thrown away.
# NOTE(review): the labels shown now come from this model's own config and
# may be generic ids rather than words — confirm the desired display.
sent_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

if user_input and button:
    # Truncate to 512 tokens (the limit the original tokenizer call used) so
    # long inputs are cut instead of overflowing the model.
    prediction = sent_pipeline(user_input, truncation=True, max_length=512)
    st.write("Prediction: ", prediction)

# NOTE(review): this variable is never read anywhere in the file. It looks
# like a Streamlit configuration option, which would belong in the Streamlit
# config rather than in code — confirm and remove if so.
showWarningOnDirectExecution = False