Koli98 committed on
Commit
5d6f83b
1 Parent(s): 75dfe3a

Uploaded required files

Files changed (3)
  1. base_model.py +37 -0
  2. sentiment_app.py +244 -0
  3. sentiment_model.py +38 -0
base_model.py ADDED
@@ -0,0 +1,37 @@
+ from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, classification_report
+ from sklearn.preprocessing import LabelEncoder
+
+ class TextClassifier:
+
+     def __init__(self, train_features, train_targets, test_features, test_targets):
+         self.train_features = train_features
+         self.train_targets = train_targets
+         self.test_features = test_features
+         self.test_targets = test_targets
+
+         self.model = None
+         self.classification_report = None
+         self.accuracy = None
+         self.precision = None
+         self.recall = None
+         self.f1 = None
+
+     def train(self) -> None:
+         raise NotImplementedError
+
+     def predict(self, text_samples:list, inverse_transform:bool=True) -> list:
+         raise NotImplementedError
+
+     def evaluate(self) -> dict:
+
+         predictions = self.predict(self.test_features, inverse_transform=False)
+
+         self.accuracy = accuracy_score(self.test_targets, predictions)
+         self.precision = precision_score(self.test_targets, predictions, average='weighted')
+         self.recall = recall_score(self.test_targets, predictions, average='weighted')
+         self.f1 = f1_score(self.test_targets, predictions, average='weighted')
+         self.classification_report = classification_report(self.test_targets, predictions)
+
+         return {'accuracy' : self.accuracy,
+                 'precision' : self.precision,
+                 'recall' : self.recall, 'f1' : self.f1}
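TextClassifier is an abstract base: train() and predict() must be supplied by a subclass before evaluate() can run. A minimal sketch of such a subclass, illustrative only and not part of this commit (MajorityClassifier, train_texts, and the other names below are hypothetical):

from collections import Counter
from base_model import TextClassifier

class MajorityClassifier(TextClassifier):

    def train(self) -> None:
        # remember the most frequent training label
        self.model = Counter(self.train_targets).most_common(1)[0][0]

    def predict(self, text_samples: list, inverse_transform: bool = True) -> list:
        # return the majority label for every sample
        return [self.model] * len(text_samples)

# clf = MajorityClassifier(train_texts, train_labels, test_texts, test_labels)
# clf.train()
# clf.evaluate()   # {'accuracy': ..., 'precision': ..., 'recall': ..., 'f1': ...}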
sentiment_app.py ADDED
@@ -0,0 +1,244 @@
+ import streamlit as st
+ from sentiment_model import PretrainedSentimentAnalyzer
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import plotly.graph_objects as go
+ import re
+
+
+ # Create an instance of the PretrainedSentimentAnalyzer class
+ analyzer = PretrainedSentimentAnalyzer(None, None, None, None)
+ # Define the Streamlit app
+ def main():
+     st.title("Early Depression Detection System")
+
+     # Get user input
+     option = st.selectbox("Select an option:", ("Enter a sentence", "Upload a CSV file"))
+
+     if option == "Enter a sentence":
+         text = st.text_input("Enter a sentence:")
+
+         # Perform sentiment analysis
+         if st.button("Analyze"):
+             if not text or len(text) == 1:
+                 st.write("Enter valid text")
+             else:
+                 sentiment = analyzer.predict([text], inverse_transform=True)[0]
+                 st.write("Sentiment Analysis Results:")
+                 st.write("Sentiment:", sentiment['label'])
+                 #st.write("Score:", sentiment['score'])
+                 if sentiment['label'] == 'negative':
+                     st.write(scan(text))
+                     st.write("Please call the helpline number:")
+                     st.write("Beyond Blue: 1300 659 467")
+                     st.write("Mental Health Emergency: 13 14 65")
+                 else:
+                     st.write("No Depression Detected")
+     elif option == "Upload a CSV file":
+         file = st.file_uploader("Upload a CSV file:")
+         if file is not None:
+             # Check file format
+             if file.name.endswith('.csv'):
+                 # Perform sentiment analysis on the uploaded file
+                 df = pd.read_csv(file)
+                 if st.button("Analyze"):
+                     # with st.spinner("Predicting..."):
+                     #     column = df.columns[1]
+                     #     data = df[column].astype(str).tolist()
+                     #     sentiments = analyzer.predict(data, inverse_transform=True)
+                     #     df['sentiment'] = [s['label'] for s in sentiments]
+                     spinner_placeholder = st.markdown("""
+ <style>
+ .spinner-container {
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ flex-direction: column;
+ }
+
+ .spinner {
+ border: 16px solid #f3f3f3;
+ border-radius: 50%;
+ border-top: 16px solid #3498db;
+ width: 100px;
+ height: 100px;
+ -webkit-animation: spin 2s linear infinite;
+ animation: spin 2s linear infinite;
+ }
+
+ @-webkit-keyframes spin {
+ 0% { -webkit-transform: rotate(0deg); }
+ 100% { -webkit-transform: rotate(360deg); }
+ }
+
+ @keyframes spin {
+ 0% { transform: rotate(0deg); }
+ 100% { transform: rotate(360deg); }
+ }
+
+ </style>
+ <div class="spinner-container">
+ <div class="spinner"></div>
+ <p>Predicting...</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+                     column = df.columns[1]
+                     data = df[column].astype(str).tolist()
+                     sentiments = analyzer.predict(data, inverse_transform=True)
+                     df['Prediction'] = [s['label'] for s in sentiments]
+
+                     # Clear the spinner
+                     spinner_placeholder.empty()
+                     st.write("Sentiment Analysis Results:")
+                     st.write(df)
+
+                     sentiment_counts = df['Prediction'].value_counts()
+                     sentiment_dict = {'Positive': 0, 'Neutral': 0, 'Negative': 0}
+
+                     for sentiment in sentiment_counts.index:
+                         if sentiment == 'positive':
+                             sentiment_dict['Positive'] += sentiment_counts[sentiment]
+                         elif sentiment == 'neutral':
+                             sentiment_dict['Neutral'] += sentiment_counts[sentiment]
+                         elif sentiment == 'negative':
+                             sentiment_dict['Negative'] += sentiment_counts[sentiment]
+                     st.write("Sentiment Counts:")
+                     for sentiment, count in sentiment_dict.items():
+                         st.write(sentiment + ":", count)
+
+                     labels = list(sentiment_dict.keys())
+                     sizes = list(sentiment_dict.values())
+                     colors = ['green', 'white', 'red'] # colors for Positive, Neutral, Negative
+
+                     fig = go.Figure(data=[go.Pie(labels=labels, values=sizes,hole=.2, marker=dict(colors=colors))])
+
+                     fig.update_layout(
+                         title="Sentiment Analysis",
+                         showlegend=True,
+                         legend_title="Sentiment",
+                         uniformtext_minsize=12,
+                         uniformtext_mode='hide'
+                     )
+
+                     st.plotly_chart(fig)
+                     negative_data = df[df['Prediction'] == 'negative']
+                     # Create a df that only has the tweets column
+                     tweets_data = negative_data['Tweets']
+                     st.write("Depressed Tweets")
+                     st.write(scan(tweets_data))
+                     st.write("Please call the helpline number:")
+                     st.write("Beyond Blue: 1300 659 467")
+                     st.write("Mental Health Emergency: 13 14 65")
+             else:
+                 st.write("File type not supported")
+ def scan(tweets_data):
+
+     contractions_dict = { "ain't": "are not","'s":" is","aren't": "are not",
+         "can't": "cannot","can't've": "cannot have",
+         "'cause": "because","could've": "could have","couldn't": "could not",
+         "couldn't've": "could not have", "didn't": "did not","doesn't": "does not",
+         "don't": "do not","hadn't": "had not","hadn't've": "had not have",
+         "hasn't": "has not","haven't": "have not","he'd": "he would",
+         "he'd've": "he would have","he'll": "he will", "he'll've": "he will have",
+         "how'd": "how did","how'd'y": "how do you","how'll": "how will",
+         "I'd": "I would", "I'd've": "I would have","I'll": "I will",
+         "I'll've": "I will have","I'm": "I am","I've": "I have", "isn't": "is not",
+         "it'd": "it would","it'd've": "it would have","it'll": "it will",
+         "it'll've": "it will have", "let's": "let us","ma'am": "madam",
+         "mayn't": "may not","might've": "might have","mightn't": "might not",
+         "mightn't've": "might not have","must've": "must have","mustn't": "must not",
+         "mustn't've": "must not have", "needn't": "need not",
+         "needn't've": "need not have","o'clock": "of the clock","oughtn't": "ought not",
+         "oughtn't've": "ought not have","shan't": "shall not","sha'n't": "shall not",
+         "shan't've": "shall not have","she'd": "she would","she'd've": "she would have",
+         "she'll": "she will", "she'll've": "she will have","should've": "should have",
+         "shouldn't": "should not", "shouldn't've": "should not have","so've": "so have",
+         "that'd": "that would","that'd've": "that would have", "there'd": "there would",
+         "there'd've": "there would have", "they'd": "they would",
+         "they'd've": "they would have","they'll": "they will",
+         "they'll've": "they will have", "they're": "they are","they've": "they have",
+         "to've": "to have","wasn't": "was not","we'd": "we would",
+         "we'd've": "we would have","we'll": "we will","we'll've": "we will have",
+         "we're": "we are","we've": "we have", "weren't": "were not","what'll": "what will",
+         "what'll've": "what will have","what're": "what are", "what've": "what have",
+         "when've": "when have","where'd": "where did", "where've": "where have",
+         "who'll": "who will","who'll've": "who will have","who've": "who have",
+         "why've": "why have","will've": "will have","won't": "will not",
+         "won't've": "will not have", "would've": "would have","wouldn't": "would not",
+         "wouldn't've": "would not have","y'all": "you all", "y'all'd": "you all would",
+         "y'all'd've": "you all would have","y'all're": "you all are",
+         "y'all've": "you all have","you'd": "you would","you'd've": "you would have",
+         "you'll": "you will","you'll've": "you will have", "you're": "you are",
+         "you've": "you have"}
+     absolute_words = ["I", "feeling", "feels", "always", "never", "completely", "totally",
+         "absolutely", "must", "should", "all", "every", "none", "nothing",
+         "everyone", "everything", "only", "impossible", "forever", "can’t",
+         "cannot", "won’t", "will not", "no one", "no-one", "every time",
+         "low", "everytime", "difficult", "difficulty", "nightmare", "bored",
+         "disaster", "irritate", "broken", "hurt", "lost", "book", "alone",
+         "journal", "tool", "fight", "highway", "stupid", "disgusted", "stress",
+         "hostile", "reserved", "danger", "funeral", "respect", "news", "toothache",
+         "vomit", "sick", "beach", "office", "window", "phase", "plant", "hotel",
+         "friend", "teacher", "betray", "poster", "grief", "puppy", "safe", "kiss",
+         "home", "treat", "confident", "peace", "paper", "custom", "rain", "lucky",
+         "win", "proud", "beauty", "city", "museum", "success", "laughter", "party",
+         "key", "justice", "respect", "sorry", "apologize", "kill","help"]
+
+
+     # Regular expression for finding contractions
+     contractions_re=re.compile('(%s)' % '|'.join(contractions_dict.keys()))
+
+     # Function for expanding contractions
+     def expand_contractions(text,contractions_dict=contractions_dict):
+         def replace(match):
+             return contractions_dict[match.group(0)]
+         return contractions_re.sub(replace, text)
+
+     # Preprocessing function
+     def preprocess_text(text):
+         # Convert the input to a string
+         text = str(text)
+         # Expand contractions
+         text = expand_contractions(text)
+         # Remove punctuations
+         text = re.sub(r'[^\w\s]', '', text)
+         # Convert to lowercase
+         text = text.lower()
+         # If the resulting text is empty, return None
+         if text == "":
+             return None
+         return text
+
+     if isinstance(tweets_data, str):
+         preprocessed_tweets=preprocess_text(tweets_data)
+         count = 0
+         for word in absolute_words:
+             if word in preprocessed_tweets:
+                 count += 1
+             if count >= 2:
+                 break
+         # Flag the text only when at least two absolute words were found
+         return "Depression Detected" if count >= 2 else "No Depression Detected"
+     else:
+         # Apply the preprocess_text function to the tweet df
+         preprocessed_tweets = tweets_data.apply(preprocess_text).dropna() # drop rows that preprocess to None
+         print(preprocessed_tweets)
+
+
+         # Check if the preprocessed tweets have at least 2 of the absolute words
+         Early_dep = pd.DataFrame(columns=['Tweets'])
+         for tweet in preprocessed_tweets:
+             count = 0
+             for word in absolute_words:
+                 if word in tweet:
+                     count += 1
+                 if count >= 2:
+                     Early_dep = pd.concat([Early_dep, pd.DataFrame({'Tweets': [tweet]})], ignore_index=True)
+                     break
+         return Early_dep
+
+
+ # Run the app
+ if __name__ == "__main__":
+     main()
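The app itself is meant to be launched with the Streamlit CLI (streamlit run sentiment_app.py). For reference, a small sketch of how the scan() heuristic behaves on its own; this is illustrative only and not part of the commit (importing sentiment_app also instantiates the pretrained pipeline, so the model is downloaded on first use):

import pandas as pd
from sentiment_app import scan

# A single string is flagged only when it contains at least two "absolute" words.
print(scan("I always feel completely alone"))   # -> Depression Detected

# A Series of tweets returns a DataFrame holding the tweets that match.
tweets = pd.Series(["I can't do anything right, nothing helps", "lovely weather today"])
print(scan(tweets))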
sentiment_model.py ADDED
@@ -0,0 +1,38 @@
+ from base_model import TextClassifier
+ import torch
+ from transformers import pipeline
+
+ class PretrainedSentimentAnalyzer(TextClassifier):
+
+     def __init__(self, train_features, train_targets, test_features, test_targets, min_threshold=0.7):
+
+         super().__init__(train_features, train_targets, test_features, test_targets)
+
+
+
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+
+         self.model = pipeline("text-classification",
+                               model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+                               device=device)
+
+         self.prediction_map = {'positive' : 'positive',
+                                'negative' : 'negative',
+                                'neutral' : 'neutral'}
+
+         self.threshold = min_threshold
+
+     def train(self):
+         pass
+
+     def predict(self, text_samples:list, inverse_transform:bool, proba:bool=True) -> list:
+
+         predictions = self.model(text_samples, batch_size=128)
+         if proba:
+             return predictions
+
+         predictions = [self.prediction_map[prediction['label']] if prediction['score'] > self.threshold else 'neutral'
+                        for prediction in predictions]
+
+         return predictions
+
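For reference, a minimal usage sketch of the analyzer on its own (illustrative only, not part of the commit; the cardiffnlp model is fetched from the Hugging Face Hub on first use):

from sentiment_model import PretrainedSentimentAnalyzer

analyzer = PretrainedSentimentAnalyzer(None, None, None, None, min_threshold=0.7)
texts = ["I feel completely hopeless", "What a beautiful morning"]

# proba=True (the default) returns the raw pipeline dicts, e.g. {'label': 'negative', 'score': 0.9}
print(analyzer.predict(texts, inverse_transform=True))

# proba=False maps each prediction through prediction_map, falling back to 'neutral'
# whenever the score does not exceed min_threshold
print(analyzer.predict(texts, inverse_transform=True, proba=False))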