Adrian8a committed on
Commit
d9f3788
1 Parent(s): c266022

Delete app.py

Files changed (1)
  1. app.py +0 -160
app.py DELETED
@@ -1,160 +0,0 @@
-from statistics import mode
-from joblib import load
-from tqdm import tqdm
-
-import pandas as pd
-import gradio as gr
-import numpy as np
-import regex as re
-
-stopwords = load('stopwords.data')
-nlp = load('nlp.path')
-
-
-class Preprocessor:
-    def __init__(self, stopwords=stopwords):
-        self.vectorizer = load('vectorizer.model')
-        self.stopwords = stopwords
-        self.vectorizer_fitted = True
-
-    def remove_urls(self, texts):
-        print('Removing URLs...')
-        pattern = re.compile('(\w+\.com ?/ ?.+)|(http\S+)')
-        return [re.sub(pattern, '', text) for text in texts]
-
-    def remove_double_space(self, texts):
-        print('Removing double space...')
-        pattern = re.compile(' +')
-        return [re.sub(pattern, ' ', text) for text in texts]
-
-    def remove_punctuation(self, texts):
-        print('Removing Punctuation...')
-        pattern = re.compile('[^a-z ]')
-        return [re.sub(pattern, ' ', text) for text in texts]
-
-    def remove_stopwords(self, texts):
-        print('Removing stopwords...')
-        return [[w for w in text.split(' ') if w not in self.stopwords] for text in tqdm(texts)]
-
-    def remove_numbers(self, texts):
-        print('Removing numbers...')
-        return [' '.join([w for w in text if not w.isdigit()]) for text in tqdm(texts)]
-
-    def remove_emojis(self, texts):
-        print('Removing emojis...')
-        pattern = re.compile("["
-                             u"\U0001F600-\U0001F64F"  # emoticons
-                             u"\U0001F300-\U0001F5FF"  # symbols & pictographs
-                             u"\U0001F680-\U0001F6FF"  # transport & map symbols
-                             u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
-                             "]+", flags=re.UNICODE)
-        return [re.sub(pattern, r'', text) for text in texts]
-
-    def lemmatize(self, texts):
-        print('Lemmatizing...')
-        lemmatized_texts = []
-        for text in tqdm(texts):
-            doc = nlp(text)
-            lemmatized_texts.append(' '.join([token.lemma_ for token in doc]))
-
-        return lemmatized_texts
-
-    def transform(self, X, y=None, mode='train'):
-        X = X.copy()
-
-        print('Removing Nans...')
-        X = X[~X.isnull()]
-        X = X[~X.duplicated()]
-
-        if mode == 'train':
-            self.train_idx = X.index
-        else:
-            self.test_idx = X.index
-
-        print('Counting capitalized...')
-        capitalized = [np.sum([t.isupper() for t in text.split()])
-                       for text in np.array(X.values)]
-
-        print('Lowering...')
-        X = [text.lower() for text in X]
-
-        X = self.remove_urls(X)
-        X = self.remove_punctuation(X)
-        X = self.remove_double_space(X)
-        X = self.remove_emojis(X)
-        X = self.remove_stopwords(X)
-        X = self.remove_numbers(X)
-        X = self.lemmatize(X)
-
-        if not self.vectorizer_fitted:
-            self.vectorizer_fitted = True
-            print('Fitting vectorizer...')
-            self.vectorizer.fit(X)
-
-        print('Vectorizing...')
-        X = self.vectorizer.transform(X)
-
-        return X
-
-
-def gettext(r):
-
-    pred = mode(r)
-
-    if pred == 0:
-        text = 'Irrelevant'
-    elif pred == 1:
-        text = 'Negative'
-    elif pred == 2:
-        text = 'Neutral'
-    else:
-        text = 'Positive'
-
-    return text
-
-
-def greet(text):
-
-    df_new = pd.DataFrame([text])
-
-    pr = Preprocessor()
-    X_test = pr.transform(df_new[0])
-
-    log_reg = load('log_reg.model')
-    y_lr = log_reg.predict(X_test)
-
-    tree = load('tree.model')
-    y_tree = tree.predict(X_test)
-
-    forest = load('forest.model')
-    y_forest = forest.predict(X_test)
-
-    r = [y_lr[0], y_tree[0], y_forest[0]]
-
-    text = gettext(r)
-
-    return text
-
-
-interface = gr.Interface(
-    title = "😄 Twitter Sentiment Analysis 😡 - UMG",
-    description = "<h3>The idea is to classify a text provided by the user according to the emotion contained in that text. "+
-                  "The possible outputs are the following: Irrelevant, Negative, Neutral, and Positive. </h3>"+
-                  "<b>Models:</b> Logistic Regression, Decision Trees and Random Forest"+
-                  "<br><b>Metrics:</b> Accuracy: 0.95, Precision: 0.953, Recall: 0.945, F1 Score: 0.948 <br> <br><b>Please provide a text example:</b>",
-    article='Step-by-step on GitHub <a href="https://github.com/Adrian8aS/-Twitter-Sentiment-Analysis/blob/4558716d85e18bb18dde25f597f010af13a5deb5/Exam%20JAOS.ipynb"> notebook </a> <br> ~ José Adrián Ochoa Sánchez',
-    allow_flagging = "never",
-    fn = greet,
-    inputs = [
-        gr.Text(label="Write a tweet")],
-    outputs = [
-        gr.Text(label="Sentiment detected")],
-    examples = [
-        ['I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom's great auntie as 'Hayley can't get out of bed' and told to his grandma, who now thinks I'm a lazy, terrible person 🤣'],
-        ['BBC News - Amazon boss Jeff Bezos rejects claims company acted like a drug dealer bbc.co.uk/news/av/busine…'],
-        ['@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄'],
-        ['FUCKKKKKK I CANT WAIT']
-    ]
-)
-
-interface.launch(share = True)