Spaces:

Adrian8a
/

Test

Runtime error

App Files Files Community

Adrian8a commited on Nov 9, 2022

Commit

0b3f2a8

•

1 Parent(s): 3a90b83

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +160 -0
forest.model +3 -0
log_reg.model +3 -0
nlp.path +3 -0
stopwords.data +0 -0
tree.model +3 -0
vectorizer.model +3 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+nlp.path filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,160 @@

+from statistics import mode
+from joblib import load
+from tqdm import tqdm
+import pandas as pd
+import gradio as gr
+import numpy as np
+import regex as re
+stopwords = load('stopwords.data')
+nlp = load('nlp.path')
+class Preprocessor:
+    def __init__(self, stopwords=stopwords):
+        self.vectorizer = load('vectorizer.model')
+        self.stopwords = stopwords
+        self.vectorizer_fitted = True
+    def remove_urls(self, texts):
+        print('Removing URLs...')
+        pattern = re.compile('(\w+\.com ?/ ?.+)|(http\S+)')
+        return [re.sub(pattern, '', text) for text in texts]
+    def remove_double_space(self, texts):
+        print('Removing double space...')
+        pattern = re.compile(' +')
+        return [re.sub(pattern, ' ', text) for text in texts]
+    def remove_punctuation(self, texts):
+        print('Removing Punctuation...')
+        pattern = re.compile('[^a-z ]')
+        return [re.sub(pattern, ' ', text) for text in texts]
+    def remove_stopwords(self, texts):
+        print('Removing stopwords...')
+        return [[w for w in text.split(' ') if w not in self.stopwords] for text in tqdm(texts)]
+    def remove_numbers(self, texts):
+        print('Removing numbers...')
+        return [' '.join([w for w in text if not w.isdigit()]) for text in tqdm(texts)]
+    def remove_emojis(self, texts):
+        print('Removing emojis...')
+        pattern = re.compile("["
+                u"\U0001F600-\U0001F64F"  # emoticons
+                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                                   "]+", flags=re.UNICODE)
+        return [re.sub(pattern, r'', text) for text in texts]
+    def lemmatize(self, texts):
+        print('Lemmatizing...')
+        lemmatized_texts = []
+        for text in tqdm(texts):
+            doc = nlp(text)
+            lemmatized_texts.append(' '.join([token.lemma_ for token in doc]))
+        return lemmatized_texts
+    def transform(self, X, y=None, mode='train'):
+        X = X.copy()
+        print('Removing Nans...')
+        X = X[~X.isnull()]
+        X = X[~X.duplicated()]
+        if mode == 'train':
+            self.train_idx = X.index
+        else:
+            self.test_idx = X.index
+        print('Counting capitalized...')
+        capitalized = [np.sum([t.isupper() for t in text.split()])
+                           for text in np.array(X.values)]
+        print('Lowering...')
+        X = [text.lower() for text in X]
+        X = self.remove_urls(X)
+        X = self.remove_punctuation(X)
+        X = self.remove_double_space(X)
+        X = self.remove_emojis(X)
+        X = self.remove_stopwords(X)
+        X = self.remove_numbers(X)
+        X = self.lemmatize(X)
+        if not self.vectorizer_fitted:
+            self.vectorizer_fitted = True
+            print('Fitting vectorizer...')
+            self.vectorizer.fit(X)
+        print('Vectorizing...')
+        X = self.vectorizer.transform(X)
+        return X
+def gettext(r):
+    pred  = mode(r)
+    if pred == 0:
+        text = 'Irrelevant'
+    elif pred == 1:
+        text = 'Negative'
+    elif pred == 2:
+        text = 'Neutral'
+    else:
+        text = 'Positive'
+    return text
+def greet(text):
+    df_new = pd.DataFrame([text])
+    pr = Preprocessor()
+    X_test = pr.transform(df_new[0])
+    log_reg = load('log_reg.model')
+    y_lr  = log_reg.predict(X_test)
+    tree = load('tree.model')
+    y_tree  = tree.predict(X_test)
+    forest = load('forest.model')
+    y_forest  = forest.predict(X_test)
+    r = [y_lr[0], y_tree[0], y_forest[0]]
+    text = gettext(r)
+    return text
+interface = gr.Interface(
+    title = "😄 Twitter Sentiment Analysis 😡 - UMG",
+    description = "<h3>The idea is to classify a text provided by the user according to the emotion contained in that text. "+
+    "The possible outputs are the following: Irrelevant, Negative, Neutral, and Positive. </h3>"+
+    "<b>Models:</b> Logistic Regression, Decision Trees and Random Forest"+
+    "<br><b>Metrics:</b> Accuracy: 0.95, Precision: 0.953, Recall: 0.945, F1 Score: 0.948 <br>  <br><b>Please provide a text example:</b>",
+    article='Step-by-step on GitHub <a href="https://github.com/Adrian8aS/-Twitter-Sentiment-Analysis/blob/4558716d85e18bb18dde25f597f010af13a5deb5/Exam%20JAOS.ipynb"> notebook </a> <br> ~ José Adrián Ochoa Sánchez',
+    allow_flagging = "never",
+    fn = greet,
+    inputs = [
+        gr.Text(label="Write a tweet")],
+    outputs = [
+        gr.Text(label="Sentiment detected")],
+    examples = [
+        ['I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣'],
+        ['BBC News - Amazon boss Jeff Bezos rejects claims company acted like a drug dealer bbc.co.uk/news/av/busine…'],
+        ['@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄'],
+        ['FUCKKKKKK I CANT WAIT']
+    ]
+)
+interface.launch(share = True)

forest.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0a2a8ab5baa096bec20274cbe7a491e7b4043722aad5bf493f4d329706fd78a
+size 317318393

log_reg.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06643ef7dd1bc3d5602e5f0ea640e77a3ae3a7c08c159e3f3144110e478c020f
+size 401812

nlp.path ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:541a40e7a0a0592389a37f1598dfe6922458e6f634eb3fbc19c8a590c3d1e724
+size 17042384

stopwords.data ADDED Viewed

Binary file (1.31 kB). View file

tree.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a28fbe3807527ce0f507b5351a33a2e29d76745a36ad9dbb3a936aa7d4eb5741
+size 1922126

vectorizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57852a03c4c7f7675ad62b77e3e9b94edf63d54ac60aea045edd9eeb1cf6a49a
+size 11457423