Delete app.py
app.py
DELETED
@@ -1,160 +0,0 @@
from statistics import mode
from joblib import load
from tqdm import tqdm

import pandas as pd
import gradio as gr
import numpy as np
import regex as re

stopwords = load('stopwords.data')
nlp = load('nlp.path')


class Preprocessor:
    def __init__(self, stopwords=stopwords):
        self.vectorizer = load('vectorizer.model')
        self.stopwords = stopwords
        self.vectorizer_fitted = True

    def remove_urls(self, texts):
        print('Removing URLs...')
        pattern = re.compile('(\w+\.com ?/ ?.+)|(http\S+)')
        return [re.sub(pattern, '', text) for text in texts]

    def remove_double_space(self, texts):
        print('Removing double space...')
        pattern = re.compile(' +')
        return [re.sub(pattern, ' ', text) for text in texts]

    def remove_punctuation(self, texts):
        print('Removing Punctuation...')
        pattern = re.compile('[^a-z ]')
        return [re.sub(pattern, ' ', text) for text in texts]

    def remove_stopwords(self, texts):
        print('Removing stopwords...')
        return [[w for w in text.split(' ') if w not in self.stopwords] for text in tqdm(texts)]

    def remove_numbers(self, texts):
        print('Removing numbers...')
        return [' '.join([w for w in text if not w.isdigit()]) for text in tqdm(texts)]

    def remove_emojis(self, texts):
        print('Removing emojis...')
        pattern = re.compile("["
                             u"\U0001F600-\U0001F64F"  # emoticons
                             u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                             u"\U0001F680-\U0001F6FF"  # transport & map symbols
                             u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                             "]+", flags=re.UNICODE)
        return [re.sub(pattern, r'', text) for text in texts]

    def lemmatize(self, texts):
        print('Lemmatizing...')
        lemmatized_texts = []
        for text in tqdm(texts):
            doc = nlp(text)
            lemmatized_texts.append(' '.join([token.lemma_ for token in doc]))

        return lemmatized_texts

    def transform(self, X, y=None, mode='train'):
        X = X.copy()

        print('Removing Nans...')
        X = X[~X.isnull()]
        X = X[~X.duplicated()]

        if mode == 'train':
            self.train_idx = X.index
        else:
            self.test_idx = X.index

        print('Counting capitalized...')
        capitalized = [np.sum([t.isupper() for t in text.split()])
                       for text in np.array(X.values)]

        print('Lowering...')
        X = [text.lower() for text in X]

        X = self.remove_urls(X)
        X = self.remove_punctuation(X)
        X = self.remove_double_space(X)
        X = self.remove_emojis(X)
        X = self.remove_stopwords(X)
        X = self.remove_numbers(X)
        X = self.lemmatize(X)

        if not self.vectorizer_fitted:
            self.vectorizer_fitted = True
            print('Fitting vectorizer...')
            self.vectorizer.fit(X)

        print('Vectorizing...')
        X = self.vectorizer.transform(X)

        return X


def gettext(r):
    pred = mode(r)

    if pred == 0:
        text = 'Irrelevant'
    elif pred == 1:
        text = 'Negative'
    elif pred == 2:
        text = 'Neutral'
    else:
        text = 'Positive'

    return text


def greet(text):
    df_new = pd.DataFrame([text])

    pr = Preprocessor()
    X_test = pr.transform(df_new[0])

    log_reg = load('log_reg.model')
    y_lr = log_reg.predict(X_test)

    tree = load('tree.model')
    y_tree = tree.predict(X_test)

    forest = load('forest.model')
    y_forest = forest.predict(X_test)

    r = [y_lr[0], y_tree[0], y_forest[0]]

    text = gettext(r)

    return text


interface = gr.Interface(
    title = "😄 Twitter Sentiment Analysis 😡 - UMG",
    description = "<h3>The idea is to classify a text provided by the user according to the emotion contained in that text. "+
        "The possible outputs are the following: Irrelevant, Negative, Neutral, and Positive. </h3>"+
        "<b>Models:</b> Logistic Regression, Decision Trees and Random Forest"+
        "<br><b>Metrics:</b> Accuracy: 0.95, Precision: 0.953, Recall: 0.945, F1 Score: 0.948 <br> <br><b>Please provide a text example:</b>",
    article='Step-by-step on GitHub <a href="https://github.com/Adrian8aS/-Twitter-Sentiment-Analysis/blob/4558716d85e18bb18dde25f597f010af13a5deb5/Exam%20JAOS.ipynb"> notebook </a> <br> ~ José Adrián Ochoa Sánchez',
    allow_flagging = "never",
    fn = greet,
    inputs = [
        gr.Text(label="Write a tweet")],
    outputs = [
        gr.Text(label="Sentiment detected")],
    examples = [
        ['I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣'],
        ['BBC News - Amazon boss Jeff Bezos rejects claims company acted like a drug dealer bbc.co.uk/news/av/busine…'],
        ['@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄'],
        ['FUCKKKKKK I CANT WAIT']
    ]
)

interface.launch(share = True)
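For reference, a minimal smoke test of the removed app, as a sketch rather than part of the original file: it assumes the joblib artifacts loaded above (stopwords.data, nlp.path, vectorizer.model, log_reg.model, tree.model, forest.model) are present on disk, and that it runs in a session where the definitions above are already loaded with the interface.launch call skipped.

# Hypothetical smoke test; 'sample' is an invented name, the text is one of the app's own examples.
sample = "@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄"
print(greet(sample))  # prints one of: 'Irrelevant', 'Negative', 'Neutral', 'Positive'

The label comes from gettext, which takes the statistical mode of the three model predictions (logistic regression, decision tree, random forest) and maps 0-3 to the four sentiment strings.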