# utilities
import re
import pickle
import numpy as np
import pandas as pd
# plotting (imported but not used in this app)
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# nltk
from nltk.stem import WordNetLemmatizer
# sklearn (imported but not used in this app)
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report
# Load the Sentiment140 training CSV (the file has no header row).
DATASET_COLUMNS = ["sentiment", "ids", "date", "flag", "user", "text"]
DATASET_ENCODING = "ISO-8859-1"
dataset = pd.read_csv('training.1600000.processed.noemoticon.csv',
                      encoding=DATASET_ENCODING, names=DATASET_COLUMNS)
# Keep only the columns we need.
dataset = dataset[['sentiment', 'text']]
# Map the positive label from 4 to 1, so sentiment is 0 (negative) or 1 (positive).
dataset['sentiment'] = dataset['sentiment'].replace(4, 1)
# Store the columns as plain Python lists.
text, sentiment = list(dataset['text']), list(dataset['sentiment'])
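# NOTE: preprocess() below references an `emojis` mapping that this script
# never defines. The emoticon-to-meaning dictionary here is an assumed,
# minimal stand-in so the code runs; the original project likely used a
# larger mapping.
emojis = {':)': 'smile', ':-)': 'smile', ':D': 'laugh', ':P': 'playful',
          ';)': 'wink', ':(': 'sad', ':-(': 'sad', ":'(": 'cry',
          ':O': 'surprised', '<3': 'love'}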
def preprocess(textdata):
    processedText = []
    # Create the lemmatizer.
    wordLemm = WordNetLemmatizer()
    # Define regex patterns.
    urlPattern = r"((http://)[^ ]*|(https://)[^ ]*|( www\.)[^ ]*)"
    userPattern = r"@[^\s]+"
    alphaPattern = r"[^a-zA-Z0-9]"
    sequencePattern = r"(.)\1\1+"
    seqReplacePattern = r"\1\1"
    for tweet in textdata:
        tweet = tweet.lower()
        # Replace all URLs with ' URL'.
        tweet = re.sub(urlPattern, ' URL', tweet)
        # Replace all emoticons with 'EMOJI' plus their meaning.
        for emoji in emojis.keys():
            tweet = tweet.replace(emoji, "EMOJI" + emojis[emoji])
        # Replace @USERNAME mentions with ' USER'.
        tweet = re.sub(userPattern, ' USER', tweet)
        # Replace all non-alphanumeric characters with a space.
        tweet = re.sub(alphaPattern, " ", tweet)
        # Collapse 3 or more consecutive repeats of a character down to 2.
        tweet = re.sub(sequencePattern, seqReplacePattern, tweet)
        tweetwords = ''
        for word in tweet.split():
            # Stopword filtering was disabled; keep words longer than one character.
            if len(word) > 1:
                # Lemmatize the word.
                word = wordLemm.lemmatize(word)
                tweetwords += (word + ' ')
        processedText.append(tweetwords)
    return processedText
import gradio as gr
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the required NLTK resources (wordnet and omw-1.4 are needed by
# the WordNetLemmatizer used in preprocess() above).
nltk.download('vader_lexicon')
nltk.download('wordnet')
nltk.download('omw-1.4')
# Load the pre-trained VADER sentiment intensity analyzer.
sia = SentimentIntensityAnalyzer()
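# Illustrative spot check (not part of the original app): with the assumed
# emojis mapping above, the cleaner maps URLs, mentions, emoticons and
# repeated letters as described.
print(preprocess(["Looooove this!!! https://example.com @friend :)"]))
# -> roughly ['loove this URL USER EMOJIsmile ']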
def get_sentiment(tweet):
    # Preprocess the tweet.
    processed_tweet = preprocess([tweet])
    # Get the sentiment score using the VADER sentiment analyzer.
    sentiment_score = sia.polarity_scores(processed_tweet[0])
    # Determine the sentiment label based on the compound score,
    # using VADER's conventional +/-0.05 thresholds.
    compound_score = sentiment_score['compound']
    if compound_score >= 0.05:
        sentiment = 'Positive'
    elif compound_score <= -0.05:
        sentiment = 'Negative'
    else:
        sentiment = 'Neutral'
    return sentiment
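# Quick illustrative examples (not in the original app): VADER should score
# these clearly positive and clearly negative, respectively.
print(get_sentiment('I love this movie! :)'))      # expected: 'Positive'
print(get_sentiment('This weather is terrible.'))  # expected: 'Negative'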
# Create a Gradio interface.
iface = gr.Interface(
    fn=get_sentiment,
    inputs='text',
    outputs='text',
    title='Tweet Sentiment Analyzer',
    description='Enter a tweet with text or emoticon or both, and get the sentiment prediction.',
    # Each example must be its own list, since the interface has one input.
    examples=[['I love this movie!'], ['This weather is terrible.']],
    theme='soft'
)
# Launch the interface.
iface.launch(share=True)