# utilities
import re
import pickle
import numpy as np
import pandas as pd
# plotting
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# nltk
import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')   # WordNetLemmatizer needs the WordNet corpus
nltk.download('omw-1.4')   # needed by the lemmatizer on some NLTK versions
# sklearn
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report

DATASET_COLUMNS = ["sentiment", "ids", "date", "flag", "user", "text"]
DATASET_ENCODING = "ISO-8859-1"
dataset = pd.read_csv('training.1600000.processed.noemoticon.csv',
                      encoding=DATASET_ENCODING, names=DATASET_COLUMNS)

# Removing the unnecessary columns.
dataset = dataset[['sentiment', 'text']]
# Replacing the values to ease understanding (Sentiment140 labels positives as 4).
dataset['sentiment'] = dataset['sentiment'].replace(4, 1)
# Storing data in lists.
text, sentiment = list(dataset['text']), list(dataset['sentiment'])

# NOTE: `emojis` is used below but was never defined in the original script;
# a minimal assumed emoticon-to-word mapping is sketched here. Keys are
# lowercase because tweets are lowercased before the replacement runs.
emojis = {':)': 'smile', ':-)': 'smile', ';)': 'wink', ':d': 'laugh',
          ':p': 'tongue', ':(': 'sad', ':-(': 'sad', ":'(": 'cry',
          ':o': 'surprise', ':|': 'neutral'}

def preprocess(textdata):
    processedText = []

    # Create the lemmatizer.
    wordLemm = WordNetLemmatizer()

    # Defining regex patterns.
    urlPattern = r"((http://)[^ ]*|(https://)[^ ]*|( www\.)[^ ]*)"
    userPattern = r'@[^\s]+'
    alphaPattern = r"[^a-zA-Z0-9]"
    sequencePattern = r"(.)\1\1+"
    seqReplacePattern = r"\1\1"

    for tweet in textdata:
        tweet = tweet.lower()
        # Replace all URLs with 'URL'.
        tweet = re.sub(urlPattern, ' URL', tweet)
        # Replace all emojis with 'EMOJI<name>'.
        for emoji in emojis.keys():
            tweet = tweet.replace(emoji, "EMOJI" + emojis[emoji])
        # Replace @USERNAME with 'USER'.
        tweet = re.sub(userPattern, ' USER', tweet)
        # Replace all non-alphanumeric characters.
        tweet = re.sub(alphaPattern, " ", tweet)
        # Replace 3 or more consecutive letters by 2 letters.
        tweet = re.sub(sequencePattern, seqReplacePattern, tweet)

        tweetwords = ''
        for word in tweet.split():
            # Checking if the word is a stopword.
            # if word not in stopwordlist:
            if len(word) > 1:
                # Lemmatizing the word.
                word = wordLemm.lemmatize(word)
                tweetwords += (word + ' ')

        processedText.append(tweetwords)

    return processedText
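# The sklearn imports above are otherwise unused in this script; a minimal,
# assumed sketch of the supervised baseline they suggest is shown here:
# TF-IDF features, a train/test split, and one of the imported classifiers
# (LinearSVC). The split size and vectoriser settings are illustrative.
processedtext = preprocess(text)
X_train, X_test, y_train, y_test = train_test_split(processedtext, sentiment,
                                                    test_size=0.05,
                                                    random_state=0)
vectoriser = TfidfVectorizer(ngram_range=(1, 2), max_features=500000)
X_train = vectoriser.fit_transform(X_train)
X_test = vectoriser.transform(X_test)

svc_model = LinearSVC()
svc_model.fit(X_train, y_train)
print(classification_report(y_test, svc_model.predict(X_test)))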
import gradio as gr
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download required NLTK resources.
nltk.download('vader_lexicon')

# Load the pre-trained sentiment intensity analyzer.
sia = SentimentIntensityAnalyzer()

def get_sentiment(tweet):
    # Preprocess the tweet.
    processed_tweet = preprocess([tweet])

    # Get the sentiment scores using the VADER sentiment analyzer.
    sentiment_score = sia.polarity_scores(processed_tweet[0])

    # Determine the sentiment label based on the compound score.
    compound_score = sentiment_score['compound']
    if compound_score >= 0.05:
        sentiment = 'Positive'
    elif compound_score <= -0.05:
        sentiment = 'Negative'
    else:
        sentiment = 'Neutral'

    return sentiment

# Create a Gradio interface. Each entry in `examples` is a separate
# one-input row, so the two sample tweets are listed as separate lists.
iface = gr.Interface(
    fn=get_sentiment,
    inputs='text',
    outputs='text',
    title='Tweet Sentiment Analyzer',
    description='Enter a tweet with text or emoticons or both, and get the sentiment prediction.',
    examples=[['I love this movie!'], ['This weather is terrible.']],
    theme='soft'
)

# Launch the interface.
iface.launch(share=True)
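# Usage note (assumed, not part of the original script): the handler can be
# exercised directly, e.g. in a quick sanity check run before launching the
# UI; the expected labels shown are illustrative.
#   get_sentiment('I love this movie!')         # -> 'Positive'
#   get_sentiment('This weather is terrible.')  # -> 'Negative'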