Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""app.py
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
"""
# %pip install gradio transformers -q
# %pip install nltk
# Import the key libraries
import gradio as gr | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from scipy.special import softmax | |
import nltk | |
import re | |
from nltk.corpus import stopwords | |
from nltk.stem import WordNetLemmatizer | |
# Fetch the NLTK corpora that the text cleaning relies on
# (stopword list and WordNet data), if not already downloaded.
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

# Hugging Face model id; the tokenizer and the classifier are both loaded
# from this same checkpoint.
model_path = "kobbyeduah/NLP_Sentiment_Analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
def preprocess(text):
    """Normalize raw tweet text before it is handed to the tokenizer.

    Each whitespace-separated token of the lowercased text is handled as
    follows:

    1. A token that starts with ``@`` (and is longer than one character)
       becomes the placeholder ``@user``.
    2. Otherwise every character that is not an ASCII letter is stripped.
    3. Tokens that end up empty or are English stopwords are dropped.
    4. The remaining word is lemmatized with WordNet.
    5. A word that starts with ``http`` becomes the placeholder ``http``.

    The mention check has to run *before* the character stripping: once the
    ``@`` has been removed a mention can no longer be recognised, which is
    why the placeholder was previously never produced.

    Args:
        text: The raw input string. Empty or ``None`` input yields ``""``.

    Returns:
        The processed tokens joined by single spaces.
    """
    if not text:
        return ''

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    processed = []
    for token in text.lower().split():
        # Detect mentions on the raw token, while the '@' is still there.
        if token.startswith('@') and len(token) > 1:
            processed.append('@user')
            continue

        # Remove special characters and numbers; the text is already
        # lowercase, so only a-z can remain.
        word = re.sub(r'[^a-z]', '', token)
        if not word or word in stop_words:
            continue

        word = lemmatizer.lemmatize(word)

        # Links lose their punctuation above ("https://t.co/x" -> "httpstcox"),
        # so anything still starting with "http" is collapsed to the placeholder.
        processed.append('http' if word.startswith('http') else word)

    return ' '.join(processed)
def sentiment_analysis(text):
    """Classify the sentiment of *text* and return the verdict as HTML.

    The text is cleaned with ``preprocess``, tokenized, run through the
    module-level ``model`` and the logits are turned into probabilities with
    softmax. The most probable class is rendered as a coloured pill-shaped
    button showing the label and its probability in percent.

    Args:
        text: The raw text entered by the user.

    Returns:
        An HTML string containing a centred button labelled
        ``<Label>(<percentage>%)``.
    """
    text = preprocess(text)

    # truncation=True cuts an over-long input down to the tokenizer's
    # configured maximum length instead of encoding it at full length.
    inputs = tokenizer(text, return_tensors='pt', truncation=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits of the single input sequence -> class probabilities.
    scores_ = softmax(outputs.logits[0].detach().numpy())

    # Labels and their display colours, indexed by class id.
    # NOTE(review): assumes the model's class order is
    # negative / neutral / positive -- confirm against the model config.
    labels = ['Negative', 'Neutral', 'Positive']
    colors = ['red', 'yellow', 'green']
    font_colors = ['white', 'black', 'white']

    # Look the winning class up once and reuse the index everywhere.
    best = int(scores_.argmax())
    max_label = labels[best]
    max_percentage = scores_[best] * 100

    label_html = f'<div style="display: flex; justify-content: center;"><button style="text-align: center; font-size: 16px; padding: 10px; border-radius: 15px; background-color: {colors[best]}; color: {font_colors[best]};">{max_label}({max_percentage:.2f}%)</button></div>'
    return label_html
# Gradio UI: one text box in, one HTML fragment out, wired to
# sentiment_analysis.
# NOTE(review): `layout` looks like a legacy Interface option -- confirm the
# installed Gradio version still accepts it.
interface = gr.Interface(
    title="COVID-19 Sentiment Analysis App",
    description=(
        "This App Analyzes the sentiment of COVID-19 related tweets. "
        "Negative: Indicates a negative sentiment, "
        "Neutral: Indicates a neutral sentiment, "
        "Positive: Indicates a positive sentiment."
    ),
    fn=sentiment_analysis,
    inputs=gr.Textbox(placeholder="Write your tweet here..."),
    outputs=gr.HTML(),
    examples=[
        ["This vaccine is terrible!"],
        ["I don't have a strong opinion about this vaccines."],
        ["The Vaccine is Good I have had no issues!"],
    ],
    theme="default",
    layout="horizontal",
)

# Start serving the app.
interface.launch()