Spaces:
Sleeping
Sleeping
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.pipeline import make_pipeline | |
from sklearn.pipeline import Pipeline | |
import joblib | |
import re | |
import string | |
import nltk | |
nltk.download('stopwords') | |
nltk.download('punkt') | |
import streamlit as st | |
# Preprocess function | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
# Patterns are compiled once at module level rather than on every call.
_URL_PATTERN = re.compile(r'https?://\S+')
_HTML_TAG_PATTERN = re.compile(r'<[^<>]+>')
# Matches anything that is neither a word character nor whitespace.
# Digits and underscores are word characters, so they are NOT matched.
_NON_WORD_PATTERN = re.compile(r'[^\w\s]')
# NOTE: the \U000024C2-\U0001F251 range below is very wide; besides emoji it
# also spans non-emoji code points (for example CJK ideographs), so those are
# stripped as well. The pattern is kept unchanged so that the produced text
# stays the same as before.
_EMOJI_PATTERN = re.compile("["
                            u"\U0001F600-\U0001F64F"
                            u"\U0001F300-\U0001F5FF"
                            u"\U0001F680-\U0001F6FF"
                            u"\U0001F1E0-\U0001F1FF"
                            u"\U0001F1F2-\U0001F1F4"
                            u"\U0001F1E6-\U0001F1FF"
                            u"\U0001F600-\U0001F64F"
                            u"\U00002702-\U000027B0"
                            u"\U000024C2-\U0001F251"
                            u"\U0001f926-\U0001f937"
                            u"\U0001F1F2"
                            u"\U0001F1F4"
                            u"\U0001F620"
                            u"\u200d"
                            u"\u2640-\u2642"
                            "]+", flags=re.UNICODE)

# Filled on first use so the NLTK corpus is read at most once per process run.
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loading them once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def preprocess_text(text):
    """Normalise raw text before it is vectorised.

    Steps, in order: replace URLs, HTML tags, non-word characters
    (punctuation and symbols; digits are kept) and the emoji character
    class with a space, lowercase the result, tokenise it with NLTK's
    ``word_tokenize`` and drop English stop words.

    Args:
        text: The raw input string.

    Returns:
        The remaining tokens joined by single spaces (possibly an empty
        string).
    """
    text = _URL_PATTERN.sub(' ', text)
    text = _HTML_TAG_PATTERN.sub(' ', text)
    text = _NON_WORD_PATTERN.sub(' ', text)
    text = _EMOJI_PATTERN.sub(' ', text)
    text = text.lower()

    stop_words = _english_stop_words()
    return ' '.join(
        token for token in word_tokenize(text) if token not in stop_words
    )
# Load the two persisted models from the working directory with joblib.
# The predictor functions index these objects through ``named_steps``.
# NOTE(review): Streamlit re-runs the script on each interaction, so both
# files are presumably re-read every time; consider st.cache_resource -- confirm.
model_NB_path = './model_NB.sav'
model_LR_path = './model_LR.sav'
model_NB = joblib.load(model_NB_path)
model_LR = joblib.load(model_LR_path)
def sentiment_analysis_LR(input):
    """Classify *input* with the ``model_LR`` pipeline.

    The text is cleaned with ``preprocess_text``, transformed by the
    pipeline's ``'tfidfvectorizer'`` step and scored by its
    ``'logisticregression'`` step.

    Args:
        input: Raw text to classify.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned = preprocess_text(input)

    steps = model_LR.named_steps
    tfidf = steps['tfidfvectorizer']
    classifier = steps['logisticregression']

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf.transform([cleaned])
    label = classifier.predict(features)[0]
    return 0 if label == 0 else 1
def sentiment_analysis_NB(input):
    """Classify *input* with the ``model_NB`` pipeline.

    The text is cleaned with ``preprocess_text``, transformed by the
    pipeline's ``'tfidf'`` step and scored by its ``'nb'`` step.

    Args:
        input: Raw text to classify.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned = preprocess_text(input)

    steps = model_NB.named_steps
    tfidf = steps['tfidf']
    classifier = steps['nb']

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf.transform([cleaned])
    label = classifier.predict(features)[0]
    return 0 if label == 0 else 1
# Streamlit UI: read free text and report the logistic-regression verdict.
text = st.text_area('Enter some text !!! (English text : D )')
# Skip empty and whitespace-only input; otherwise a blank box would still be
# classified and reported as if it were a real sentence.
if text and text.strip():
    out = sentiment_analysis_LR(text)
    if out == 0:
        st.write('The sentence is negative')
    else:
        st.write('The sentence is positive')