# perman2011's picture
# Update app.py
# c4dc8e0
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
import joblib
import re
import string
import nltk
# Fetch the NLTK data that preprocess_text relies on: the English stop-word
# list (stopwords.words) and the Punkt models used by word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the tokenizer data from 'punkt_tab' instead of
# the pickled 'punkt' package; without it word_tokenize raises LookupError.
# On older releases this call only logs that the package is unknown.
nltk.download('punkt_tab')
import streamlit as st
# Preprocess function
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Patterns are compiled once when the module is executed instead of on every
# call to preprocess_text.
_URL_PATTERN = re.compile(r'https?://\S+')
_HTML_TAG_PATTERN = re.compile(r'<[^<>]+>')
# Anything that is neither a word character nor whitespace. Digits and
# underscores are word characters, so they are kept.
_NON_WORD_PATTERN = re.compile(r'[^\w\s]')
# NOTE(review): several ranges below overlap or repeat, and
# \U000024C2-\U0001F251 is far wider than emoji -- it also covers CJK
# ideographs, kana and Hangul, so such letters are stripped as well.
# The class is kept exactly as it was so the cleaned text does not change.
_EMOJI_PATTERN = re.compile("["
                            u"\U0001F600-\U0001F64F"
                            u"\U0001F300-\U0001F5FF"
                            u"\U0001F680-\U0001F6FF"
                            u"\U0001F1E0-\U0001F1FF"
                            u"\U0001F1F2-\U0001F1F4"
                            u"\U0001F1E6-\U0001F1FF"
                            u"\U0001F600-\U0001F64F"
                            u"\U00002702-\U000027B0"
                            u"\U000024C2-\U0001F251"
                            u"\U0001f926-\U0001f937"
                            u"\U0001F1F2"
                            u"\U0001F1F4"
                            u"\U0001F620"
                            u"\u200d"
                            u"\u2640-\u2642"
                            "]+", flags=re.UNICODE)
# Filled on first use so the stop-word corpus is only read when needed.
_STOP_WORDS = None


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on first use."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Clean *text* and return it as a space-joined string of tokens.

    Steps, in order:
      1. replace http(s) URLs with a space;
      2. replace HTML-like tags (``<...>``) with a space;
      3. replace every character that is neither a word character nor
         whitespace with a space (digits and underscores are kept);
      4. replace characters in the emoji character class with a space;
      5. lowercase the text;
      6. tokenize with ``word_tokenize`` and drop English stop words.

    Args:
        text: the raw input string.

    Returns:
        The remaining tokens joined by single spaces; an empty string when
        no token survives.
    """
    # NOTE(review): the URL pattern runs before tag stripping and matches up
    # to the next whitespace, so in markup such as <a href="http://x">y</a>
    # it also swallows the text glued to the URL. Order kept as-is to leave
    # the cleaned output unchanged.
    text = _URL_PATTERN.sub(' ', text)
    text = _HTML_TAG_PATTERN.sub(' ', text)
    text = _NON_WORD_PATTERN.sub(' ', text)
    text = _EMOJI_PATTERN.sub(' ', text)
    text = text.lower()

    stop_words = _english_stop_words()
    kept_tokens = [token for token in word_tokenize(text)
                   if token not in stop_words]
    return ' '.join(kept_tokens)
# Load the two persisted pipelines (Naive Bayes first, then Logistic
# Regression) from the current working directory.
# NOTE: joblib.load unpickles the file, so only trusted model files belong here.
model_NB_path = './model_NB.sav'
model_LR_path = './model_LR.sav'
model_NB, model_LR = (
    joblib.load(model_path) for model_path in (model_NB_path, model_LR_path)
)
def sentiment_analysis_LR(input):
    """Classify *input* with the logistic-regression pipeline ``model_LR``.

    The text is cleaned with ``preprocess_text``, vectorized by the
    pipeline's 'tfidfvectorizer' step and scored by its
    'logisticregression' step.

    Args:
        input: the raw text to classify. The name shadows the builtin
            ``input`` but is kept because it is part of the signature.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned = preprocess_text(input)
    steps = model_LR.named_steps
    tfidf = steps['tfidfvectorizer']
    classifier = steps['logisticregression']
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf.transform([cleaned])
    label = classifier.predict(features)[0]
    return 0 if label == 0 else 1
def sentiment_analysis_NB(input):
    """Classify *input* with the Naive Bayes pipeline ``model_NB``.

    The text is cleaned with ``preprocess_text``, vectorized by the
    pipeline's 'tfidf' step and scored by its 'nb' step.

    Args:
        input: the raw text to classify. The name shadows the builtin
            ``input`` but is kept because it is part of the signature.

    Returns:
        0 when the predicted label equals 0, otherwise 1.
    """
    cleaned_text = preprocess_text(input)
    tfidf_step, nb_step = (
        model_NB.named_steps[step_name] for step_name in ('tfidf', 'nb')
    )
    # A single-document batch goes in; its only prediction is read back.
    prediction = nb_step.predict(tfidf_step.transform([cleaned_text]))
    return 0 if prediction[0] == 0 else 1
# Streamlit UI: read the text, classify it with the logistic-regression
# model and show the verdict. Nothing is shown while the box is empty.
text = st.text_area('Enter some text !!! (English text : D )')
if text:
    out = sentiment_analysis_LR(text)
    st.write(
        'The sentence is negative' if out == 0 else 'The sentence is positive'
    )