#!/usr/bin/env python
# coding: utf-8
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import WordNetLemmatizer
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import nltk
import regex as re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from scipy.special import softmax
import matplotlib.pyplot as plt
import seaborn as sns
import ast
nltk.download('stopwords')
nltk.download('wordnet')
# Load the pre-trained model
def load_model():
    with open('random_forest_model.pkl', 'rb') as file:
        loaded_model = pickle.load(file)
    return loaded_model

# Load the fitted TF-IDF vectorizer
def load_vectorizer():
    with open('tfidf_vectorizer.pkl', 'rb') as file:
        loaded_vectorizer = pickle.load(file)
    return loaded_vectorizer
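
# A minimal, hypothetical sketch (never called by this app) of how the two
# pickled artifacts loaded above might have been produced at training time.
# The column names ('review_text', 'avg_rating') and hyperparameters are
# assumptions, not taken from the original training code.
def _train_and_export_artifacts(train_df):
    tfidf = TfidfVectorizer()
    X = tfidf.fit_transform(train_df['review_text'])
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X, train_df['avg_rating'])
    with open('tfidf_vectorizer.pkl', 'wb') as f:
        pickle.dump(tfidf, f)
    with open('random_forest_model.pkl', 'wb') as f:
        pickle.dump(rf, f)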
def ratings(list_of_reviews):
    """Predict a rating for the given review text using the TF-IDF + random forest pipeline."""
    xidf = []
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    # Keep letters only, lowercase, drop stopwords, and lemmatize
    review = re.sub('[^a-zA-Z]', ' ', list_of_reviews)
    review = review.lower()
    review = review.split()
    review = [lemmatizer.lemmatize(word) for word in review if word not in stop_words]
    review = ' '.join(review)
    xidf.append(review)
    # Transform the cleaned review using the loaded vectorizer, then predict
    tf_idf_vectorizer = load_vectorizer()
    tf_review = tf_idf_vectorizer.transform(xidf)
    model = load_model()
    prediction = model.predict(tf_review)
    return prediction
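
# Hedged sketch, not used by the app: ratings() treats its argument as one block
# of text. If per-review predictions were wanted instead, the same pipeline could
# be mapped over each review and averaged (helper name is hypothetical).
def _average_rating(reviews):
    predictions = [float(ratings(review)[0]) for review in reviews]
    return float(np.mean(predictions))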
def sentiment_analysis(texts):
    # Pre-trained Twitter sentiment model (three classes: negative, neutral, positive)
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    config = AutoConfig.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)  # PyTorch model
    results = []
    for text in texts:
        encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
        output = model(**encoded_input)
        scores = output[0][0].detach().numpy()
        scores = softmax(scores)
        results.append(scores.tolist())
    return results
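
# The score columns built below assume the three softmax scores come back in the
# order (negative, neutral, positive), the class order documented for this model.
# Small helper sketch (not called by the app) that makes that assumption explicit:
def _scores_to_dict(scores):
    labels = ['negative', 'neutral', 'positive']  # assumed class order
    return dict(zip(labels, scores))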
def get_sentiment_label(row):
    if row['positive_score'] > row['neutral_score'] and row['positive_score'] > row['negative_score']:
        return 'positive'
    elif row['negative_score'] > row['neutral_score'] and row['negative_score'] > row['positive_score']:
        return 'negative'
    else:
        return 'neutral'
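
# Quick illustration with made-up scores (commented out so nothing runs inside
# the app): the highest score wins, and ties fall back to 'neutral'.
#   get_sentiment_label(pd.Series({'negative_score': 0.1,
#                                  'neutral_score': 0.2,
#                                  'positive_score': 0.7}))   # -> 'positive'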
st.set_option('deprecation.showPyplotGlobalUse', False)
# Create two columns for the header
col1, col2 = st.columns([0.5, 1.2])  # Adjust the ratio as needed

# Column 1: Image
with col1:
    st.image("img2.png", width=200)  # Adjust the path and width as needed

# Column 2: Title
with col2:
    st.write("""
# Ratings Prediction & Reviews Sentiment Analysis App
""")
st.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")
st.write('---')
sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))
list_reviews = st.text_input("Enter the reviews (comma-separated): ")
sentiment_review = list_reviews
ratings_review = list_reviews
submit_button = st.button("Submit")
if sidebar_selection == "Ratings Prediction":
# Check if the submit button is clicked and the input is not empty
if submit_button and ratings_review:
rating_pred = ratings(ratings_review)
def get_rating_category(rating):
if rating < 2.0:
return "between 1 and 2 which is Very Low"
elif rating < 3.0:
return "between 2 and 3 which is Low"
elif rating < 4.0:
return "between 3 and 4 which is Medium"
elif rating < 5.0:
return "between 4 and 5 which is High"
else:
return "5 which is Very High"
# Determine the rating category
rating_category = get_rating_category(rating_pred)
# Display the result
st.write(f"Based on the list of reviews provided, your average rating falls {rating_category}.")
elif submit_button:
# Display a message if the submit button is clicked but no review is provided
st.write("Please enter a review to get a prediction.")
elif sidebar_selection == "Sentiment Analysis":
if submit_button and sentiment_review:
# Create a DataFrame
# Split the string into a list of reviews
review_list = sentiment_review.split(',')
df = pd.DataFrame(review_list, columns=['Review'])
scores = sentiment_analysis(df['Review'])
df['negative_score'] = [score[0] for score in scores]
df['neutral_score'] = [score[1] for score in scores]
df['positive_score'] = [score[2] for score in scores]
df['sentiment'] = df.apply(get_sentiment_label, axis=1)
# Display the sentiment distribution chart using Streamlit
st.write("**Sentiment Distribution:**")
plt.figure(figsize=(8, 6))
sns.countplot(data=df, x='sentiment', color='blue')
# Display values on top of the bars
for p in plt.gca().patches:
plt.gca().annotate(f'{p.get_height()}', (p.get_x() + p.get_width() / 2, p.get_height()), ha='center',
va='bottom')
# Set plot labels and title
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.title('Sentiment Distribution')
st.pyplot(plt)