Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- random_forest_model.pkl +3 -0
- requirements.txt +10 -0
- reviews_app.py +151 -0
- tfidf_vectorizer.pkl +3 -0
random_forest_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e59ee5cc72522101824ced0660bd5dbe374facfaa5eecd85611e4d2d44f31298
|
3 |
+
size 5743385
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.6.3
|
2 |
+
nltk==3.8.1
|
3 |
+
numpy==1.23.5
|
4 |
+
pandas==1.5.3
|
5 |
+
regex==2023.10.3
|
6 |
+
scikit_learn==1.2.1
|
7 |
+
scipy==1.11.4
|
8 |
+
seaborn==0.13.0
|
9 |
+
streamlit==1.18.1
|
10 |
+
transformers==4.34.1
|
reviews_app.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from nltk.stem import WordNetLemmatizer
|
6 |
+
import streamlit as st
|
7 |
+
import pickle
|
8 |
+
import pandas as pd
|
9 |
+
import numpy as np
|
10 |
+
import nltk
|
11 |
+
import regex as re
|
12 |
+
from nltk.corpus import stopwords
|
13 |
+
from nltk.tokenize import word_tokenize
|
14 |
+
from sklearn.ensemble import RandomForestClassifier
|
15 |
+
import transformers
|
16 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
|
17 |
+
from scipy.special import softmax
|
18 |
+
import matplotlib.pyplot as plt
|
19 |
+
import seaborn as sns
|
20 |
+
import ast
|
21 |
+
|
22 |
+
|
23 |
+
# Load the model
|
24 |
+
def load_model():
    """Load the pickled prediction model from ``random_forest_model.pkl``.

    The path is relative to the current working directory.

    Returns:
        The object stored in the pickle file. ``ratings`` in this file
        calls ``predict`` on it.

    Raises:
        FileNotFoundError: If the pickle file is not present.
    """
    # NOTE: unpickling can execute arbitrary code; only load model files
    # that ship with the app and come from a trusted source.
    with open('random_forest_model.pkl', 'rb') as file:
        return pickle.load(file)
|
28 |
+
|
29 |
+
def load_vectorizer():
    """Load the pickled text vectorizer from ``tfidf_vectorizer.pkl``.

    The path is relative to the current working directory.

    Returns:
        The object stored in the pickle file. ``ratings`` in this file
        calls ``transform`` on it.

    Raises:
        FileNotFoundError: If the pickle file is not present.
    """
    # NOTE: unpickling can execute arbitrary code; only load vectorizer
    # files that ship with the app and come from a trusted source.
    with open('tfidf_vectorizer.pkl', 'rb') as file:
        return pickle.load(file)
|
33 |
+
|
34 |
+
|
35 |
+
def ratings(list_of_reviews):
    """Predict a rating for the given review text.

    The text is cleaned (letters only, lowercased, English stop words
    removed, remaining words lemmatized), vectorized with the pickled
    vectorizer, and passed to the pickled model.

    Args:
        list_of_reviews: The raw review text as a single string. Despite
            the name, the value is handed straight to ``re.sub``, so it
            must be a string, not a list.

    Returns:
        Whatever ``model.predict`` returns for the single cleaned document.
    """
    # Build the stop-word set once; the lookup below runs for every token.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Replace everything that is not an ASCII letter with a space, then
    # lowercase and split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', list_of_reviews).lower().split()
    cleaned = ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )

    # The vectorizer expects an iterable of documents, hence the
    # one-element list.
    tf_idf_vectorizer = load_vectorizer()
    tf_review = tf_idf_vectorizer.transform([cleaned])

    model = load_model()
    return model.predict(tf_review)
|
52 |
+
|
53 |
+
def sentiment_analysis(texts):
    """Score each text with the ``cardiffnlp/twitter-roberta-base-sentiment`` model.

    The tokenizer and model are loaded on every call.

    Args:
        texts: Iterable of strings to score.

    Returns:
        A list with one entry per text; each entry is a list of floats,
        the softmax of the model's output scores for that text. The caller
        in this file reads indices 0, 1 and 2 as the negative, neutral and
        positive scores respectively.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    results = []
    for text in texts:
        # Inputs longer than 512 tokens are truncated rather than rejected.
        encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
        output = model(**encoded_input)
        # output[0] holds the scores for the batch; [0] selects the single
        # item, which is detached and converted to NumPy for scipy's softmax.
        scores = softmax(output[0][0].detach().numpy())
        results.append(scores.tolist())

    return results
|
71 |
+
|
72 |
+
def get_sentiment_label(row):
    """Return the sentiment label whose score is strictly the largest.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``'positive_score'``, ``'neutral_score'`` and
            ``'negative_score'``.

    Returns:
        ``'positive'`` or ``'negative'`` when that score is strictly
        greater than both others; ``'neutral'`` otherwise, which includes
        every tie.
    """
    positive = row['positive_score']
    neutral = row['neutral_score']
    negative = row['negative_score']

    if positive > neutral and positive > negative:
        return 'positive'
    if negative > neutral and negative > positive:
        return 'negative'
    # Neutral wins outright, or no single score is strictly the largest.
    return 'neutral'
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
# Retained from the original app configuration. The chart below now passes
# an explicit figure to st.pyplot, so it no longer relies on the global
# pyplot state this option is about.
st.set_option('deprecation.showPyplotGlobalUse', False)


# Header layout: image on the left, title and description on the right.
col1, col2 = st.columns([0.5, 1.2])  # Adjust the ratio as needed

# Column 1: Image
with col1:
    # NOTE(review): img2.png is not among the files listed in this upload;
    # confirm it exists next to the app.
    st.image("img2.png", width=200)  # Adjust the path and width as needed

# Column 2: Text
with col2:
    # The heading is kept at column 0 inside the string so Markdown renders
    # it as a heading rather than an indented code block.
    st.write("""
# Ratings Prediction & Reviews Sentiment Analysis App
""")
    st.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")
    st.write('---')


sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))

# Both modes read the same text box.
list_reviews = st.text_input("Enter the list of reviews: ")
sentiment_review = list_reviews
ratings_review = list_reviews
submit_button = st.button("Submit")

if sidebar_selection == "Ratings Prediction":
    # Check if the submit button is clicked and the input is not empty
    if submit_button and ratings_review:
        rating_pred = ratings(ratings_review)
        # ratings() vectorizes exactly one document, so show that single
        # prediction instead of the raw array (e.g. "5" rather than "[5]").
        st.write(f"The predicted average rating for a product with the list of reviews above is: {rating_pred[0]}")
    elif submit_button:
        # Display a message if the submit button is clicked but no review is provided
        st.write("Please enter a review to get a prediction.")

elif sidebar_selection == "Sentiment Analysis":
    # Reviews are comma-separated; drop blank fragments so empty strings
    # are not sent to the model. A review that itself contains a comma is
    # still split into several entries.
    review_list = [part.strip() for part in sentiment_review.split(',') if part.strip()]

    if submit_button and review_list:
        df = pd.DataFrame(review_list, columns=['Review'])
        scores = sentiment_analysis(df['Review'])

        # sentiment_analysis returns [negative, neutral, positive] per review.
        df['negative_score'] = [score[0] for score in scores]
        df['neutral_score'] = [score[1] for score in scores]
        df['positive_score'] = [score[2] for score in scores]

        df['sentiment'] = df.apply(get_sentiment_label, axis=1)

        # Display the sentiment distribution chart using Streamlit
        st.write("**Sentiment Distribution:**")
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.countplot(data=df, x='sentiment', color='blue', ax=ax)

        # Display values on top of the bars
        for p in ax.patches:
            ax.annotate(f'{p.get_height()}', (p.get_x() + p.get_width() / 2, p.get_height()), ha='center',
                        va='bottom')

        # Set plot labels and title
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        ax.set_title('Sentiment Distribution')

        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the
        # figure so open figures do not accumulate.
        plt.close(fig)
    elif submit_button:
        # Mirror the Ratings branch: tell the user when nothing usable was entered.
        st.write("Please enter at least one review to analyze its sentiment.")
|
149 |
+
|
150 |
+
|
151 |
+
|
tfidf_vectorizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd983952dee953c67f157dfc789b66971fc59c3017923c470b19adb10ca6cfbf
|
3 |
+
size 172420
|