adejumobi committed on
Commit
88f4baf
1 Parent(s): 36ed68d

Upload 4 files

Browse files
random_forest_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59ee5cc72522101824ced0660bd5dbe374facfaa5eecd85611e4d2d44f31298
3
+ size 5743385
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ matplotlib==3.6.3
2
+ nltk==3.8.1
3
+ numpy==1.23.5
4
+ pandas==1.5.3
5
+ regex==2023.10.3
6
+ scikit_learn==1.2.1
7
+ scipy==1.11.4
8
+ seaborn==0.13.0
9
+ streamlit==1.18.1
10
+ transformers==4.34.1
reviews_app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from nltk.stem import WordNetLemmatizer
6
+ import streamlit as st
7
+ import pickle
8
+ import pandas as pd
9
+ import numpy as np
10
+ import nltk
11
+ import regex as re
12
+ from nltk.corpus import stopwords
13
+ from nltk.tokenize import word_tokenize
14
+ from sklearn.ensemble import RandomForestClassifier
15
+ import transformers
16
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
17
+ from scipy.special import softmax
18
+ import matplotlib.pyplot as plt
19
+ import seaborn as sns
20
+ import ast
21
+
22
+
23
+ # Load the model
24
def load_model():
    """Return the object unpickled from ``random_forest_model.pkl``.

    The file is opened relative to the current working directory.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so this file
    must only ever be a trusted artifact shipped with the app.
    """
    with open('random_forest_model.pkl', 'rb') as model_file:
        return pickle.load(model_file)
28
+
29
def load_vectorizer():
    """Return the object unpickled from ``tfidf_vectorizer.pkl``.

    The file is opened relative to the current working directory.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so this file
    must only ever be a trusted artifact shipped with the app.
    """
    with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        return pickle.load(vectorizer_file)
33
+
34
+
35
def ratings(list_of_reviews):
    """Predict a rating from review text.

    Despite the parameter name, the input is processed as ONE string: every
    non-letter character (including any separators between reviews) is
    replaced by a space, so the whole input becomes a single document.

    Args:
        list_of_reviews: Raw review text as a single ``str``.

    Returns:
        Whatever the pickled model's ``predict`` returns for the single
        vectorized document (the caller displays it as the rating).

    Raises:
        LookupError: Raised by NLTK if the stop-word or WordNet data is not
            installed in the runtime environment.
    """
    # Build the stop-word set once; the previous code rebuilt it for every
    # token inside the comprehension.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Keep letters only, lower-case, and split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', list_of_reviews).lower().split()
    cleaned_review = ' '.join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )

    # The vectorizer expects an iterable of documents, hence the 1-item list.
    tf_idf_vectorizer = load_vectorizer()
    tf_review = tf_idf_vectorizer.transform([cleaned_review])

    model = load_model()
    return model.predict(tf_review)
52
+
53
def sentiment_analysis(texts):
    """Score each text with ``cardiffnlp/twitter-roberta-base-sentiment``.

    The tokenizer and model are loaded via ``from_pretrained`` on every call.

    Args:
        texts: Iterable of strings; each is tokenized separately and
            truncated to at most 512 tokens.

    Returns:
        A list with one entry per input text. Each entry is a list of
        softmax probabilities over the model's output classes. The caller in
        this file reads indexes 0, 1 and 2 as negative, neutral and positive.
        An empty ``texts`` yields an empty list.
    """
    # Local import: the file does not import torch at module level, but the
    # 'pt' tensors requested below already require it at runtime.
    import torch

    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    results = []
    # Inference only: disabling autograd avoids building a graph per text.
    with torch.no_grad():
        for text in texts:
            encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
            output = model(**encoded_input)
            # output[0] holds the logits; [0] selects the single batch item.
            scores = softmax(output[0][0].numpy())
            results.append(scores.tolist())

    return results
71
+
72
def get_sentiment_label(row):
    """Map a row of sentiment scores to a label.

    Args:
        row: Mapping with the keys ``'positive_score'``, ``'neutral_score'``
            and ``'negative_score'``.

    Returns:
        ``'positive'`` or ``'negative'`` when that score is strictly greater
        than both of the others; ``'neutral'`` in every other case,
        including ties.
    """
    positive = row['positive_score']
    neutral = row['neutral_score']
    negative = row['negative_score']

    if positive > neutral and positive > negative:
        return 'positive'
    if negative > neutral and negative > positive:
        return 'negative'
    return 'neutral'
79
+
80
+
81
+
82
# Turn off Streamlit's 'deprecation.showPyplotGlobalUse' notice.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Page header: a narrow column for the image and a wider one for the text.
col1, col2 = st.columns([0.5, 1.2])

# Left column: image.
col1.image("img2.png", width=200)

# Right column: title and a one-line description of the app.
col2.write("""
# Ratings Prediction & Reviews Sentiment Analysis App
""")
col2.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")
col2.write('---')
99
+
100
+
101
+
102
# Sidebar switch between the two features of the app.
sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))

# Both features read the same free-text input.
list_reviews = st.text_input("Enter the list of reviews: ")
sentiment_review = list_reviews
ratings_review = list_reviews
submit_button = st.button("Submit")

if sidebar_selection == "Ratings Prediction":
    # Whitespace-only input counts as empty.
    if submit_button and ratings_review.strip():
        rating_pred = ratings(ratings_review)
        # predict() returns one value per document; show the scalar rather
        # than the array repr.
        st.write(f"The predicted average rating for a product with the list of reviews above is: {rating_pred[0]}")
    elif submit_button:
        # Submit was clicked but no review was provided.
        st.write("Please enter a review to get a prediction.")

elif sidebar_selection == "Sentiment Analysis":
    # Reviews are comma-separated; drop empty fragments (e.g. from a trailing
    # comma) so they are not scored as if they were reviews.
    review_list = [piece.strip() for piece in sentiment_review.split(',')]
    review_list = [piece for piece in review_list if piece]

    if submit_button and review_list:
        df = pd.DataFrame(review_list, columns=['Review'])
        scores = sentiment_analysis(df['Review'])

        # Each score triple is read as (negative, neutral, positive).
        df['negative_score'] = [score[0] for score in scores]
        df['neutral_score'] = [score[1] for score in scores]
        df['positive_score'] = [score[2] for score in scores]

        df['sentiment'] = df.apply(get_sentiment_label, axis=1)

        # Draw the sentiment distribution on an explicit figure instead of
        # pyplot's global state.
        st.write("**Sentiment Distribution:**")
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.countplot(data=df, x='sentiment', color='blue', ax=ax)

        # Print each bar's count just above it.
        for bar in ax.patches:
            ax.annotate(
                f'{bar.get_height():.0f}',
                (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                ha='center',
                va='bottom',
            )

        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        ax.set_title('Sentiment Distribution')

        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so open
        # figures do not accumulate in memory.
        plt.close(fig)
    elif submit_button:
        # Mirror the feedback given in the Ratings Prediction branch.
        st.write("Please enter at least one review to analyse.")
149
+
150
+
151
+
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd983952dee953c67f157dfc789b66971fc59c3017923c470b19adb10ca6cfbf
3
+ size 172420