Spaces:

adejumobi
/

ratingsapp

Sleeping

App Files Files Community

adejumobi committed on Dec 14, 2023

Commit

88f4baf

•

1 Parent(s): 36ed68d

Upload 4 files

Browse files

Files changed (4) hide show

random_forest_model.pkl +3 -0
requirements.txt +10 -0
reviews_app.py +151 -0
tfidf_vectorizer.pkl +3 -0

random_forest_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e59ee5cc72522101824ced0660bd5dbe374facfaa5eecd85611e4d2d44f31298
+size 5743385

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+matplotlib==3.6.3
+nltk==3.8.1
+numpy==1.23.5
+pandas==1.5.3
+regex==2023.10.3
+scikit_learn==1.2.1
+scipy==1.11.4
+seaborn==0.13.0
+streamlit==1.18.1
+transformers==4.34.1

reviews_app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+#!/usr/bin/env python
+# coding: utf-8
+from sklearn.feature_extraction.text import TfidfVectorizer
+from nltk.stem import WordNetLemmatizer
+import streamlit as st
+import pickle
+import pandas as pd
+import numpy as np
+import nltk
+import regex as re
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from sklearn.ensemble import RandomForestClassifier
+import transformers
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
+from scipy.special import softmax
+import matplotlib.pyplot as plt
+import seaborn as sns
+import ast
+# Load the model
+def load_model():
+    with open('random_forest_model.pkl', 'rb') as file:
+        loaded_model = pickle.load(file)
+    return loaded_model
+def load_vectorizer():
+    with open('tfidf_vectorizer.pkl', 'rb') as file:
+        loaded_vectorizer = pickle.load(file)
+    return loaded_vectorizer
+def ratings(list_of_reviews):
+    xidf = []
+    stopwords = nltk.corpus.stopwords.words('english')
+    lemmatizer = WordNetLemmatizer()
+    review = re.sub('[^a-zA-Z]', ' ', list_of_reviews)
+    review = review.lower()
+    review = review.split()
+    review = [lemmatizer.lemmatize(word) for word in review if not word in set(stopwords)]
+    review = ' '.join(review)
+    xidf.append(review)
+    tf_idf_vectorizer = load_vectorizer()
+    # Transform the new review using the loaded vectorizer
+    tf_review = tf_idf_vectorizer.transform(xidf)
+    model = load_model()
+    prediction = model.predict(tf_review)
+    return prediction
+def sentiment_analysis(texts):
+    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
+    task = 'sentiment'
+    tokenizer = AutoTokenizer.from_pretrained(MODEL)
+    config = AutoConfig.from_pretrained(MODEL)
+    # PT
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+    results = []
+    for text in texts:
+        encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
+        output = model(**encoded_input)
+        scores = output[0][0].detach().numpy()
+        scores = softmax(scores)
+        results.append(scores.tolist())
+    return results
+def get_sentiment_label(row):
+    if row['positive_score'] > row['neutral_score'] and row['positive_score'] > row['negative_score']:
+        return 'positive'
+    elif row['negative_score'] > row['neutral_score'] and row['negative_score'] > row['positive_score']:
+        return 'negative'
+    else:
+        return 'neutral'
+st.set_option('deprecation.showPyplotGlobalUse', False)
+# Create two columns
+col1, col2 = st.columns([0.5, 1.2])  # Adjust the ratio as needed
+# Column 1: Image
+with col1:
+    st.image("img2.png", width=200)  # Adjust the path and width as needed
+# Column 2: Text
+with col2:
+    st.write("""
+    # Ratings Prediction & Reviews Sentiment Analysis App
+    """)
+st.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")
+st.write('---')
+sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))
+list_reviews = st.text_input("Enter the list of reviews: ")
+sentiment_review = list_reviews
+ratings_review = list_reviews
+submit_button = st.button("Submit")
+if sidebar_selection == "Ratings Prediction":
+    # Check if the submit button is clicked and the input is not empty
+    if submit_button and ratings_review:
+        rating_pred = ratings(ratings_review)
+        st.write(f"The predicted average rating for a product with the list of reviews above is: {rating_pred}")
+    elif submit_button:
+        # Display a message if the submit button is clicked but no review is provided
+        st.write("Please enter a review to get a prediction.")
+elif sidebar_selection == "Sentiment Analysis":
+    if submit_button and sentiment_review:
+        # Create a DataFrame
+        # Split the string into a list of reviews
+        review_list = sentiment_review.split(',')
+        df = pd.DataFrame(review_list,  columns=['Review'])
+        scores = sentiment_analysis(df['Review'])
+        df['negative_score'] = [score[0] for score in scores]
+        df['neutral_score'] = [score[1] for score in scores]
+        df['positive_score'] = [score[2] for score in scores]
+        df['sentiment'] = df.apply(get_sentiment_label, axis=1)
+        # Display the sentiment distribution chart using Streamlit
+        st.write("**Sentiment Distribution:**")
+        plt.figure(figsize=(8, 6))
+        sns.countplot(data=df, x='sentiment', color='blue')
+        # Display values on top of the bars
+        for p in plt.gca().patches:
+            plt.gca().annotate(f'{p.get_height()}', (p.get_x() + p.get_width() / 2, p.get_height()), ha='center',
+                           va='bottom')
+        # Set plot labels and title
+        plt.xlabel('Sentiment')
+        plt.ylabel('Count')
+        plt.title('Sentiment Distribution')
+        st.pyplot(plt)

tfidf_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd983952dee953c67f157dfc789b66971fc59c3017923c470b19adb10ca6cfbf
+size 172420