File size: 4,950 Bytes
88f4baf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python
# coding: utf-8

from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import WordNetLemmatizer
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import nltk
import regex as re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from scipy.special import softmax
import matplotlib.pyplot as plt
import seaborn as sns
import ast


# Load the model
def load_model(path='random_forest_model.pkl'):
    """Load the pickled prediction model from disk.

    Args:
        path: Location of the pickle file. Defaults to
            ``'random_forest_model.pkl'``, resolved against the current
            working directory.

    Returns:
        The object stored in the pickle file (presumably the fitted
        classifier, going by the default file name -- verify against the
        training code).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code, so ``path`` must only
    # ever point at a file produced by a trusted training run.
    with open(path, 'rb') as file:
        return pickle.load(file)

def load_vectorizer(path='tfidf_vectorizer.pkl'):
    """Load the pickled text vectorizer from disk.

    Args:
        path: Location of the pickle file. Defaults to
            ``'tfidf_vectorizer.pkl'``, resolved against the current
            working directory.

    Returns:
        The object stored in the pickle file (presumably the fitted TF-IDF
        vectorizer, going by the default file name -- verify against the
        training code).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code, so ``path`` must only
    # ever point at a file produced by a trusted training run.
    with open(path, 'rb') as file:
        return pickle.load(file)


def ratings(list_of_reviews):
    """Predict a rating from free-text review input.

    The text is normalised (every non-letter replaced by a space,
    lower-cased, English stop words dropped, remaining words lemmatised),
    vectorised with the object returned by ``load_vectorizer`` and passed to
    the model returned by ``load_model``.

    Args:
        list_of_reviews: A single string containing the review text. Despite
            the name, the whole string is cleaned and scored as one document.

    Returns:
        The result of ``model.predict`` for the one cleaned document.
    """
    # Build the stop-word set once so every membership test below is O(1)
    # instead of re-creating the set for each token.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = re.sub('[^a-zA-Z]', ' ', list_of_reviews).lower().split()
    cleaned_review = ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )

    # The vectorizer expects an iterable of documents, hence the
    # one-element list.
    tf_idf_vectorizer = load_vectorizer()
    tf_review = tf_idf_vectorizer.transform([cleaned_review])

    model = load_model()
    return model.predict(tf_review)

def sentiment_analysis(texts):
    """Score each text with the ``cardiffnlp/twitter-roberta-base-sentiment`` model.

    Args:
        texts: Iterable of strings to score. A single ``str`` is treated as
            one text rather than being iterated character by character.

    Returns:
        A list with one entry per input text; each entry is a list of floats
        holding the softmax of the model's output logits for that text.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

    if isinstance(texts, str):
        texts = [texts]

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    results = []
    for text in texts:
        # Inputs longer than 512 tokens are truncated rather than rejected.
        encoded_input = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
        output = model(**encoded_input)
        # output[0] is indexed again with [0] to pick the logits of the
        # single sequence that was encoded.
        logits = output[0][0].detach().numpy()
        results.append(softmax(logits).tolist())

    return results

def get_sentiment_label(row):
    """Return the sentiment whose score strictly dominates the other two.

    Args:
        row: Mapping-like object with ``'positive_score'``,
            ``'neutral_score'`` and ``'negative_score'`` entries.

    Returns:
        ``'positive'`` or ``'negative'`` when that score is strictly greater
        than both of the others; ``'neutral'`` in every other case,
        including ties.
    """
    pos = row['positive_score']
    neu = row['neutral_score']
    neg = row['negative_score']

    if pos > neu and pos > neg:
        return 'positive'
    if neg > neu and neg > pos:
        return 'negative'
    return 'neutral'



# Page scaffolding: header row, intro text, mode selector and the review
# input shared by both modes.

# NOTE(review): newer Streamlit releases may no longer recognise this
# option -- confirm against the pinned Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Header row: a narrow logo column next to a wider title column.
logo_col, title_col = st.columns([0.5, 1.2])
logo_col.image("img2.png", width=200)
title_col.write("""
    # Ratings Prediction & Reviews Sentiment Analysis App 
    """)

st.write(" This app predicts **the average rating of a product, given a list of reviews and also displays the sentiment of these reviews**!")
st.write('---')

# The sidebar radio decides which analysis runs on submit.
sidebar_selection = st.sidebar.radio("Select an option:", ("Ratings Prediction", "Sentiment Analysis"))

# Both modes read the same text box; the two aliases are consumed by the
# dispatch code that follows.
list_reviews = st.text_input("Enter the list of reviews: ")
sentiment_review = ratings_review = list_reviews
submit_button = st.button("Submit")

# Dispatch on the sidebar choice. Nothing is computed until the user has
# pressed Submit.
if sidebar_selection == "Ratings Prediction":
    if submit_button and ratings_review:
        rating_pred = ratings(ratings_review)
        # ``ratings`` scores exactly one document, so show that single
        # prediction instead of the array repr (presumably a one-element
        # array from ``model.predict`` -- verify against the pickled model).
        predicted_rating = rating_pred[0]
        st.write(f"The predicted average rating for a product with the list of reviews above is: {predicted_rating}")
    elif submit_button:
        # Submit was clicked but the text box is empty.
        st.write("Please enter a review to get a prediction.")

elif sidebar_selection == "Sentiment Analysis":
    # Reviews are comma-separated; trim whitespace and drop empty fragments
    # so stray commas do not produce blank "reviews" that get scored.
    review_list = [part.strip() for part in sentiment_review.split(',')]
    review_list = [part for part in review_list if part]

    if submit_button and review_list:
        df = pd.DataFrame(review_list, columns=['Review'])
        scores = sentiment_analysis(df['Review'])

        # Score order follows how the rest of this script reads the model
        # output: index 0 negative, 1 neutral, 2 positive.
        df['negative_score'] = [score[0] for score in scores]
        df['neutral_score'] = [score[1] for score in scores]
        df['positive_score'] = [score[2] for score in scores]

        df['sentiment'] = df.apply(get_sentiment_label, axis=1)

        # Display the sentiment distribution chart using Streamlit.
        st.write("**Sentiment Distribution:**")
        # Draw on an explicit figure rather than pyplot's global state so it
        # can be handed to Streamlit and then released.
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.countplot(data=df, x='sentiment', color='blue', ax=ax)

        # Display the count on top of each bar, without a trailing ".0".
        for bar in ax.patches:
            ax.annotate(
                f'{bar.get_height():.0f}',
                (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                ha='center',
                va='bottom',
            )

        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        ax.set_title('Sentiment Distribution')

        st.pyplot(fig)
        # Streamlit re-runs this script on every interaction; close the
        # figure so open figures do not pile up in memory.
        plt.close(fig)
    elif submit_button:
        # Submit was clicked but no usable review text was provided.
        st.write("Please enter at least one review to analyse.")