ahmadluay committed on
Commit
3329a29
1 Parent(s): 0952d6e

first commit

Browse files
Files changed (9) hide show
  1. amazon.jpg +0 -0
  2. amazon1.jpg +0 -0
  3. app.py +11 -0
  4. best_model.h5 +3 -0
  5. dataset_20000_rows.csv +0 -0
  6. eda.py +148 -0
  7. prediction.py +134 -0
  8. requirements.txt +8 -0
  9. t.pickle +3 -0
amazon.jpg ADDED
amazon1.jpg ADDED
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda # python file
3
+ import prediction # python file
4
+
5
# Sidebar selector that decides which page module gets rendered.
navigation = st.sidebar.selectbox(
    'Page Navigation: ',
    ('EDA', 'Amazon Customer Review Prediction'),
)

# Any choice other than 'EDA' falls through to the prediction page.
page = eda if navigation == 'EDA' else prediction
page.run()
11
+
best_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8856473cb9906cc5a5c1ebb2292e5b7b39b0a3b6e79f14f208a8d735909a996
3
+ size 78004048
dataset_20000_rows.csv ADDED
The diff for this file is too large to render. See raw diff
 
eda.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import plotly.express as px
7
+ import nltk
8
+ from nltk.tokenize import word_tokenize
9
+ import wordcloud
10
+ from wordcloud import WordCloud
11
+
12
+ from sklearn.preprocessing import LabelEncoder
13
+
14
+ from PIL import Image
15
+
16
# Page-level settings. This executes at import time, i.e. before any of the
# Streamlit calls made inside run().
st.set_page_config(
    page_title='Sentiment Analysis of Amazon Customer Reviews',
    layout='wide',
    initial_sidebar_state='expanded',
)
21
+
22
def _show_review_types(df_eda):
    """Render a pie chart with the number of reviews per class."""
    # Name both columns explicitly: the column names produced by
    # value_counts().reset_index() differ between pandas 1.x and 2.x.
    review = (
        df_eda['class_index']
        .value_counts()
        .rename_axis('review_type')
        .reset_index(name='count')
    )
    fig = px.pie(review, values='count', names='review_type', color_discrete_sequence=['red', 'blue'])
    fig.update_layout(title_text="Type of Review")
    st.plotly_chart(fig)
    st.write('Based on the table and visualization above, it can be seen that both negative and positive reviews consist of 10,000 reviews each.')


def _show_sample_reviews(df_eda, n_samples=20):
    """Print `n_samples` random negative reviews, then as many positive ones."""
    pd.set_option('display.width', None)
    for label in ('Negative', 'Positive'):
        sample = df_eda[df_eda['class_index'] == label + ' Review'].sample(n_samples)
        st.write('Example of ' + label + ' Reviews')
        st.write('-'*100)
        # Column position 2 is presumably `review_text` (after dropping
        # 'Unnamed: 0') - kept positional to match the original behavior.
        for text in sample.iloc[:, 2]:
            st.write(text)
            st.write('-'*100)


def _show_word_counts(df_eda):
    """Plot per-class word-count histograms and print max/mean statistics.

    Adds a `len_words` column to `df_eda` in place.
    """
    # Count the number of words in each review
    df_eda['len_words'] = df_eda['review_text'].apply(lambda x: len(nltk.word_tokenize(x)))
    positive_len = df_eda[df_eda['class_index'] == 'Positive Review']['len_words']
    negative_len = df_eda[df_eda['class_index'] == 'Negative Review']['len_words']

    # Histogram plot for each review class
    fig, ax = plt.subplots(1, 2, figsize=(30, 10))
    sns.histplot(ax=ax[0], data=positive_len, kde=True)
    ax[0].set_title('Positive Review')
    sns.histplot(ax=ax[1], data=negative_len, kde=True)
    ax[1].set_title('Negative Review')
    st.pyplot(fig)
    plt.close(fig)  # release the figure once Streamlit has rendered it
    st.write('-'*100)

    # Max and average number of words overall
    st.write('The maximum number of words on each review is ', df_eda['len_words'].max())
    st.write('The average number of words on each review is', df_eda['len_words'].mean())
    st.write('-'*100)

    # Max and average number of words on positive reviews
    st.write('The maximum number of words on positive review is ', positive_len.max())
    st.write('The average number of words on positive review is', positive_len.mean())
    st.write('-'*100)

    # Max and average number of words on negative reviews
    st.write('The maximum number of words on negative review is ', negative_len.max())
    st.write('The average number of words on negative review is', negative_len.mean())
    st.write('-'*100)

    st.write('Based on the information above, it is known that **negative reviews have, on average, more words than positive reviews**. Based on my assumption, there could be various reasons why negative reviews have, on average, more words than positive reviews. Here are some possible explanations:')
    st.write("1. **Complex issues**: Negative reviews might involve more complex issues or problems with the product or service, which require more detailed explanations and examples.")
    st.write('2. **Emotional expression**: Negative reviews might include more emotional expression, such as frustration or disappointment, which can lead to more detailed and expressive language.')
    st.write('3. **Expectations**: Negative reviews might involve higher expectations from customers, leading them to provide more detailed feedback in order to articulate their disappointment or frustration.')
    # Fixed: the opening bold marker was a single asterisk.
    st.write('4. **Personal experience**: Negative reviews might be based on a more personal experience, such as a defective product or poor customer service, which can lead to a more detailed and personalized account of the issue.')


def _render_wordcloud(texts, title, background_color):
    """Draw a 50-word word cloud of `texts` on its own figure and show it."""
    cloud = WordCloud(max_words=50, background_color=background_color, width=2000, height=1000).generate(" ".join(texts))

    # Pass an explicit figure to st.pyplot instead of relying on the global
    # pyplot state (and on the 'deprecation.showPyplotGlobalUse' option).
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.axis('off')
    ax.set_title(title, fontsize=20)
    ax.imshow(cloud)
    st.pyplot(fig)
    plt.close(fig)


def _show_wordclouds(df_eda):
    """Show one word cloud per review class followed by the written findings."""
    positive_review = df_eda[df_eda['class_index'] == 'Positive Review']
    negative_review = df_eda[df_eda['class_index'] == 'Negative Review']

    _render_wordcloud(positive_review.review_text.values, "Positive Review", "white")
    _render_wordcloud(negative_review.review_text.values, "Negative Review", "black")

    st.write('From the 2 visualizations above, we can obtain the following information:')
    st.write("1. **Book**, **movie**, **cd** and **album** are the products most frequently reviewed by amazon customers.")
    st.write('2. The most frequent positive words used by amazon customers included **good**, **great**, **love**, **best**, and **easy**. ')
    st.write('3. The most common negative words used by amazon customers included **bad**, **used** and **problem**.')


def run():
    """Render the EDA page.

    Shows the page header and illustration, loads and displays
    'dataset_20000_rows.csv', then renders the analysis picked in the
    'Select EDA' drop-down.
    """
    # title
    st.title('Sentiment Analysis of Amazon Customer Reviews')
    st.write('by Ahmad Luay Adnani')

    # sub header
    st.subheader('Exploratory Data Analysis of the Dataset.')

    # Add Image
    image = Image.open('amazon.jpg')
    st.image(image, caption='illustration')

    # Description
    st.write('Amazon is one of the largest e-commerce platforms in the world, with millions of products available for purchase and billions of reviews submitted by customers. The reviews can vary in length, tone, and style, and often contain sarcasm, irony, or other forms of nuanced language. Sentiment analysis on Amazon reviews can **provide valuable insights for businesses**, including identifying common issues that customers face with their products or services, understanding the factors that drive customer satisfaction, and tracking changes in customer sentiment over time.')
    st.write('# Dataset')
    # Fixed: the URL was wrapped in quotes, which breaks the markdown link.
    st.write('Dataset used is amazon review dataset from [kaggle](https://www.kaggle.com/datasets/yacharki/amazon-reviews-for-sa-binary-negative-positive-csv).')

    # show dataframe
    df2 = pd.read_csv('dataset_20000_rows.csv')
    df2 = df2.drop(['Unnamed: 0'], axis=1)
    st.dataframe(df2)
    # add description of Dataset
    st.write('In this dataset, class 1 is the **negative review** and class 2 is the **positive review**')

    # Work on a copy with human-readable class labels. Assign the result back
    # rather than mutating through attribute access with inplace=True.
    df_eda = df2.copy()
    df_eda['class_index'] = df_eda['class_index'].replace({1: 'Negative Review', 2: 'Positive Review'})

    # Render the analysis chosen by the user
    st.write('# Exploratory Data Analysis')
    select_eda = st.selectbox('Select EDA : ', ('Type of Review', 'Example of Positive and Negative Review', 'Number of Words', 'WordCloud'))
    if select_eda == 'Type of Review':
        _show_review_types(df_eda)
    elif select_eda == 'Example of Positive and Negative Review':
        _show_sample_reviews(df_eda)
    elif select_eda == 'Number of Words':
        _show_word_counts(df_eda)
    else:
        _show_wordclouds(df_eda)


if __name__ == '__main__':
    run()
prediction.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import tensorflow
5
+ from tensorflow.keras.models import load_model
6
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
7
+ import pickle
8
+ from nltk.stem import WordNetLemmatizer
9
+ import nltk
10
+ import re
11
+ import string
12
# Fetch the NLTK data used by the preprocessing in this module.
# 'punkt_tab' is requested in addition to 'punkt' because newer NLTK releases
# look up the tokenizer data for word_tokenize under that name; on older
# releases the extra request is expected to be reported as unknown without
# raising - verify against the installed NLTK version.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)
15
+ from nltk.corpus import stopwords
16
+ from nltk.tokenize import word_tokenize
17
+ from PIL import Image
18
+
19
+ import pickle
20
+ # Load All Files
21
+
22
# Load the pickled text tokenizer (used through `t.texts_to_sequences`).
# NOTE: pickle.load executes arbitrary code from the file - only ever load a
# 't.pickle' that ships with this project.
with open('t.pickle', 'rb') as file_1:
    t = pickle.load(file_1)

# Keep a handle on the NLTK corpus reader, because the name `stopwords` is
# rebound below to the combined word collection that text_processing() uses.
_stopword_corpus = stopwords

# Spanish, French and German stop words, in that order.
additional_stopwords = [
    word
    for language in ('spanish', 'french', 'german')
    for word in _stopword_corpus.words(language)
]

# English is the base language; a set gives constant-time membership tests
# for the per-token filtering done during preprocessing.
stopwords = set(_stopword_corpus.words('english')).union(additional_stopwords)

lemmatizer = WordNetLemmatizer()
46
def text_processing(text):
    """Clean a single review string for the tokenizer.

    Steps, in order: lowercase, drop "&#..." character-reference residue,
    strip punctuation, trim, replace every character that is not an ASCII
    letter, whitespace or apostrophe, collapse whitespace runs, tokenize,
    remove stop words, and pass the result through the lemmatizer.

    Args:
        text: Raw review text.

    Returns:
        The cleaned text as one space-separated string.
    """
    # Converting all text to lowercase
    text = text.lower()

    # Removing character-reference residue such as "&#34"
    text = re.sub(r"&#[A-Za-z0-9_]+", " ", text)

    # Removing punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Removing leading/trailing whitespace
    text = text.strip()

    # Removing everything that is not a letter, whitespace or apostrophe
    # (this covers emoji, digits and accented characters)
    text = re.sub(r"[^A-Za-z\s']", " ", text)

    # Collapsing runs of whitespace into a single space
    text = re.sub(r"\s\s+", " ", text)

    # Tokenizing words
    tokens = word_tokenize(text)

    # Removing stop words
    text = ' '.join([word for word in tokens if word not in stopwords])

    # NOTE(review): lemmatize() is given the whole joined string rather than
    # one token at a time, so it most likely returns the text unchanged. Left
    # as-is because the saved model was presumably trained on exactly this
    # preprocessing - confirm before switching to per-token lemmatization.
    text = lemmatizer.lemmatize(text)

    return text
76
+
77
+
78
# Trained classifier, loaded once at import time; compile=False loads it
# without compiling, which is enough for calling predict().
model_gruimp = load_model('best_model.h5', compile=False)

# Length every tokenized review is padded/truncated to.
# NOTE(review): presumably the sequence length used during training - confirm.
_MAX_SEQUENCE_LENGTH = 117

# Reply shown for a negative prediction: sub-header first, then paragraphs.
_NEGATIVE_RESPONSE = (
    'Negative Feedback',
    'Dear valued customer,',
    'Thank you for taking the time to share your experience with us. We are sorry to hear that you were not satisfied with your recent purchase from Amazon. We take all feedback seriously and strive to provide the best possible shopping experience for our customers.',
    'We apologize for any inconvenience caused and would like to make it right. Please let us know what specifically went wrong with your order and we will do everything we can to rectify the situation. We value your satisfaction and would appreciate the opportunity to earn back your trust.',
    'Thank you again for bringing this to our attention. We hope to hear from you soon and look forward to the opportunity to serve you better in the future.',
    'Best regards,',
    'The Amazon Customer Service Team',
)

# Reply shown for a positive prediction: sub-header first, then paragraphs.
_POSITIVE_RESPONSE = (
    'Positive Feedback',
    'Dear valued customer,',
    'Thank you so much for taking the time to provide us with your positive feedback regarding your recent purchase from Amazon. We are thrilled to hear that you had a great shopping experience with us and that our product met your expectations.',
    'At Amazon, we always strive to exceed our customers expectations and provide the best possible service. Your satisfaction is our top priority and we are delighted to know that we have achieved this with your recent purchase.',
    'Your kind words have made our day and will serve as a motivation for us to continue delivering exceptional service to our customers. Thank you for choosing Amazon and we hope to have the pleasure of serving you again in the near future.',
    'Best regards,',
    'The Amazon Customer Service Team',
)


def run():
    """Render the prediction page.

    Collects a review title and text through a form, displays the
    preprocessed input, and - once the form is submitted - classifies the
    review text and prints the matching customer-service reply.
    """
    with st.form(key='Amazon_Customer_Review'):
        st.title('Amazon Customer Review')
        image = Image.open('amazon1.jpg')
        st.image(image)
        review_title = st.text_input('Title', value='Mystery at Walt Disney World')
        review_text = st.text_input('Comments', value='Book arrived in good condition, but took quite a bit longer to arrive than normal. Overall, I am very satisfied.')

        submitted = st.form_submit_button('Submit')

    # Single-row inference frame built from the form values
    df_inf = pd.DataFrame([{
        'review_title': review_title,
        'review_text': review_text,
    }])
    df_inf_copy = df_inf.copy()

    # Apply the full text preprocessing to the review body and show the result
    df_inf_copy['review_processed'] = df_inf_copy['review_text'].apply(text_processing)
    st.dataframe(df_inf_copy)

    # Convert the text to integer sequences and pad to a fixed length
    df_inf_transform = t.texts_to_sequences(df_inf_copy.review_processed)
    df_inf_transform = pad_sequences(df_inf_transform, maxlen=_MAX_SEQUENCE_LENGTH)

    if not submitted:
        return

    # Predict using the neural network. Only one row is scored, so reduce the
    # model output to a scalar before thresholding instead of branching on a
    # whole array.
    y_pred_inf = model_gruimp.predict(df_inf_transform)
    is_positive = np.ravel(y_pred_inf)[0] >= 0.5

    heading, *paragraphs = _POSITIVE_RESPONSE if is_positive else _NEGATIVE_RESPONSE
    st.subheader(heading)
    for paragraph in paragraphs:
        st.write(paragraph)


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ tensorflow==2.8.0
7
+ plotly
8
+ nltk
t.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81511b93c63c33d81015ede057cb169ca619cc49c3c8a83cbe8e5b78c651a9af
3
+ size 2066705