Upload 9 files
Browse files- McDonald_s_Reviews.csv +0 -0
- P2G7_Allen.ipynb +0 -0
- app.py +141 -0
- fingerprint.pb +3 -0
- keras_metadata.pb +3 -0
- model.png +0 -0
- model_inf_Allen_G7.ipynb +191 -0
- requirement.txt +12 -0
- saved_model.pb +3 -0
McDonald_s_Reviews.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
P2G7_Allen.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Library Load Model
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
from tensorflow.keras.models import load_model
|
6 |
+
import streamlit as st
|
7 |
+
# Library Pre-Processing
|
8 |
+
import nltk
|
9 |
+
import re
|
10 |
+
import tensorflow as tf
|
11 |
+
from nltk.corpus import stopwords
|
12 |
+
from nltk.tokenize import word_tokenize
|
13 |
+
from nltk.stem import WordNetLemmatizer
|
14 |
+
from tensorflow.keras.preprocessing.text import Tokenizer
|
15 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
16 |
+
from gensim.models import Word2Vec
|
17 |
+
# Fetch every NLTK resource the preprocessing in this file relies on, not just
# the stopword list: word_tokenize needs the punkt tokenizer models
# ('punkt_tab' on newer NLTK releases, 'punkt' on older ones) and
# WordNetLemmatizer needs the WordNet corpus (some releases also look up
# 'omw-1.4').  nltk.download() reports an unknown package id without raising,
# so requesting both punkt variants is safe on any release.
for _nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)

# Location of the saved Keras model, relative to the working directory.
# NOTE(review): load_model is given a directory-style path here; confirm that
# saved_model.pb / keras_metadata.pb / fingerprint.pb (and the variables
# folder, if any) actually live under 'model/' in the deployment.
model_path = 'model'

# Load the model once at import time; run() reads this module-level name.
model = tf.keras.models.load_model(model_path)

# English stopwords shipped with NLTK.
stop_words_en = stopwords.words("english")

print('Stopwords from NLTK')
print(len(stop_words_en), stop_words_en)
print('')

# Project-specific stopwords added on top of the NLTK list.
new_stop_words = ['aye', 'mine', 'have']

# Lemmatizer shared by every call to review_preprocessing().
lemmatizer = WordNetLemmatizer()

# Merge both stopword lists and drop duplicates.
stop_words_en = stop_words_en + new_stop_words
stop_words_en = list(set(stop_words_en))
print('Out Final Stopwords')
print(len(stop_words_en), stop_words_en)
|
48 |
+
|
49 |
+
# Create A Function for review Preprocessing
|
50 |
+
|
51 |
+
def review_preprocessing(review):
    """Clean one raw review string and return it as space-joined lemmas.

    Steps, in order: lowercase; blank out @mentions and #hashtags; blank out
    literal backslash-n sequences; strip surrounding whitespace; blank out
    URLs; blank out every character that is not an ASCII letter or
    whitespace; tokenize; drop stopwords; lemmatize; re-join with spaces.

    The output is intentionally identical to the previous implementation so
    that inference keeps matching whatever preprocessing the model was
    trained with.

    Parameters:
        review (str): The raw review text.

    Returns:
        str: The cleaned review; an empty string when no token survives.
    """
    # Case folding
    review = review.lower()

    # Mention removal, then hashtag removal
    review = re.sub(r"@[A-Za-z0-9_]+", " ", review)
    review = re.sub(r"#[A-Za-z0-9_]+", " ", review)

    # Removes the two-character sequence backslash + "n" (an escaped newline
    # left in the text), not a real newline character; real newlines are
    # whitespace and are dealt with by the tokenizer.
    review = re.sub(r"\\n", " ", review)

    # Leading/trailing whitespace removal
    review = review.strip()

    # URL removal
    review = re.sub(r"http\S+", " ", review)
    # NOTE(review): the "." is unescaped, so this also matches "www" followed
    # by any character (e.g. "awww so" loses "so").  Left unchanged on purpose
    # to stay in sync with the training-time preprocessing; confirm before
    # tightening it to r"www\.\S+".
    review = re.sub(r"www.\S+", " ", review)

    # Non-letter removal (digits, punctuation, apostrophes, emoticons,
    # mis-encoded characters, ...).  A single raw-string pass replaces the
    # former five passes: the first one kept the apostrophe and the following
    # four could only ever match that apostrophe, so the result is the same.
    review = re.sub(r"[^A-Za-z\s]", " ", review)

    # Tokenization
    tokens = word_tokenize(review)

    # Stopwords removal; build the set once so each lookup is O(1).
    stopword_set = set(stop_words_en)
    tokens = [word for word in tokens if word not in stopword_set]

    # Lemmatization
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    # Combine the tokens back into one string
    return ' '.join(tokens)
|
90 |
+
|
91 |
+
def preprocess_text(text):
    """Preprocess a piece of text by delegating to review_preprocessing().

    Parameters:
        text (str): The input text to be preprocessed.

    Returns:
        str: Whatever review_preprocessing() returns for the given text.
    """
    return review_preprocessing(text)
|
103 |
+
|
104 |
+
def run():
    """Render the Streamlit page and classify a submitted review.

    Shows a form with a single text input.  When the form is submitted with a
    non-blank review, the text is cleaned with review_preprocessing(), passed
    to the module-level ``model`` and the highest-scoring class is displayed,
    followed by the text the user entered.

    Returns:
        None
    """
    # Page title
    st.title("DETECTION RATING BASED ON MCDONALD'S CUSTOMER REVIEW")
    st.subheader('Detecting Reviews')
    st.markdown('---')

    # Input form
    with st.form(key='review'):
        st.write("## Customers' Review")
        # Review text input
        text = st.text_input("Enter The Review:")
        submitted = st.form_submit_button('Predict')

    # Nothing to do until the user presses the button.
    if not submitted:
        return

    # Do not send an empty review to the model.
    if not text.strip():
        st.warning("Please enter a review before predicting.")
        return

    df_inf = pd.DataFrame([{'preprocessing_review': text}])

    # Apply the same cleaning function that this file defines for reviews.
    df_inf['preprocessing_review'] = df_inf['preprocessing_review'].apply(review_preprocessing)

    # NOTE(review): the cleaned strings go straight into the model, which
    # presumes the saved model performs its own text vectorization - confirm.
    class_scores = model.predict(df_inf['preprocessing_review'])

    # Pick the best class from the model OUTPUT.  The previous code took the
    # argmax of the input text column, so the displayed label never depended
    # on the prediction.  Assumes one row of per-class scores - TODO confirm.
    predicted_class = int(np.argmax(class_scores, axis=-1)[0])

    # Display the prediction result
    # NOTE(review): the last label mentions only 5 stars while the first one
    # covers a range; confirm the class-to-rating mapping used in training.
    if predicted_class == 0:
        st.subheader("Prediction: Negative Comment with Rating 1 Star - 2 Stars")
    elif predicted_class == 1:
        st.subheader("Prediction: Neutral Comment with Rating 3 Stars")
    else:
        st.subheader("Prediction: Positive Comment with Rating 5 Stars")

    # Echo the text exactly as the user entered it
    st.subheader("Extracted Text:")
    st.write(text)


if __name__ == '__main__':
    run()
|
fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a162c26dd698bbbbe98323fa927ad21299bc342c835f02cb8673217029e1c65
|
3 |
+
size 54
|
keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16209a305a20bcf9beaabb389be32cfa6b95e546d5f8ba07180727e7c63031a7
|
3 |
+
size 26972
|
model.png
ADDED
model_inf_Allen_G7.ipynb
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 13,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import pickle\n",
|
11 |
+
"from tensorflow.keras.models import load_model\n",
|
12 |
+
"import tensorflow as tf"
|
13 |
+
]
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"cell_type": "code",
|
17 |
+
"execution_count": 25,
|
18 |
+
"metadata": {},
|
19 |
+
"outputs": [],
|
20 |
+
"source": [
|
21 |
+
"model_path= 'model'"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 27,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [
|
29 |
+
{
|
30 |
+
"name": "stdout",
|
31 |
+
"output_type": "stream",
|
32 |
+
"text": [
|
33 |
+
"WARNING:tensorflow:From c:\\Users\\user\\anaconda3\\Lib\\site-packages\\keras\\src\\saving\\legacy\\saved_model\\load.py:107: The name tf.gfile.Exists is deprecated. Please use tf.io.gfile.exists instead.\n",
|
34 |
+
"\n"
|
35 |
+
]
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"source": [
|
39 |
+
"model = tf.keras.models.load_model(model_path)"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": 32,
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [
|
47 |
+
{
|
48 |
+
"data": {
|
49 |
+
"text/html": [
|
50 |
+
"<div>\n",
|
51 |
+
"<style scoped>\n",
|
52 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
53 |
+
" vertical-align: middle;\n",
|
54 |
+
" }\n",
|
55 |
+
"\n",
|
56 |
+
" .dataframe tbody tr th {\n",
|
57 |
+
" vertical-align: top;\n",
|
58 |
+
" }\n",
|
59 |
+
"\n",
|
60 |
+
" .dataframe thead th {\n",
|
61 |
+
" text-align: right;\n",
|
62 |
+
" }\n",
|
63 |
+
"</style>\n",
|
64 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
65 |
+
" <thead>\n",
|
66 |
+
" <tr style=\"text-align: right;\">\n",
|
67 |
+
" <th></th>\n",
|
68 |
+
" <th>reviewer_id</th>\n",
|
69 |
+
" <th>store_name</th>\n",
|
70 |
+
" <th>category</th>\n",
|
71 |
+
" <th>store_address</th>\n",
|
72 |
+
" <th>latitude</th>\n",
|
73 |
+
" <th>longitude</th>\n",
|
74 |
+
" <th>rating_count</th>\n",
|
75 |
+
" <th>review_time</th>\n",
|
76 |
+
" <th>review</th>\n",
|
77 |
+
" </tr>\n",
|
78 |
+
" </thead>\n",
|
79 |
+
" <tbody>\n",
|
80 |
+
" <tr>\n",
|
81 |
+
" <th>0</th>\n",
|
82 |
+
" <td>40000</td>\n",
|
83 |
+
" <td>McDonald's</td>\n",
|
84 |
+
" <td>Fast food restaurant</td>\n",
|
85 |
+
" <td>1916 M St NW, Washington, DC 20036, United States</td>\n",
|
86 |
+
" <td>27.82</td>\n",
|
87 |
+
" <td>-80.189098</td>\n",
|
88 |
+
" <td>2.81</td>\n",
|
89 |
+
" <td>a year ago</td>\n",
|
90 |
+
" <td>treated badly</td>\n",
|
91 |
+
" </tr>\n",
|
92 |
+
" </tbody>\n",
|
93 |
+
"</table>\n",
|
94 |
+
"</div>"
|
95 |
+
],
|
96 |
+
"text/plain": [
|
97 |
+
" reviewer_id store_name category \\\n",
|
98 |
+
"0 40000 McDonald's Fast food restaurant \n",
|
99 |
+
"\n",
|
100 |
+
" store_address latitude longitude \\\n",
|
101 |
+
"0 1916 M St NW, Washington, DC 20036, United States 27.82 -80.189098 \n",
|
102 |
+
"\n",
|
103 |
+
" rating_count review_time review \n",
|
104 |
+
"0 2.81 a year ago treated badly "
|
105 |
+
]
|
106 |
+
},
|
107 |
+
"execution_count": 32,
|
108 |
+
"metadata": {},
|
109 |
+
"output_type": "execute_result"
|
110 |
+
}
|
111 |
+
],
|
112 |
+
"source": [
|
113 |
+
"# Creating new data as prediction\n",
|
114 |
+
"df_inf= {\n",
|
115 |
+
" 'reviewer_id':40000, \n",
|
116 |
+
" 'store_name':\"McDonald's\" , \n",
|
117 |
+
" 'category': 'Fast food restaurant', \n",
|
118 |
+
" 'store_address':'1916 M St NW, Washington, DC 20036, United States',\n",
|
119 |
+
" 'latitude':27.82, \n",
|
120 |
+
" 'longitude':'-80.189098', \n",
|
121 |
+
" 'rating_count': 2.810,\n",
|
122 |
+
" 'review_time':'a year ago', \n",
|
123 |
+
" 'review':'treated badly', \n",
|
124 |
+
" \n",
|
125 |
+
" \n",
|
126 |
+
" \n",
|
127 |
+
"}\n",
|
128 |
+
"df_inf = pd.DataFrame([df_inf])\n",
|
129 |
+
"df_inf"
|
130 |
+
]
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"cell_type": "code",
|
134 |
+
"execution_count": 35,
|
135 |
+
"metadata": {},
|
136 |
+
"outputs": [],
|
137 |
+
"source": [
|
138 |
+
"import tensorflow as tf\n",
|
139 |
+
"import numpy as np\n",
|
140 |
+
"integer_array = np.array([1, 2, 3], dtype=np.int32)\n",
|
141 |
+
"tensor = tf.convert_to_tensor(integer_array, dtype=tf.float32)"
|
142 |
+
]
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"cell_type": "code",
|
146 |
+
"execution_count": 37,
|
147 |
+
"metadata": {},
|
148 |
+
"outputs": [
|
149 |
+
{
|
150 |
+
"ename": "ValueError",
|
151 |
+
"evalue": "Failed to convert a NumPy array to a Tensor (Unsupported object type int).",
|
152 |
+
"output_type": "error",
|
153 |
+
"traceback": [
|
154 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
155 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
156 |
+
"Cell \u001b[1;32mIn[37], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Predict new data visitor\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m prediction \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(df_inf)\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mThis Review Predicted Gave Rating\u001b[39m\u001b[38;5;124m'\u001b[39m, tensor(prediction[\u001b[38;5;241m0\u001b[39m],\u001b[38;5;241m2\u001b[39m))\n",
|
157 |
+
"File \u001b[1;32mc:\\Users\\user\\anaconda3\\Lib\\site-packages\\keras\\src\\utils\\traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
|
158 |
+
"File \u001b[1;32mc:\\Users\\user\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\framework\\constant_op.py:103\u001b[0m, in \u001b[0;36mconvert_to_eager_tensor\u001b[1;34m(value, ctx, dtype)\u001b[0m\n\u001b[0;32m 101\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtypes\u001b[38;5;241m.\u001b[39mas_dtype(dtype)\u001b[38;5;241m.\u001b[39mas_datatype_enum\n\u001b[0;32m 102\u001b[0m ctx\u001b[38;5;241m.\u001b[39mensure_initialized()\n\u001b[1;32m--> 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ops\u001b[38;5;241m.\u001b[39mEagerTensor(value, ctx\u001b[38;5;241m.\u001b[39mdevice_name, dtype)\n",
|
159 |
+
"\u001b[1;31mValueError\u001b[0m: Failed to convert a NumPy array to a Tensor (Unsupported object type int)."
|
160 |
+
]
|
161 |
+
}
|
162 |
+
],
|
163 |
+
"source": [
|
164 |
+
"# Predict new data visitor\n",
|
165 |
+
"prediction = model.predict(df_inf)\n",
|
166 |
+
"print('This Review Predicted Gave Rating', tensor(prediction[0],2))"
|
167 |
+
]
|
168 |
+
}
|
169 |
+
],
|
170 |
+
"metadata": {
|
171 |
+
"kernelspec": {
|
172 |
+
"display_name": "base",
|
173 |
+
"language": "python",
|
174 |
+
"name": "python3"
|
175 |
+
},
|
176 |
+
"language_info": {
|
177 |
+
"codemirror_mode": {
|
178 |
+
"name": "ipython",
|
179 |
+
"version": 3
|
180 |
+
},
|
181 |
+
"file_extension": ".py",
|
182 |
+
"mimetype": "text/x-python",
|
183 |
+
"name": "python",
|
184 |
+
"nbconvert_exporter": "python",
|
185 |
+
"pygments_lexer": "ipython3",
|
186 |
+
"version": "3.11.5"
|
187 |
+
}
|
188 |
+
},
|
189 |
+
"nbformat": 4,
|
190 |
+
"nbformat_minor": 2
|
191 |
+
}
|
requirement.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas<2.0.0
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
plotly
|
6 |
+
Pillow
|
7 |
+
altair
|
8 |
+
feature_engine
|
9 |
+
scikit-learn==1.2.1
|
10 |
+
tensorflow==2.12.0
|
11 |
+
nltk
|
12 |
+
gensim
|
saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef6239b078d76654a3dda616414aada8126a1a76a4f9c2f340a0595621f2c069
|
3 |
+
size 3554328
|