Gansol committed on
Commit
f5b8913
1 Parent(s): 70bc615

Upload 9 files

Browse files
McDonald_s_Reviews.csv ADDED
The diff for this file is too large to render. See raw diff
 
P2G7_Allen.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Library Load Model
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ from tensorflow.keras.models import load_model
6
+ import streamlit as st
7
+ # Library Pre-Processing
8
+ import nltk
9
+ import re
10
+ import tensorflow as tf
11
+ from nltk.corpus import stopwords
12
+ from nltk.tokenize import word_tokenize
13
+ from nltk.stem import WordNetLemmatizer
14
+ from tensorflow.keras.preprocessing.text import Tokenizer
15
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
16
+ from gensim.models import Word2Vec
17
# Fetch every NLTK resource the preprocessing below relies on, not just the
# stop-word list: word_tokenize needs the Punkt tokenizer data and
# WordNetLemmatizer needs the WordNet corpus. Without them a fresh deployment
# fails with LookupError on the first prediction.
# NOTE(review): which ids are required depends on the installed NLTK version
# ('punkt_tab' on recent releases, 'omw-1.4' on some older ones) - confirm
# against the pinned version; requesting an unknown id only logs an error.
for _resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# NOTE(review): no external tokenizer is loaded; the app feeds raw strings to
# model.predict, so presumably the saved model embeds its own text
# vectorization - confirm against the training notebook.

# Define the model
model_path = 'model'

# Load model
model = tf.keras.models.load_model(model_path)

# Define Stopwords
## Load Stopwords from NLTK
stop_words_en = stopwords.words("english")

print('Stopwords from NLTK')
print(len(stop_words_en), stop_words_en)
print('')

## Create A New Stopwords
new_stop_words = ['aye', 'mine', 'have']

# Define Lemmatizer
lemmatizer = WordNetLemmatizer()

## Merge Stopwords (de-duplicated; kept as a list for backward compatibility)
stop_words_en = stop_words_en + new_stop_words
stop_words_en = list(set(stop_words_en))
print('Out Final Stopwords')
print(len(stop_words_en), stop_words_en)
48
+
49
+ # Create A Function for review Preprocessing
50
+
51
def review_preprocessing(review):
    """Clean one review string and return it as space-joined lemmas.

    Steps, in order: lower-case, drop @mentions and #hashtags, drop escaped
    newlines, drop URLs, replace every non-letter character with a space,
    tokenize, remove stop words, lemmatize, and re-join with single spaces.

    Parameters:
    review (str): The raw review text.

    Returns:
    str: The cleaned review; an empty string when nothing survives cleaning.
    """
    # Case folding
    review = review.lower()

    # Mention removal
    review = re.sub(r"@[A-Za-z0-9_]+", " ", review)

    # Hashtag removal
    review = re.sub(r"#[A-Za-z0-9_]+", " ", review)

    # Escaped-newline removal: this matches the two-character sequence
    # backslash + "n". Real newline characters are plain whitespace and are
    # discarded later by the tokenizer.
    review = re.sub(r"\\n", " ", review)

    # Leading/trailing whitespace removal
    review = review.strip()

    # URL removal. The dot after "www" is escaped and anchored at a word
    # boundary so ordinary words containing "www" (e.g. "awww") no longer
    # swallow the word that follows them.
    # NOTE(review): keep the training-time preprocessing in sync with this.
    review = re.sub(r"http\S+", " ", review)
    review = re.sub(r"\bwww\.\S+", " ", review)

    # Non-letter removal (digits, punctuation, apostrophes, emoji, symbols).
    # One raw-string pattern replaces the former keep-apostrophes pass plus
    # the follow-up passes, whose only net effect was to strip apostrophes.
    review = re.sub(r"[^A-Za-z\s]", " ", review)

    # Tokenization
    tokens = word_tokenize(review)

    # Stopwords removal
    tokens = [word for word in tokens if word not in stop_words_en]

    # Lemmatization
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    # Combining tokens
    return ' '.join(tokens)
90
+
91
def preprocess_text(text):
    """Preprocess text by delegating to ``review_preprocessing``.

    The delegate cleans the text, removes stop words, and lemmatizes it.

    Parameters:
    text (str): The input text to be preprocessed.

    Returns:
    str: The preprocessed text.
    """
    return review_preprocessing(text)
103
+
104
def run():
    """Render the Streamlit page and classify a submitted review.

    Shows a one-field form; on submit, the review is cleaned with
    ``review_preprocessing``, passed to the loaded ``model``, and the class
    with the highest score is reported as a negative, neutral, or positive
    rating band, followed by the text the user entered.
    """
    # Page title and header
    st.title("DETECTION RATING BASED ON MCDONALD'S CUSTOMER REVIEW")
    st.subheader('Detecting Reviews')
    st.markdown('---')

    # Build the input form
    with st.form(key='review'):
        st.write("## Customers' Review")
        # Review text input
        text = st.text_input("Enter The Review:")
        submitted = st.form_submit_button('Predict')

    # Nothing to do until the user presses the button
    if not submitted:
        return

    # Do not run the model on an empty or whitespace-only review
    if not text.strip():
        st.warning("Please enter a review before predicting.")
        return

    # Preprocess the text (apply the same preprocessing steps as used during training)
    df_inf = pd.DataFrame([{'preprocessing_review': text}])
    df_inf['preprocessing_review'] = df_inf['preprocessing_review'].apply(review_preprocessing)

    # Make the prediction using the loaded model
    class_scores = model.predict(df_inf['preprocessing_review'])
    # Take the arg-max over the model OUTPUT (not the input column) and
    # reduce it to a plain int for the single review.
    # assumes the model emits one score per class for each row - TODO confirm
    y_pred_inf = int(np.ravel(np.argmax(class_scores, axis=-1))[0])

    # Display the prediction result
    # NOTE(review): the positive label mentions only 5 stars while the other
    # labels cover 1-2 and 3; confirm whether 4-star reviews belong here too.
    if y_pred_inf == 0:
        st.subheader("Prediction: Negative Comment with Rating 1 Star - 2 Stars")
    elif y_pred_inf == 1:
        st.subheader("Prediction: Neutral Comment with Rating 3 Stars")
    else:
        st.subheader("Prediction: Positive Comment with Rating 5 Stars")

    # Display the text that was submitted
    st.subheader("Extracted Text:")
    st.write(text)


if __name__ == '__main__':
    run()
fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a162c26dd698bbbbe98323fa927ad21299bc342c835f02cb8673217029e1c65
3
+ size 54
keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16209a305a20bcf9beaabb389be32cfa6b95e546d5f8ba07180727e7c63031a7
3
+ size 26972
model.png ADDED
model_inf_Allen_G7.ipynb ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 13,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import pickle\n",
11
+ "from tensorflow.keras.models import load_model\n",
12
+ "import tensorflow as tf"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 25,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "model_path= 'model'"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 27,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "WARNING:tensorflow:From c:\\Users\\user\\anaconda3\\Lib\\site-packages\\keras\\src\\saving\\legacy\\saved_model\\load.py:107: The name tf.gfile.Exists is deprecated. Please use tf.io.gfile.exists instead.\n",
34
+ "\n"
35
+ ]
36
+ }
37
+ ],
38
+ "source": [
39
+ "model = tf.keras.models.load_model(model_path)"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 32,
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "data": {
49
+ "text/html": [
50
+ "<div>\n",
51
+ "<style scoped>\n",
52
+ " .dataframe tbody tr th:only-of-type {\n",
53
+ " vertical-align: middle;\n",
54
+ " }\n",
55
+ "\n",
56
+ " .dataframe tbody tr th {\n",
57
+ " vertical-align: top;\n",
58
+ " }\n",
59
+ "\n",
60
+ " .dataframe thead th {\n",
61
+ " text-align: right;\n",
62
+ " }\n",
63
+ "</style>\n",
64
+ "<table border=\"1\" class=\"dataframe\">\n",
65
+ " <thead>\n",
66
+ " <tr style=\"text-align: right;\">\n",
67
+ " <th></th>\n",
68
+ " <th>reviewer_id</th>\n",
69
+ " <th>store_name</th>\n",
70
+ " <th>category</th>\n",
71
+ " <th>store_address</th>\n",
72
+ " <th>latitude</th>\n",
73
+ " <th>longitude</th>\n",
74
+ " <th>rating_count</th>\n",
75
+ " <th>review_time</th>\n",
76
+ " <th>review</th>\n",
77
+ " </tr>\n",
78
+ " </thead>\n",
79
+ " <tbody>\n",
80
+ " <tr>\n",
81
+ " <th>0</th>\n",
82
+ " <td>40000</td>\n",
83
+ " <td>McDonald's</td>\n",
84
+ " <td>Fast food restaurant</td>\n",
85
+ " <td>1916 M St NW, Washington, DC 20036, United States</td>\n",
86
+ " <td>27.82</td>\n",
87
+ " <td>-80.189098</td>\n",
88
+ " <td>2.81</td>\n",
89
+ " <td>a year ago</td>\n",
90
+ " <td>treated badly</td>\n",
91
+ " </tr>\n",
92
+ " </tbody>\n",
93
+ "</table>\n",
94
+ "</div>"
95
+ ],
96
+ "text/plain": [
97
+ " reviewer_id store_name category \\\n",
98
+ "0 40000 McDonald's Fast food restaurant \n",
99
+ "\n",
100
+ " store_address latitude longitude \\\n",
101
+ "0 1916 M St NW, Washington, DC 20036, United States 27.82 -80.189098 \n",
102
+ "\n",
103
+ " rating_count review_time review \n",
104
+ "0 2.81 a year ago treated badly "
105
+ ]
106
+ },
107
+ "execution_count": 32,
108
+ "metadata": {},
109
+ "output_type": "execute_result"
110
+ }
111
+ ],
112
+ "source": [
113
+ "# Creating new data as prediction\n",
114
+ "df_inf= {\n",
115
+ " 'reviewer_id':40000, \n",
116
+ " 'store_name':\"McDonald's\" , \n",
117
+ " 'category': 'Fast food restaurant', \n",
118
+ " 'store_address':'1916 M St NW, Washington, DC 20036, United States',\n",
119
+ " 'latitude':27.82, \n",
120
+ " 'longitude':'-80.189098', \n",
121
+ " 'rating_count': 2.810,\n",
122
+ " 'review_time':'a year ago', \n",
123
+ " 'review':'treated badly', \n",
124
+ " \n",
125
+ " \n",
126
+ " \n",
127
+ "}\n",
128
+ "df_inf = pd.DataFrame([df_inf])\n",
129
+ "df_inf"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 35,
135
+ "metadata": {},
136
+ "outputs": [],
137
+ "source": [
138
+ "import tensorflow as tf\n",
139
+ "import numpy as np\n",
140
+ "integer_array = np.array([1, 2, 3], dtype=np.int32)\n",
141
+ "tensor = tf.convert_to_tensor(integer_array, dtype=tf.float32)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 37,
147
+ "metadata": {},
148
+ "outputs": [
149
+ {
150
+ "ename": "ValueError",
151
+ "evalue": "Failed to convert a NumPy array to a Tensor (Unsupported object type int).",
152
+ "output_type": "error",
153
+ "traceback": [
154
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
155
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
156
+ "Cell \u001b[1;32mIn[37], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Predict new data visitor\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m prediction \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(df_inf)\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mThis Review Predicted Gave Rating\u001b[39m\u001b[38;5;124m'\u001b[39m, tensor(prediction[\u001b[38;5;241m0\u001b[39m],\u001b[38;5;241m2\u001b[39m))\n",
157
+ "File \u001b[1;32mc:\\Users\\user\\anaconda3\\Lib\\site-packages\\keras\\src\\utils\\traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
158
+ "File \u001b[1;32mc:\\Users\\user\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\framework\\constant_op.py:103\u001b[0m, in \u001b[0;36mconvert_to_eager_tensor\u001b[1;34m(value, ctx, dtype)\u001b[0m\n\u001b[0;32m 101\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtypes\u001b[38;5;241m.\u001b[39mas_dtype(dtype)\u001b[38;5;241m.\u001b[39mas_datatype_enum\n\u001b[0;32m 102\u001b[0m ctx\u001b[38;5;241m.\u001b[39mensure_initialized()\n\u001b[1;32m--> 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ops\u001b[38;5;241m.\u001b[39mEagerTensor(value, ctx\u001b[38;5;241m.\u001b[39mdevice_name, dtype)\n",
159
+ "\u001b[1;31mValueError\u001b[0m: Failed to convert a NumPy array to a Tensor (Unsupported object type int)."
160
+ ]
161
+ }
162
+ ],
163
+ "source": [
164
+ "# Predict new data visitor\n",
165
+ "prediction = model.predict(df_inf)\n",
166
+ "print('This Review Predicted Gave Rating', tensor(prediction[0],2))"
167
+ ]
168
+ }
169
+ ],
170
+ "metadata": {
171
+ "kernelspec": {
172
+ "display_name": "base",
173
+ "language": "python",
174
+ "name": "python3"
175
+ },
176
+ "language_info": {
177
+ "codemirror_mode": {
178
+ "name": "ipython",
179
+ "version": 3
180
+ },
181
+ "file_extension": ".py",
182
+ "mimetype": "text/x-python",
183
+ "name": "python",
184
+ "nbconvert_exporter": "python",
185
+ "pygments_lexer": "ipython3",
186
+ "version": "3.11.5"
187
+ }
188
+ },
189
+ "nbformat": 4,
190
+ "nbformat_minor": 2
191
+ }
requirement.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas<2.0.0
3
+ seaborn
4
+ matplotlib
5
+ plotly
6
+ Pillow
7
+ altair
8
+ feature_engine
9
+ scikit-learn==1.2.1
10
+ tensorflow==2.12.0
11
+ nltk
12
+ gensim
saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef6239b078d76654a3dda616414aada8126a1a76a4f9c2f340a0595621f2c069
3
+ size 3554328