SagarBapodara committed on
Commit
3719961
1 Parent(s): d4d9bfe

Added File

Browse files
Files changed (1) hide show
  1. app.py.py +156 -0
app.py.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ #Importing the dependencies
5
+ import pandas as pd
6
+ import numpy as np
7
+ import streamlit as st
8
+
9
+
10
# ---------------------------------------------------------------------------
# Data preparation and model training.
#
# Everything below runs at import time: it reads the CSV, cleans it, and fits
# the XGBoost regressor that the `prediction` function later uses via `XGB`.
# ---------------------------------------------------------------------------

# Loading the dataset
RtData = pd.read_csv('RestaurantRatingData.csv', encoding='latin')
# Selecting the restaurants located in India
RtData = RtData[RtData['Currency'] == "Indian Rupees(Rs.)"]
# Removing the rows where the average cost is 0
RtData = RtData.loc[RtData['Average Cost for two'] > 0]

# Dropping the qualitative columns that are not used as predictors
UselessColumns = ['Restaurant ID', 'Restaurant Name', 'City', 'Address',
                  'Locality', 'Locality Verbose', 'Cuisines']
RtData = RtData.drop(UselessColumns, axis=1)

# Renaming the columns so that they contain no spaces
RtData = RtData.rename(columns={
    'Has Table booking': 'Has_Table_booking',
    'Has Online delivery': 'Has_Online_delivery',
    'Average Cost for two': 'Average_Cost_for_two',
    'Price range': 'Price_range',
})

# Replacing outliers with the nearest plausible value.
# A single .loc[mask, column] assignment is used instead of chained indexing
# (df[col][mask] = value): the chained form may write to a temporary copy and
# leave the frame unchanged.
RtData.loc[RtData['Votes'] > 4000, 'Votes'] = 3986
RtData.loc[RtData['Average_Cost_for_two'] > 50000, 'Average_Cost_for_two'] = 8000

# Final selected predictors
SelectedColumns = ['Votes', 'Average_Cost_for_two', 'Has_Table_booking',
                   'Has_Online_delivery', 'Price_range']

# Selecting final columns; .copy() makes this an independent frame so the
# column assignments below modify it for certain.
DataForML = RtData[SelectedColumns].copy()

# Converting the binary Yes/No columns to numeric 1/0.
# NOTE(review): assumes these columns only contain 'Yes' and 'No'; any other
# value becomes NaN with .map() - confirm against the dataset.
DataForML['Has_Table_booking'] = DataForML['Has_Table_booking'].map({'Yes': 1, 'No': 0})
DataForML['Has_Online_delivery'] = DataForML['Has_Online_delivery'].map({'Yes': 1, 'No': 0})

# Treating any remaining nominal variables at once using dummy variables
DataForML_Numeric = pd.get_dummies(DataForML)

# Adding the target variable to the data
DataForML_Numeric['Rating'] = RtData['Rating']

# Separate target variable and predictor variables
TargetVariable = 'Rating'
Predictors = ['Votes', 'Average_Cost_for_two', 'Has_Table_booking',
              'Has_Online_delivery', 'Price_range']

X = DataForML_Numeric[Predictors].values
y = DataForML_Numeric[TargetVariable].values

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=428)

# Extreme Gradient Boosting (XGBoost) model.
# 'reg:squarederror' is the current name of the deprecated 'reg:linear'
# objective, and verbosity=0 supersedes the deprecated silent=True flag.
from xgboost import XGBRegressor
RegModel = XGBRegressor(max_depth=2, learning_rate=0.1, verbosity=0,
                        n_estimators=1000, objective='reg:squarederror',
                        booster='gbtree')

# Printing all the parameters of XGBoost
print(RegModel)

# Creating the model on the training data
XGB = RegModel.fit(X_train, y_train)
# Named test_predictions (not `prediction`) so it does not collide with the
# `prediction` function defined later in this file.
test_predictions = XGB.predict(X_test)
80
+
81
# NOTE(review): st.cache is deprecated in newer Streamlit releases; kept here
# so the block still runs on the Streamlit version this app was written for.
@st.cache()
def prediction(Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range):
    """Predict a restaurant's rating band from the values the user entered.

    Args:
        Votes: Number of votes the restaurant has received.
        Average_Cost_for_two: Average cost for two people.
        Has_Table_booking: "No" is encoded as 0; any other value as 1.
        Has_Online_delivery: "No" is encoded as 0; any other value as 1.
        Price_range: Price range value entered by the user.

    Returns:
        A sentence describing the predicted rating as Low (<= 2),
        Decent (> 2 and <= 4) or High (> 4).
    """
    # Encode the Yes/No selections as 1/0.
    Has_Table_booking = 0 if Has_Table_booking == "No" else 1
    Has_Online_delivery = 0 if Has_Online_delivery == "No" else 1

    # Single-row frame with the features in the same order as the
    # `Predictors` list used when fitting the model.
    features = pd.DataFrame([[Votes, Average_Cost_for_two, Has_Table_booking,
                              Has_Online_delivery, Price_range]])
    predicted_rating = XGB.predict(features)[0]

    # The thresholds form a contiguous range. Previously a rating strictly
    # between 2 and 3 matched no branch and the function returned None.
    # NOTE(review): ratings in (2, 3) are now reported as "Decent"; confirm
    # that this is the intended band for that interval.
    if predicted_rating <= 2:
        return 'It is a Low Rated Restaurant.'
    if predicted_rating <= 4:
        return 'It is a Decent Rated Restaurant'
    return 'It is a High Rated Restaurant'
112
+
113
+
114
+
115
+
116
def main():
    """Render the Streamlit page and show a rating prediction on request.

    Displays an optional table of the dataset, the page header, the input
    widgets for the five predictors, and - once the "Predict" button is
    clicked - the sentence returned by `prediction`.
    """
    # front end elements of the web page
    html_temp = """
<div style ="background-color:orange;padding:13px">
<h1 style ="color:black;text-align:center;"> Restaurant Rating
Prediction App</h1>
<h8 style ="color:black;text-align:center;"> The data from an online food app,
which needs assistance in predicting the future success or failure of a business (restaurant),
has been used in this case study. Such that they can choose whether to delete the restaurant
from their app or keep it. They have provided information from of 8643 eateries from different
states of India that are currently accessible on their app. It contains details about the
restaurants, including the overall rating. Below I have developed a machine learning model
that can predict a restaurant's rating based on its attributes.</h8>
</div>
"""

    # Display dataset when check box is ON
    if st.checkbox('View dataset in table data format'):
        st.dataframe(RtData)

    # display the front end aspect
    st.markdown(html_temp, unsafe_allow_html=True)

    # Input widgets for the values required to make a prediction.
    # Integer bounds keep the defaults inside the ranges the labels state;
    # without them every field is an unbounded float starting at 0.00.
    # Votes has no upper bound because the training data is capped above the
    # 2500 mentioned in the label.
    Votes = st.number_input("No. of Votes (Range between 0 to 2500)",
                            min_value=0, step=1)
    Average_Cost_for_two = st.number_input(
        "Cost of 2 person between 50 to 8000 (Indian Rupees(Rs.))",
        min_value=50, max_value=8000, step=1)
    Price_range = st.number_input(
        "Price Range between 1(Inexpensive) to 4(Most Expensive)",
        min_value=1, max_value=4, step=1)
    Has_Table_booking = st.selectbox(' Has Table Booking', ("Yes", "No"))
    Has_Online_delivery = st.selectbox(' Has Online Delivery', ("Yes", "No"))

    # when 'Predict' is clicked, make the prediction and display it
    if st.button("Predict"):
        result = prediction(Votes, Average_Cost_for_two, Has_Table_booking,
                            Has_Online_delivery, Price_range)
        st.success('Final Decision: {}'.format(result))


if __name__ == '__main__':
    main()
156
+