imamzarkasie committed on
Commit
65e6719
1 Parent(s): 241fb10

Upload 15 files

Browse files
__pycache__/eda.cpython-39.pyc ADDED
Binary file (3.35 kB). View file
 
__pycache__/prediction.cpython-39.pyc ADDED
Binary file (1.82 kB). View file
 
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Sidebar selector that decides which page of the app gets rendered.
navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict Taxi Price'))

# Any choice other than 'EDA' falls through to the prediction page.
selected_page = eda if navigation == 'EDA' else prediction
selected_page.run()
eda.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
# Page-level Streamlit settings; this runs at module top level, i.e. when the
# module is first executed or imported.
st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
13
+
14
def _show_pie(counts, class_names, title):
    """Render value counts as a pie chart with a legend of readable names.

    Args:
        counts: pandas Series of frequencies indexed by class code
            (the result of ``value_counts()``).
        class_names: dict mapping a class code to its display name.
        title: title drawn above the chart.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie',
                autopct='%1.1f%%',
                startangle=90,
                shadow=True,
                ax=ax)
    ax.set_title(title)
    ax.axis('equal')

    # Add a legend describing each class. Codes that are missing from the
    # mapping fall back to their raw value instead of raising KeyError.
    ax.legend(labels=[class_names.get(code, str(code)) for code in counts.index],
              loc='upper right')

    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure; otherwise figures pile up on every Streamlit rerun.
    plt.close(fig)


def run():
    """Render the EDA page for the NYC Yellow Taxi dataset.

    Shows the page title, a header image and introductory text, the raw
    contents of ``taxi_dataset.csv``, and four charts: a histogram of
    ``trip_price`` next to a ``trip_distance`` vs ``trip_price`` scatter plot,
    pie charts of ``payment_type`` and ``trip_type``, and a histogram of
    ``passenger_count``.

    Reads ``taxi.jpeg`` and ``taxi_dataset.csv`` from the current working
    directory.
    """
    # Create the title
    st.title('NYC Taxi Price Prediction')

    # Create the sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Add the image; the context manager closes the file handle afterwards.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Add the description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Draw a horizontal rule
    st.markdown('---')

    # Written with an explicit st.write call: Streamlit "magic" (bare string
    # expressions) is only applied to the main script, so a bare string here
    # would not be shown when this module is imported by another script.
    st.write(
        'On this page, the author will do a simple exploration.\n'
        'The dataset used is the NYC Yellow Taxi dataset.\n'
        'This dataset comes from the website Google BigQuery.'
    )

    # Show DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    st.write('#### Some description for features that have a class:')
    st.write('##### trip_type:')
    st.write('##### 1: Standard Rate')
    st.write('##### 2: JFK Airport and Others')
    st.write('##### payment_type:')
    st.write('##### 1: Credit Card')
    st.write('##### 2: Cash')

    # Price histogram and distance/price scatter plot, side by side
    fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(15, 5))
    sns.histplot(data['trip_price'], kde=True, bins=30, ax=ax_hist)
    ax_hist.set_title('Histogram of trip_price')
    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=ax_scatter)
    ax_scatter.set_title('trip_distance vs trip_price')
    st.pyplot(fig)
    plt.close(fig)

    # Pie plot of payment_type (cast to int so the codes match the mapping keys)
    payment_counts = data['payment_type'].astype(int).value_counts()
    labels_payment_type = {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'}
    _show_pie(payment_counts, labels_payment_type, 'Pie Plot User Payment Type')

    # Pie plot of trip_type
    trip_type_counts = data['trip_type'].value_counts()
    labels = {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'}
    _show_pie(trip_type_counts, labels, 'Pie Plot User Trip Type')

    # Passenger count histogram: one coloured bar group per distinct count
    passenger_count = data['passenger_count']
    n_bins = 10
    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
    bar_width = 0.8

    fig, ax = plt.subplots(figsize=(8, 2))
    for i, count in enumerate(passenger_count.unique()):
        same_count = passenger_count[passenger_count == count]
        # Cycle through the palette so more distinct counts than colours
        # cannot raise IndexError.
        ax.hist(same_count, bins=n_bins, color=colors[i % len(colors)],
                alpha=0.7, width=bar_width)

    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')

    st.pyplot(fig)
    plt.close(fig)


if __name__ == '__main__':
    run()
list_cat_cols_taxi.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["trip_type"]
list_num_cols_taxi.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["trip_distance", "passenger_count"]
model_encoder_taxi.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34cafa737dd2c38063a8b6540e20a350f2c6169a7c0454556cb42c4331e890f2
3
+ size 606
model_lin_reg_ord.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e0530112a9d87ac767755541fd59fa3381ac7dbd8d38ee8ba7ca502288f7fb4
3
+ size 584
model_scaler_taxi.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5b7178b3542a8ed37ccbf4f0ed593a2f2cae42058478462a2cedc71e7543f9
3
+ size 682
pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af6d989d67b1af9a44d570a268d7daf7a090efd42821e66b5583d64ec33066f
3
+ size 2837
prediction.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ from sklearn.preprocessing import StandardScaler, OrdinalEncoder
6
+ import json
7
+
8
+
9
# Load every persisted artifact once, at import time.
def _unpickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as stream:
        return pickle.load(stream)


def _read_json(path):
    """Return the parsed JSON content of the text file at *path*."""
    with open(path, 'r') as stream:
        return json.load(stream)


# Fitted estimator and preprocessing objects.
model_lin_reg = _unpickle('model_lin_reg_ord.pkl')
model_scaler = _unpickle('model_scaler_taxi.pkl')
model_encoder = _unpickle('model_encoder_taxi.pkl')

# Column-name lists used to split inference data into numeric/categorical parts.
list_num_cols = _read_json('list_num_cols_taxi.txt')
list_cat_cols = _read_json('list_cat_cols_taxi.txt')
24
+
25
+
26
def run():
    """Render the prediction page and show a price estimate on submit.

    Collects trip distance, passenger count and trip type through a form,
    echoes the collected values as a one-row DataFrame, and, once the form is
    submitted, scales the numeric columns, encodes the categorical columns,
    concatenates both and feeds the result to the linear regression model.
    The predicted price is displayed truncated to an integer.

    Relies on the module-level ``model_scaler``, ``model_encoder``,
    ``model_lin_reg``, ``list_num_cols`` and ``list_cat_cols``.
    """
    with st.form(key='from_taxi_nyc'):
        # Float bounds so fractional distances can be entered; with integer
        # bounds the widget only accepts whole numbers.
        trip_distance = st.number_input('Trip Distance', min_value=0.0, max_value=100000000.0, value=0.0)
        passenger_count = st.number_input('Passenger Count', min_value=0, max_value=20, value=0)
        # NOTE(review): the label strings are passed to the encoder as-is,
        # while the EDA page describes trip_type as codes 1/2 -- confirm the
        # encoder was fitted on these exact strings.
        trip_type = st.selectbox('Trip Type', ('Standard Rate', 'JFK Airport and Others'), index=1)

        submitted = st.form_submit_button('Predict')

    data_inf = pd.DataFrame([{
        'trip_distance': float(trip_distance),
        'passenger_count': float(passenger_count),
        'trip_type': trip_type,
    }])
    st.dataframe(data_inf)

    if not submitted:
        return

    # Split between numerical columns and categorical columns
    data_inf_num = data_inf[list_num_cols]
    data_inf_cat = data_inf[list_cat_cols]

    # Feature scaling and feature encoding
    data_inf_num_scaled = model_scaler.transform(data_inf_num)
    data_inf_cat_encoded = model_encoder.transform(data_inf_cat)
    data_inf_final = np.concatenate([data_inf_num_scaled, data_inf_cat_encoded], axis=1)

    # Predict using linear regression
    y_pred_inf = model_lin_reg.predict(data_inf_final)

    # Pick the single prediction explicitly: calling int() on a non-0-d array
    # is deprecated in recent NumPy releases. ravel() copes with both (1,)
    # and (1, 1) shaped outputs.
    predicted_price = np.ravel(y_pred_inf)[0]
    st.write('# Price : ', str(int(predicted_price)))


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ Pillow
6
+ plotly
7
+ scikit-learn==1.2.2
8
+ # "streamlit as st" is Python import syntax, not a valid requirement specifier; streamlit is already listed above
9
+ numpy
taxi.jpeg ADDED
taxi_dataset.csv ADDED
The diff for this file is too large to render. See raw diff