Spaces:
Sleeping
Sleeping
imamzarkasie
committed on
Commit
•
65e6719
1
Parent(s):
241fb10
Upload 15 files
Browse files- __pycache__/eda.cpython-39.pyc +0 -0
- __pycache__/prediction.cpython-39.pyc +0 -0
- app.py +10 -0
- eda.py +132 -0
- list_cat_cols_taxi.txt +1 -0
- list_num_cols_taxi.txt +1 -0
- model_encoder_taxi.pkl +3 -0
- model_lin_reg_ord.pkl +3 -0
- model_scaler_taxi.pkl +3 -0
- pipeline.pkl +3 -0
- prediction.py +61 -0
- requirements.txt +9 -0
- taxi.jpeg +0 -0
- taxi_dataset.csv +0 -0
__pycache__/eda.cpython-39.pyc
ADDED
Binary file (3.35 kB). View file
|
|
__pycache__/prediction.cpython-39.pyc
ADDED
Binary file (1.82 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar page selector: the first entry is the EDA page, the second the
# prediction form.
PAGES = ('EDA', 'Predict Taxi Price')
navigation = st.sidebar.selectbox('Select Page : ', PAGES)

# Anything other than the EDA entry routes to the prediction page.
if navigation != 'EDA':
    prediction.run()
else:
    eda.run()
|
eda.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
# Page-level Streamlit settings: browser-tab title, wide layout, and a sidebar
# that starts expanded. The call sits at module level, so it executes as soon
# as this module is imported or run, before run() is called.
st.set_page_config(
    page_title = 'NYC TAXI - EDA',
    layout = 'wide',
    initial_sidebar_state = 'expanded'
)
|
13 |
+
|
14 |
+
def _show_category_pie(counts, title, class_names, tight_layout=False):
    """Draw a pie chart of class frequencies and render it in Streamlit.

    Args:
        counts: Series of frequencies indexed by class code, as produced by
            ``Series.value_counts()``.
        title: Title shown above the chart.
        class_names: Mapping from class code to its legend label.
        tight_layout: When true, call ``fig.tight_layout()`` before rendering.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie',
                autopct='%1.1f%%',
                startangle=90,
                shadow=True,
                ax=ax)
    ax.set_title(title)
    ax.axis('equal')

    # Fall back to the raw code for classes that have no descriptive label,
    # instead of failing with KeyError on an unexpected value in the data.
    legend_labels = [class_names.get(code, str(code)) for code in counts.index]
    ax.legend(labels=legend_labels, loc='upper right')

    if tight_layout:
        fig.tight_layout()
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; close it so figures
    # do not pile up each time the page is rendered again.
    plt.close(fig)


def run():
    """Render the EDA page for the NYC Yellow Taxi dataset.

    Reads ``taxi.jpeg`` and ``taxi_dataset.csv`` from the working directory,
    then shows, in order: title and intro text, the raw dataframe, class
    descriptions, a price histogram next to a distance-vs-price scatter plot,
    pie charts for ``payment_type`` and ``trip_type``, and a histogram of
    ``passenger_count``.
    """
    # Title
    st.title('NYC Taxi Price Prediction')

    # Sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Banner image
    image = Image.open('taxi.jpeg')
    st.image(image, caption='NYC Taxi')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Horizontal rule
    st.markdown('---')

    # Intro paragraph. Written through st.write rather than as a bare string:
    # Streamlit "magic" only renders bare expressions in the main script, and
    # this module is imported by app.py, so a bare string would show nothing.
    st.write(
        'On this page, the author will do a simple exploration. '
        'The dataset used is the NYC Yellow Taxi dataset. '
        'This dataset comes from the website Google BigQuery.'
    )

    # Show DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    st.write('#### Some description for features that have a class:')
    st.write('##### trip_type:')
    st.write('##### 1: Standard Rate')
    st.write('##### 2: JFK Airport and Others')
    st.write('##### payment_type:')
    st.write('##### 1: Credit Card')
    st.write('##### 2: Cash')

    # Price histogram and distance-vs-price scatter plot, side by side
    fig = plt.figure(figsize=(15, 5))
    plt.subplot(1, 2, 1)
    sns.histplot(data['trip_price'], kde=True, bins=30)
    plt.title('Histogram of trip_price')

    plt.subplot(1, 2, 2)
    sns.scatterplot(x='trip_distance', y='trip_price', data=data)
    plt.title('trip_distance vs trip_price')

    st.pyplot(fig)
    plt.close(fig)

    # payment_type pie chart; codes are cast to int so they match the label keys
    data['payment_type'] = data['payment_type'].astype(int)
    labels_payment_type = {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'}
    _show_category_pie(data['payment_type'].value_counts(),
                       'Pie Plot User Payment Type',
                       labels_payment_type,
                       tight_layout=True)

    # trip_type pie chart
    labels = {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'}
    _show_category_pie(data['trip_type'].value_counts(),
                       'Pie Plot User Trip Type',
                       labels)

    # Passenger count histogram: one coloured series per distinct count
    passenger_count = data['passenger_count']
    unique_counts = passenger_count.unique()

    n_bins = 10
    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
    bar_width = 0.8

    fig = plt.figure(figsize=(8, 2))

    for i, count in enumerate(unique_counts):
        counts = passenger_count[passenger_count == count]
        # Cycle through the palette so more distinct counts than colours
        # reuses colours instead of raising IndexError.
        plt.hist(counts, bins=n_bins, color=colors[i % len(colors)], alpha=0.7, width=bar_width)

    plt.title('Histogram of Passenger Count')
    plt.xlabel('Passenger Count')
    plt.ylabel('Frequency')

    st.pyplot(fig)
    plt.close(fig)
|
130 |
+
|
131 |
+
# Render the EDA page when this file is executed as the main script rather
# than imported.
if __name__=='__main__':
    run()
|
list_cat_cols_taxi.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["trip_type"]
|
list_num_cols_taxi.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["trip_distance", "passenger_count"]
|
model_encoder_taxi.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34cafa737dd2c38063a8b6540e20a350f2c6169a7c0454556cb42c4331e890f2
|
3 |
+
size 606
|
model_lin_reg_ord.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e0530112a9d87ac767755541fd59fa3381ac7dbd8d38ee8ba7ca502288f7fb4
|
3 |
+
size 584
|
model_scaler_taxi.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d5b7178b3542a8ed37ccbf4f0ed593a2f2cae42058478462a2cedc71e7543f9
|
3 |
+
size 682
|
pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4af6d989d67b1af9a44d570a268d7daf7a090efd42821e66b5583d64ec33066f
|
3 |
+
size 2837
|
prediction.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
|
6 |
+
import json
|
7 |
+
|
8 |
+
|
9 |
+
# Load All Files
|
10 |
+
# Load the serialized artifacts once, at import time.
# NOTE: pickle.load executes code embedded in the file; these files are
# expected to be the ones shipped alongside this module.

# Regression model used for the final prediction.
with open('model_lin_reg_ord.pkl', 'rb') as regressor_fh:
    model_lin_reg = pickle.load(regressor_fh)

# Scaler applied to the numerical columns.
with open('model_scaler_taxi.pkl', 'rb') as scaler_fh:
    model_scaler = pickle.load(scaler_fh)

# Encoder applied to the categorical columns.
with open('model_encoder_taxi.pkl', 'rb') as encoder_fh:
    model_encoder = pickle.load(encoder_fh)

# Column-name lists, stored as JSON arrays in plain-text files.
with open('list_num_cols_taxi.txt', 'r') as num_cols_fh:
    list_num_cols = json.loads(num_cols_fh.read())

with open('list_cat_cols_taxi.txt', 'r') as cat_cols_fh:
    list_cat_cols = json.loads(cat_cols_fh.read())
|
24 |
+
|
25 |
+
|
26 |
+
def run():
    """Render the prediction form and, once submitted, the predicted price.

    Collects trip distance, passenger count and trip type from a Streamlit
    form, echoes them back as a one-row dataframe, and after the form is
    submitted scales the numerical columns, encodes the categorical columns,
    and feeds the concatenated features to the regression model loaded at
    module level.
    """
    with st.form(key='from_taxi_nyc'):
        trip_distance = st.number_input('Trip Distance', min_value=0, max_value=100000000, value=0)
        passenger_count = st.number_input('Passenger Count', min_value=0, max_value=20, value=0)
        trip_type = st.selectbox('Trip Type', ('Standard Rate', 'JFK Airport and Others'), index=1)

        submitted = st.form_submit_button('Predict')

    # NOTE(review): trip_type is passed to the encoder as the label string
    # chosen above; confirm the encoder was fitted on these same strings.
    input_row = {
        'trip_distance': float(trip_distance),
        'passenger_count': float(passenger_count),
        'trip_type': trip_type
    }

    data_inf = pd.DataFrame([input_row])
    st.dataframe(data_inf)

    if submitted:
        # Split between numerical columns and categorical columns
        data_inf_num = data_inf[list_num_cols]
        data_inf_cat = data_inf[list_cat_cols]

        # Feature scaling and feature encoding
        data_inf_num_scaled = model_scaler.transform(data_inf_num)
        data_inf_cat_encoded = model_encoder.transform(data_inf_cat)
        data_inf_final = np.concatenate([data_inf_num_scaled, data_inf_cat_encoded], axis=1)

        # Predict using Linear Regression
        y_pred_inf = model_lin_reg.predict(data_inf_final)

        # predict() returns an array holding the single prediction. Pull the
        # element out explicitly: calling int() directly on an array with
        # ndim > 0 is deprecated in NumPy and fails for a (1, 1) result.
        predicted_price = np.asarray(y_pred_inf).item()

        st.write('# Price : ', str(int(predicted_price)))
|
59 |
+
|
60 |
+
# Render the prediction page when this file is executed as the main script
# rather than imported.
if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
Pillow
|
6 |
+
plotly
|
7 |
+
scikit-learn==1.2.2
|
8 |
+
# streamlit is already listed above; "streamlit as st" is Python import syntax, not a valid requirement specifier
|
9 |
+
numpy
|
taxi.jpeg
ADDED
taxi_dataset.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|