Spaces:

imamzarkasie
/

Taxi-Price-Predict

Sleeping

App Files Files Community

imamzarkasie committed on May 25, 2023

Commit

65e6719

•

1 Parent(s): 241fb10

Upload 15 files

Browse files

Files changed (14) hide show

__pycache__/eda.cpython-39.pyc +0 -0
__pycache__/prediction.cpython-39.pyc +0 -0
app.py +10 -0
eda.py +132 -0
list_cat_cols_taxi.txt +1 -0
list_num_cols_taxi.txt +1 -0
model_encoder_taxi.pkl +3 -0
model_lin_reg_ord.pkl +3 -0
model_scaler_taxi.pkl +3 -0
pipeline.pkl +3 -0
prediction.py +61 -0
requirements.txt +9 -0
taxi.jpeg +0 -0
taxi_dataset.csv +0 -0

__pycache__/eda.cpython-39.pyc ADDED Viewed

Binary file (3.35 kB). View file

__pycache__/prediction.cpython-39.pyc ADDED Viewed

Binary file (1.82 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict Taxi Price'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

eda.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+st.set_page_config(
+    page_title = 'NYC TAXI - EDA',
+    layout = 'wide',
+    initial_sidebar_state = 'expanded'
+    )
+def run():
+    # Membuat Title
+    st.title('NYC Taxi Price Prediction')
+    # Membuat Sub Header
+    st.subheader('EDA for NYC Yellow Taxi Analysis')
+    # Menambahkan Gambar
+    image = Image.open('taxi.jpeg')
+    st.image(image, caption='NYC Taxi')
+    # Menambahkan Deskripsi
+    st.write('This page created by **Imam Zarkasie**')
+    st.write('### Hello!')
+    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
+    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')
+    # Membuat Garis Lurus
+    st.markdown('---')
+    # Magic Syntax
+    '''
+    On this page, the author will do a simple exploration.
+    The dataset used is the NYC Yellow Taxi dataset.
+    This dataset comes from the website Google BigQuery.
+    '''
+    # Show DataFrame
+    data = pd.read_csv('taxi_dataset.csv')
+    st.dataframe(data)
+    st.write('####  Some description for features that have a class:')
+    st.write('#####  trip_type:')
+    st.write('#####  1: Standard Rate')
+    st.write('#####  2: JFK Airport and Others')
+    st.write('#####  payment_type:')
+    st.write('#####  1: Credit Card')
+    st.write('#####  2: Cash')
+    #Make price histogram plots and scatter plots to compare
+    fig = plt.figure(figsize=(15, 5))
+    plt.subplot(1, 2, 1)
+    sns.histplot(data['trip_price'], kde=True, bins=30)
+    plt.title('Histogram of trip_price')
+    plt.subplot(1, 2, 2)
+    sns.scatterplot(x='trip_distance', y='trip_price', data=data)
+    plt.title('trip_distance vs trip_price')
+    st.pyplot(fig)
+    # Create the payment_type pie plot
+    # Convert 'payment_type' column to int if it's not already
+    data['payment_type'] = data['payment_type'].astype(int)
+    # Calculate the counts of each payment type
+    method_payment_type = data['payment_type']
+    method_counts_payment_type = method_payment_type.value_counts()
+    fig, ax = plt.subplots(figsize=(8, 3))
+    method_counts_payment_type.plot(kind='pie',
+                                    autopct='%1.1f%%',
+                                    startangle=90,
+                                    shadow=True,
+                                    ax=ax)
+    plt.title('Pie Plot User Payment Type')
+    plt.axis('equal')
+    # Menambahkan legend dengan keterangan untuk setiap kelas
+    labels_payment_type = {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'}
+    plt.legend(labels=[labels_payment_type[i] for i in method_counts_payment_type.index], loc='upper right')
+    plt.tight_layout()
+    st.pyplot(fig)
+    #Create pie plot trip_type
+    method_trip_type = data['trip_type']
+    method_counts_trip_type = method_trip_type.value_counts()
+    fig, ax = plt.subplots(figsize=(8, 3))
+    method_counts_trip_type.plot(kind='pie',
+                                    autopct='%1.1f%%',
+                                    startangle=90,
+                                    shadow=True,
+                                    ax=ax)
+    plt.title('Pie Plot User Trip Type')
+    plt.axis('equal')
+    # Menambahkan legend dengan keterangan untuk setiap kelas
+    labels = {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'}
+    plt.legend(labels=[labels[i] for i in method_counts_trip_type.index], loc='upper right')
+    st.pyplot(fig)
+    #Passenger Count Histogram
+    passenger_count = data['passenger_count']
+    unique_counts = passenger_count.unique()
+    n_bins = 10
+    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
+    bar_width = 0.8
+    fig =plt.figure(figsize=(8, 2))
+    for i, count in enumerate(unique_counts):
+        counts = passenger_count[passenger_count == count]
+        plt.hist(counts, bins=n_bins, color=colors[i], alpha=0.7, width=bar_width)
+    plt.title('Histogram of Passenger Count')
+    plt.xlabel('Passenger Count')
+    plt.ylabel('Frequency')
+    st.pyplot(fig)
+if __name__=='__main__':
+    run()

list_cat_cols_taxi.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["trip_type"]

list_num_cols_taxi.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["trip_distance", "passenger_count"]

model_encoder_taxi.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34cafa737dd2c38063a8b6540e20a350f2c6169a7c0454556cb42c4331e890f2
+size 606

model_lin_reg_ord.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e0530112a9d87ac767755541fd59fa3381ac7dbd8d38ee8ba7ca502288f7fb4
+size 584

model_scaler_taxi.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d5b7178b3542a8ed37ccbf4f0ed593a2f2cae42058478462a2cedc71e7543f9
+size 682

pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4af6d989d67b1af9a44d570a268d7daf7a090efd42821e66b5583d64ec33066f
+size 2837

prediction.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+from sklearn.preprocessing import StandardScaler, OrdinalEncoder
+import json
+# Load All Files
+with open('model_lin_reg_ord.pkl', 'rb') as file_1:
+  model_lin_reg = pickle.load(file_1)
+with open('model_scaler_taxi.pkl', 'rb') as file_2:
+  model_scaler = pickle.load(file_2)
+with open('model_encoder_taxi.pkl','rb') as file_3:
+  model_encoder = pickle.load(file_3)
+with open('list_num_cols_taxi.txt', 'r') as file_4:
+  list_num_cols = json.load(file_4)
+with open('list_cat_cols_taxi.txt', 'r') as file_5:
+  list_cat_cols = json.load(file_5)
+def run():
+    with st.form(key='from_taxi_nyc'):
+        trip_distance = st.number_input('Trip Distance', min_value=0, max_value=100000000, value=0)
+        passenger_count = st.number_input('Passenger Count', min_value=0, max_value=20, value=0)
+        trip_type = st.selectbox('Trip Type', ('Standard Rate', 'JFK Airport and Others'), index=1)
+        submitted = st.form_submit_button('Predict')
+    data_inf = {
+        'trip_distance': float(trip_distance),
+        'passenger_count': float(passenger_count),
+        'trip_type': trip_type
+    }
+    data_inf = pd.DataFrame([data_inf])
+    st.dataframe(data_inf)
+    if submitted:
+        # Split between Numerical Columns and Categorical Columns
+        data_inf_num = data_inf[list_num_cols]
+        data_inf_cat = data_inf[list_cat_cols]
+        # Feature Scaling and Feature Encoding
+        data_inf_num_scaled = model_scaler.transform(data_inf_num)
+        data_inf_cat_encoded = model_encoder.transform(data_inf_cat)
+        data_inf_final = np.concatenate([data_inf_num_scaled, data_inf_cat_encoded], axis=1)
+        # Predict using Linear Regression
+        y_pred_inf = model_lin_reg.predict(data_inf_final)
+        st.write('# Price : ', str(int(y_pred_inf)))
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+pandas
+seaborn
+matplotlib
+Pillow
+plotly
+scikit-learn==1.2.2
+numpy

taxi.jpeg ADDED Viewed

taxi_dataset.csv ADDED Viewed

The diff for this file is too large to render. See raw diff