didev007 committed on
Commit
693c50e
1 Parent(s): 24be49a

Upload 8 files

Browse files
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import eda
4
+ import prediction
5
+
6
# Entry script of the Streamlit app: a sidebar selector routes between the
# home page, the exploratory-data-analysis page and the prediction form.

HOME_PAGE = "Home page"
EDA_PAGE = "Data exploration"
PREDICTION_PAGE = "Data Prediction"


def _show_home():
    """Render the landing page with author details and the project goal."""
    # The other two pages (eda.run / prediction.run) are about flight
    # passenger satisfaction, so the home page describes the same project
    # instead of the "Default Payment" text left over from another app.
    st.title("Flight Passenger Satisfaction Prediction")
    st.write("Name :Dicky Gabriel")
    st.write("Batch :SBY-002")
    st.write("Objective : Predict Flight Passenger Satisfaction")


page = st.sidebar.selectbox(
    "choose page: ",
    (HOME_PAGE, EDA_PAGE, PREDICTION_PAGE),
)

# Explicit dispatch table; any unexpected value falls back to the prediction
# page, which is what the original trailing ``else`` branch did.
_PAGES = {
    HOME_PAGE: _show_home,
    EDA_PAGE: eda.run,
    PREDICTION_PAGE: prediction.run,
}
_PAGES.get(page, prediction.run)()
best_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9acb821bd4b7e6e41b2e2acb705449faadef9229d54078f5ac2a49ecd9fe0b2
3
+ size 72927667
eda.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from matplotlib.ticker import MultipleLocator
7
+
8
def _render(fig):
    """Draw ``fig`` on the Streamlit page, then release it.

    Streamlit re-executes the script on every interaction, so figures that
    are never closed pile up in pyplot's global registry.
    """
    st.pyplot(fig)
    plt.close(fig)


def _show_age_histogram(df, title, hue=None):
    """Render a histogram (with KDE) of the ``age`` column.

    Args:
        df: DataFrame with a lower-cased ``age`` column.
        title: Title drawn above the axes.
        hue: Optional column name used to split the bars by colour.
    """
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.minorticks_on()
    ax.xaxis.set_minor_locator(MultipleLocator(5))
    ax.yaxis.set_minor_locator(MultipleLocator(100))
    ax.set_title(title, size=20, fontweight='bold', y=1.04)

    sns.histplot(x='age', data=df, hue=hue, edgecolor='black', kde=True,
                 line_kws={'lw': 1, 'linestyle': '--'}, ax=ax)

    ax.set_xlabel('Age', size=15)
    ax.set_ylabel('Count', size=15)
    _render(fig)


def _show_satisfaction_countplot(df, column):
    """Render a count plot of ``column`` split by satisfaction.

    Each bar is annotated with its height. The column name doubles as the
    plot title.
    """
    # Draw on an explicit Axes rather than relying on pyplot's "current
    # axes", so the plot always lands on the figure that gets rendered.
    fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
    sns.countplot(x=column, hue='satisfaction', data=df, ax=ax)
    ax.set_title(column, size=15)
    ax.legend(fontsize='10')
    ax.set_xlabel('')
    ax.set_ylabel('Count', size=15)

    for bar in ax.patches:
        ax.annotate(f'{bar.get_height()}',
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=8, color='black',
                    xytext=(0, 5), textcoords='offset points')

    _render(fig)


def run():
    """Render the exploratory-data-analysis page.

    Loads the passenger CSV, shows the raw table, and then draws a
    satisfaction pie chart, two age histograms and two count plots
    (type of travel, class) split by satisfaction.
    """
    st.title("Flight Passenger Satisfaction Prediction")

    st.subheader("Analysis Data for Flight Passenger")

    st.write("This page made by Dicky Gabriel")
    st.markdown("---")

    # NOTE(review): file name kept exactly as in the original code; confirm
    # it matches the CSV that is actually deployed next to this script.
    csv_path = "Fligh_satification.csv"
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Show a readable message instead of a stack trace when the data
        # file has not been shipped with the app.
        st.error(f"Data file '{csv_path}' was not found.")
        return

    st.dataframe(df)
    # Every plot below addresses columns by their lower-cased names.
    df.columns = df.columns.str.lower()

    st.write("## Passenger Satisfaction")
    sns.set_style("whitegrid")
    fig_pie, ax_pie = plt.subplots(figsize=(6, 6))
    # Take the labels from the counted values themselves: value_counts()
    # orders by frequency, so hard-coded labels could be attached to the
    # wrong slice.
    satisfaction_counts = df["satisfaction"].value_counts()
    slice_labels = [str(value).capitalize() for value in satisfaction_counts.index]
    ax_pie.pie(satisfaction_counts, labels=slice_labels, autopct='%1.1f%%')
    ax_pie.set_title('Perbandingan kolom satisfied dan neutral or disastified')
    _render(fig_pie)

    st.write("## Age Histogram")
    _show_age_histogram(df, 'Ages Histogram')

    st.write("## Scatterplot of age and satisfaction")
    _show_age_histogram(df, 'Ages Histogram with Satisfaction', hue="satisfaction")

    # The original built this section twice and only rendered the second
    # copy; it is now drawn once.
    st.write("## plane type")
    _show_satisfaction_countplot(df, 'type of travel')

    st.write("## Class type")
    _show_satisfaction_countplot(df, 'class')


if __name__ == "__main__":
    run()
list_cat_nominal_columns.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["type of travel", "generation"]
list_cat_ordinal_columns.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["class", "inflight wifi service", "ease of online booking", "food and drink", "online boarding", "seat comfort", "inflight entertainment", "on-board service", "leg room service", "baggage handling", "checkin service", "inflight service", "cleanliness"]
list_num_columns.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["flight distance"]
prediction.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import json
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
# Column-name lists shipped next to the app as JSON arrays. The encoding is
# given explicitly so reading does not depend on the host's locale default.
with open("list_num_columns.txt", "r", encoding="utf-8") as file_1:
    list_num_skew_columns = json.load(file_1)

with open("list_cat_nominal_columns.txt", "r", encoding="utf-8") as file_2:
    nom_col_skew = json.load(file_2)

with open("list_cat_ordinal_columns.txt", "r", encoding="utf-8") as file_3:
    ord_col_skew = json.load(file_3)

# NOTE(security): unpickling executes arbitrary code embedded in the file.
# Only ever load a pipeline artifact produced by a trusted training run.
with open("best_pipeline.pkl", "rb") as file_4:
    best_pipeline = pickle.load(file_4)
18
+
19
# Answer choices shared by every service-rating question.
_RATING_SCALE = (0, 1, 2, 3, 4, 5)

# (widget label, DataFrame column) for each rating question, in the order the
# widgets are shown and the columns are laid out in the inference frame.
_RATING_FIELDS = (
    ("inflight_wifi_service", "inflight wifi service"),
    ("departure/arrival_time_convenient", "departure/arrival time convenient"),
    ("ease_of_online_booking", "ease of online booking"),
    ("gate_location", "gate location"),
    ("food_and_drink", "food and drink"),
    ("online_boarding", "online boarding"),
    ("seat_comfort", "seat comfort"),
    ("inflight_entertainment", "inflight entertainment"),
    ("on_board_service", "on-board service"),
    ("leg_room_service", "leg room service"),
    ("baggage_handling", "baggage handling"),
    ("checkin_service", "checkin service"),
    ("inflight_service", "inflight service"),
    ("cleanliness", "cleanliness"),
)


def _age_to_generation(age):
    """Return the generation label for a passenger age.

    Ages up to 21 map to 'Generation z'. The form allows age 5, which the
    original ``6 <= age`` lower bound pushed into the final catch-all
    bucket ('Silent Generation').
    """
    if age <= 21:
        return 'Generation z'
    if age <= 36:
        return 'Millennials'
    if age <= 52:
        return 'Generation X'
    if age <= 73:
        return 'Baby Boomers'
    return 'Silent Generation'


def run():
    """Render the prediction form and show the model's answer on submit.

    Collects passenger details and service ratings, assembles them into a
    one-row DataFrame (displayed on the page), derives the ``generation``
    column from the age, and, once the form is submitted, writes the
    prediction returned by the module-level ``best_pipeline``.
    """
    # create form
    with st.form("form"):
        age = st.number_input("age", min_value=5, max_value=90, value=30, step=2)
        flight_distance = st.number_input("flight distance", min_value=31,
                                          max_value=5000, value=850, step=10)
        departure_delay_in_minutes = st.number_input(
            "departure delay in minutes",
            min_value=0, max_value=1600, value=200, step=10)
        arrival_delay_in_minutes = st.number_input(
            "arrival delay in minutes",
            min_value=0, max_value=1600, value=200, step=10)

        st.markdown("---")
        gender = st.radio("gender", ("Male", "Female"), index=0)
        customer_type = st.radio("customer type",
                                 ("Loyal customer", "disloyal customer"), index=0)
        type_of_travel = st.radio("type of travel",
                                  ('Personal Travel', 'Business travel'), index=0)
        class_flight = st.radio("class flight",
                                ('Eco Plus', 'Business', 'Eco'), index=0)

        st.markdown("---")
        # One radio group per rating question, keyed by its DataFrame column.
        ratings = {
            column: st.radio(label, _RATING_SCALE, index=0)
            for label, column in _RATING_FIELDS
        }
        st.markdown("---")

        submitted = st.form_submit_button("predict")

    data_inf = pd.DataFrame([{
        "gender": gender,
        "customer type": customer_type,
        "age": age,
        "type of travel": type_of_travel,
        "class": class_flight,
        **ratings,
        "flight distance": flight_distance,
        "departure delay in minutes": departure_delay_in_minutes,
        "arrival delay in minutes": arrival_delay_in_minutes,
    }])
    st.dataframe(data_inf)

    # Derived feature, added after the raw inputs have been displayed.
    data_inf["generation"] = [_age_to_generation(value) for value in data_inf["age"]]

    if submitted:
        # The full frame is handed to the pipeline as before; the unused
        # per-column-group slices the original computed here were dropped.
        y_predict_inf = best_pipeline.predict(data_inf)

        st.write("# Satisfaction: ", str(y_predict_inf[0]))


if __name__ == "__main__":
    run()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ plotly
7
+ scikit-learn