Gigisghifari committed on
Commit
11714f4
1 Parent(s): 8826a17

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. Airplane Dataset.csv +3 -0
  3. app.py +11 -0
  4. eda.py +131 -0
  5. model_xgb.pkl +3 -0
  6. predict.py +91 -0
  7. requirements.txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Airplane[[:space:]]Dataset.csv filter=lfs diff=lfs merge=lfs -text
Airplane Dataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15fab6d36a6123dedb035e1bc11b31f27b547671ce8d27762e74d96527c364f
3
+ size 12193089
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import predict
4
+
5
# Sidebar page switcher: pick which page module renders the main area.
navigation = st.sidebar.selectbox('Pilih Halaman:', ('EDA', 'Predict'))

# Anything other than 'EDA' falls through to the prediction page.
render_page = eda.run if navigation == 'EDA' else predict.run
render_page()
11
+
eda.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+
7
# Page-wide Streamlit settings; executed once when this module is imported.
st.set_page_config(page_title='Airline Survey - EDA', layout='wide', initial_sidebar_state='expanded')
12
+
13
@st.cache_data
def _load_data():
    """Read the survey CSV and encode the ``satisfaction`` column as 0/1.

    Wrapped in ``st.cache_data`` so the file is parsed once instead of on
    every widget interaction (Streamlit re-executes the script each time).
    """
    data = pd.read_csv('Airplane Dataset.csv')
    data = data.drop('Unnamed: 0', axis=1)
    data.satisfaction = data.satisfaction.replace('neutral or dissatisfied', 0)
    data.satisfaction = data.satisfaction.replace('satisfied', 1)
    return data


def _show(fig):
    """Render a Matplotlib figure in the app, then close it.

    Figures created through pyplot stay registered until explicitly closed,
    so without the close they pile up across script reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _insights(*lines):
    """Write each given line inside a collapsible "Insight" expander."""
    with st.expander("Insight"):
        for line in lines:
            st.write(line)


def run():
    """Render the EDA page: header, project description and the chart
    selected in the sidebar "Submenu" select box."""
    # Making the title
    st.title('Airline Survey Prediction Model')

    # making the Subheader
    st.subheader('Exploratory Data Analysis for the result of the Airline Survey')

    # adding picture
    st.image('https://static.vecteezy.com/system/resources/thumbnails/015/400/665/small/flying-plane-above-the-clouds-aircraft-in-the-sky-travel-concept-illustration-for-advertising-airline-website-to-search-for-air-tickets-travel-agency-traveling-flyer-banner-illustration-vector.jpg',
             caption= 'Airline Banner - source from google')

    # adding the description
    st.write('-'*50)
    st.write('Milestone 2')
    st.write('Nama : Achmad Abdillah Ghifari')
    st.write('Batch : BSD-006')
    st.write('-'*50)
    st.write('### OBJECTIVE')
    st.write('we want to create a prediction model in order to help the airline predict the satisfaction of a customer after using their services to find out whether the customer will leave a positive or negative comment. this exploratory data analysis is done in order help improve the services of the airline in order to achieve an overall 90% customer satisfaction score. This is done by finding factors that affect a customer satisfaction and improving it which will increase customer retention and positive comment which in turn will bring more customers and profits for the airline.')
    st.write('**(Please use the submenu on the left to navigate to the relevant feature that has been explored using exploratory data analysis)**')
    st.write('-'*50)

    data = _load_data()

    submenu = st.sidebar.selectbox('Submenu',['Survey Data','Flight Distance','Customer Type', 'Age', 'Class', 'Type of Travel', 'satisfaction'])

    if submenu == "Survey Data":
        st.write('## Data Info')
        # This expander shows the raw table, so it is labelled accordingly
        # (it previously duplicated the "Descriptive Table" label below).
        with st.expander("Data Description"):
            st.subheader("Data Description")
            st.dataframe(data)
            st.write(f"Shape of DataFrame is:- {data.shape}")
        with st.expander("Data Types"):
            st.write('## Data Types')
            st.dataframe(data.dtypes)
        with st.expander("Descriptive Table"):
            st.write('## Descriptive Table')
            # Drop the identifier and categorical columns before summarising.
            summary_input = data.drop(['id','Gender','Customer Type','Type of Travel','Class'], axis=1)
            data_describe = summary_input.describe(include='all').fillna("").astype("str")
            st.write(data_describe)

    elif submenu == 'Flight Distance':
        st.write('### Histogram of Flight Distance')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.histplot(data['Flight Distance'], kde=True, bins=30, ax=ax)
        _show(fig)
        _insights(
            '1. The distribution of the flight distance is highly positively skewed. meaning that the airline customer usually travel short distance',
            '2. most of the airline customers travel around 300 KM. from this data we can infer that most travel happen domestically',
            '3. customer rarely travel above 3000 km. this means that while the airline mostly does domestic travle they also do international flight but a bit rarely',
            'from the insight of this data we can see that the airline customer mostly travel a small flight rather than long flight. from this data, we can conclude that most of the airline data is mostly domestic flight with only some international flight.',
        )

        st.write('### Barchart of Flight Distance and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='satisfaction', y='Flight Distance', ax=ax)
        ax.legend(labels=['0 = neutral or dissatisfaction', '1 = satisfaction'])
        _show(fig)
        _insights(
            '1. customer with higher flight distance (internationally) at an average of 1500km usually is satisfied with the airline services',
            '2. customer with lower flight distance (domestically) at an average of 900km usually is not satisfied with the airline services',
            'from the insight of this data we can conclude that customer is far more satisfied on long flight distance or international flight rather than shorter distance or domestic flight. we can infer that this happens as the customers that travel internationally usually fly more and have experienced the quality of multiple aircraft compared to people that travel domestically.',
        )

    elif submenu == 'Customer Type':
        st.write('### Barchart of Customer Type and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='Customer Type', y='satisfaction', ax=ax)
        _show(fig)
        _insights(
            '1. loyal customer is usually satisfied with the airline services with an average of around 0.5 satisfaction',
            '2. disloyal customer is usually more dissatisfied with the airline Service with an average of around 0.25 satisfaction',
            'from the insight of this data we can conclude that loyal customer seems to be more satisfied with the airline Service compared to disloyal customer. This could happen as loyal customer can get benefits such as faster queueing and more baggage space.',
        )

    elif submenu == 'Age':
        st.write('### satisfaction of age')
        # Number of respondents per (age, satisfaction) pair, one column per class.
        age_counts = data.groupby(['Age', 'satisfaction']).size().unstack(fill_value=0)
        fig, ax = plt.subplots(figsize=[15, 5])
        age_counts.plot(kind='line', ax=ax)
        _show(fig)
        _insights(
            '1. dissatisfied customer mostly come from customers around the age 25 and become significantly lower at age 40',
            '2. satisfied customer mostly come from customers around the age 40 and become significantly lower at age 60',
            'from the insight of this data we can conclude that most of the dissatisfied review come from younger customer around the age of 25 and significantly lower at the older age of 40 where people start to give more satisfied review. this could happen as younger audience might prioritize feauture such as online booking and wifi which older audience might not care about much and prefer feature such as comfortable seat and legroom.',
        )

    elif submenu == 'Class':
        st.write('### Barchart of Class and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='Class', y='satisfaction', ax=ax)
        _show(fig)
        _insights(
            '1. customer in business class significantly have higher satisfaction at an average satisfaction of 0.7 compared to both the economy class',
            '2. customer in economy and economy plus class have a lower satisfaction at around 0.2 to 0.25',
            'from the insight of this data we can conclude that people in business class usually have higher overall satisfaction compared to both economy class. this could happen as the airline business class usually have extra benefit such as a more comfortable chair, better food and drinks, priority baggage and queue, and more compared to economy or economy plus.',
        )

    elif submenu == 'Type of Travel':
        st.write('### The amount of Travel Type')
        fig, ax = plt.subplots(figsize=[15, 5])
        data['Type of Travel'].value_counts().plot(kind='bar', ax=ax)
        _show(fig)
        _insights(
            '1. most of the airline customer use the airline for business travel purpose with around 70000 customers',
            '2. only some of the airline customer use the airline for personal travel purpose with around 35000 customers',
            'from the insight of this data we can conclude that most people use the airline for business travel purposes and rarely uses them for personal travel purpose. This could happen because due to the airline high price, people might pick the airline if their work pay for their travel expense.',
        )

    elif submenu == 'satisfaction':
        st.write('### Pie chart of satisfaction')
        fig, ax = plt.subplots(figsize=[30, 10])
        data['satisfaction'].value_counts().plot(kind='pie', ax=ax)
        ax.legend(labels=['0 = neutral or dissatisfaction', '1 = satisfaction'])
        _show(fig)
        _insights(
            '1. customer are mostly dissatisfied with the current service of the airline at a total of 56.67%',
            '2. despite most customers is dissatisfied with the airline service. the data is almost balanced between satisfied and dissatisfied',
            'from the insight of this data we can conclude that most of the airline consumers are dissatisfied with our current service. this could happen because as we can see from the flight history, most people travel domestically and most domestic flight customer is dissatisfied.',
        )


if __name__ == '__main__':
    run()
model_xgb.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15252ffb23408aca76c1cbc02aeb1b44fd9a030816da6b41c53be787e516df90
3
+ size 406177
predict.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import pickle
4
+ import streamlit as st
5
# Load the pickled classifier once at import time. The pickle ships with the
# app; never point this at a file from an untrusted source.
with open('model_xgb.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
7
+
8
_RATING_PREFIX = 'on a scale from 1 to 5, how satisfied are you with '
_RATING_HELP = 'please select how satisfied are you from 1(very unsatisfied) to 5(very satisfied)'
_RATING_HELP_OPTIONAL = _RATING_HELP + ', pick 0 if you dont experience this service'


def _rating(topic, default, optional=False):
    """Return the value of a 0-5 satisfaction slider for ``topic``.

    ``optional`` selects the help text that tells the user to pick 0 when
    they did not experience the service.
    """
    help_text = _RATING_HELP_OPTIONAL if optional else _RATING_HELP
    return st.slider(_RATING_PREFIX + topic, 0, 5, default, help=help_text)


def run():
    """Render the survey form and, once submitted, show the model's prediction.

    The answers are collected into a one-row DataFrame and passed to the
    module-level ``model``; a prediction of 0 is reported as a likely
    negative comment and 1 as a likely positive comment.
    """
    with st.form("my_form"):
        st.write("## Airplane Satisfaction Survey")
        st.write('Dear valued passenger, Airline strive to give the best flight experience to our passenger and we would like to hear your experience flying with us. rest assured your feedback will help us enchance our service to provide the best flight experience. Thank you for taking the time to fill in this survey ')

        st.write('## Personal Information')
        # Named passenger_id (not ``id``) to avoid shadowing the builtin.
        passenger_id = st.number_input('What is your id number?', 0, 10000000000, 100, help= 'id number refer to the number on your identification card')
        gender = st.selectbox('What is your gender?', ['Male', 'Female'], index=0, help= "please select the gender you identify as")
        cust_type = st.selectbox('How many time have you fly with us this month?', ['less than 3 flights', 'more than or equal to 3 flights'], index=1, help= 'please select the amount of time you fly with us this month')
        # Flight frequency is translated into the customer-type category.
        customer = 'disloyal Customer' if cust_type == 'less than 3 flights' else 'Loyal Customer'
        age = st.number_input('What is your age?', 18, 100, 30, help= 'please select your current age (age should be between 18-100 years)')
        trav_type = st.selectbox('What is the purpose of your travel?', ['Business travel', 'Personal Travel'], index=1, help= 'please select the purpose of your trip')
        # Label fixed: it previously repeated the travel-purpose question.
        fl_class = st.selectbox('What is your travel class?', ['Business', 'Eco', 'Eco Plus'], index=1, help= 'please select the travel class you choose for this flight')
        distance = st.number_input('How long was your trip', 30, 5000, 850, help= 'please select the distance of your trip (if you dont know give estimates)')

        st.write('## Satisfaction Survey')
        wifi = _rating('our inflight wifi service', 5, optional=True)
        time_conv = _rating('our departure/arrival time convenient', 3)
        on_booking = _rating('our ease of online booking', 5, optional=True)
        gate_loc = _rating('our gate location distance', 3)
        fnd = _rating('our selection of inflight food and drink', 2, optional=True)
        on_boarding = _rating('our online boarding service', 4, optional=True)
        seat_com = _rating('our inflight seat comfortability', 1)
        fl_entertainment = _rating('our selection of inflight entertainment', 3, optional=True)
        on_board = _rating('our on-board service', 2, optional=True)
        leg_room = _rating('your seats legroom', 2)
        bag_hand = _rating('the handling of your baggage', 4, optional=True)
        checkin = _rating('our check-in process', 5)
        inflight = _rating('our selection of inflight service', 4, optional=True)
        clean = _rating('the cleanliness of our plane', 3)
        dep_delay = st.number_input('Did you experience any delay on your departure? if so how long (in minutes)', 0, 2000, 0, help= 'please input the amount of departure delay you experienced in minutes (if no delay please input 0, if delay more than 2000 minute input 2000)')
        # Help text fixed: it previously described the departure delay.
        arr_delay = st.number_input('Did you experience any delay on your arrival? if so how long (in minutes)', 0, 2000, 0, help= 'please input the amount of arrival delay you experienced in minutes (if no delay please input 0, if delay more than 2000 minute input 2000)')

        submitted = st.form_submit_button("Submit")

    # NOTE(review): these keys presumably must match the column names the
    # pickled model was trained on - confirm, in particular 'Type Travel'.
    data_inf = pd.DataFrame([{
        'id': passenger_id,
        'Gender': gender,
        'Customer Type': customer,
        'Age': age,
        'Type Travel': trav_type,
        'Class': fl_class,
        'Flight Distance': distance,
        'Wifi': wifi,
        'Time Convenient': time_conv,
        'Online Booking': on_booking,
        'Gate location': gate_loc,
        'Food and drink': fnd,
        'Online boarding': on_boarding,
        'Seat comfort': seat_com,
        'Inflight entertainment': fl_entertainment,
        'On-board service': on_board,
        'Leg room service': leg_room,
        'Baggage handling': bag_hand,
        'Checkin service': checkin,
        'Inflight service': inflight,
        'Cleanliness': clean,
        'Departure Delay': dep_delay,
        'Arrival Delay': arr_delay,
    }])

    st.write('-'*50)
    st.write('(Hidden from users)')
    st.write('# Result')
    if submitted:
        for prediction in model.predict(data_inf):
            if prediction == 0:
                st.write('### customer is more likely to give a negative comment')
            elif prediction == 1:
                st.write('### customer is more likely to give a positive comment')
    else:
        st.write('### no data has been inputed')
    st.write('-'*50)


if __name__ == '__main__':
    run()
90
+
91
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ scikit-learn
7
+ plotly