Gigisghifari
committed on
Commit
•
fe36b98
1
Parent(s):
628ad3c
Update eda.py
Browse files
eda.py
CHANGED
@@ -1,131 +1,127 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
import seaborn as sns
|
4 |
-
import matplotlib.pyplot as plt
|
5 |
-
import plotly.express as px
|
6 |
-
|
7 |
-
st.set_page_config(
|
8 |
-
page_title = 'Airline Survey - EDA',
|
9 |
-
layout='wide',
|
10 |
-
initial_sidebar_state='expanded'
|
11 |
-
)
|
12 |
-
|
13 |
-
def run():
|
14 |
-
# Making the title
|
15 |
-
st.title('Airline Survey Prediction Model')
|
16 |
-
|
17 |
-
# making the Subheader
|
18 |
-
st.subheader('Exploratory Data Analysis for the result of the Airline Survey')
|
19 |
-
|
20 |
-
# adding picture
|
21 |
-
st.image('https://static.vecteezy.com/system/resources/thumbnails/015/400/665/small/flying-plane-above-the-clouds-aircraft-in-the-sky-travel-concept-illustration-for-advertising-airline-website-to-search-for-air-tickets-travel-agency-traveling-flyer-banner-illustration-vector.jpg',
|
22 |
-
caption= 'Airline Banner - source from google')
|
23 |
-
|
24 |
-
# adding Deskripsi
|
25 |
-
st.write('-'*50)
|
26 |
-
st.write('
|
27 |
-
st.write('
|
28 |
-
st.write('
|
29 |
-
st.write('-'*50)
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
with st.expander("Data
|
43 |
-
st.
|
44 |
-
st.dataframe(data)
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
st.
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
st.
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
st.
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
st.
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
st.
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
st.
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
st.
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
if __name__ == '__main__':
|
131 |
run()
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
# Page-level settings for the EDA page: browser-tab title, wide layout, and a
# sidebar that starts expanded. Kept in one mapping so the options are easy to
# scan and are handed to Streamlit in a single call.
_page_settings = {
    'page_title': 'Airline Survey - EDA',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_page_settings)
|
12 |
+
|
13 |
+
# Human-readable captions for the numeric satisfaction codes assigned in run().
_SATISFACTION_LABELS = {
    0: '0 = neutral or dissatisfaction',
    1: '1 = satisfaction',
}


def _render_figure(fig):
    """Display a matplotlib figure on the Streamlit page, then close it.

    Streamlit re-executes the script on each interaction, so a figure that is
    never closed stays registered with pyplot and accumulates over time.
    """
    st.pyplot(fig)
    plt.close(fig)


def _write_insights(lines):
    """Write every entry of ``lines`` inside an expander labelled "Insight".

    NOTE(review): the original indentation was lost in the diff view this code
    was recovered from; the concluding sentence of each insight list is assumed
    to live inside the expander together with the numbered points - confirm.
    """
    with st.expander("Insight"):
        for line in lines:
            st.write(line)


def run():
    """Render the Airline Survey exploratory-data-analysis page.

    Writes the title, banner image and objective text, loads
    'Airplane Dataset.csv' from the current working directory, recodes the
    ``satisfaction`` column to 0/1, and then draws the section chosen in the
    sidebar "Submenu" select box.
    """
    # Making the title
    st.title('Airline Survey Prediction Model')

    # making the Subheader
    st.subheader('Exploratory Data Analysis for the result of the Airline Survey')

    # adding picture
    st.image(
        'https://static.vecteezy.com/system/resources/thumbnails/015/400/665/small/flying-plane-above-the-clouds-aircraft-in-the-sky-travel-concept-illustration-for-advertising-airline-website-to-search-for-air-tickets-travel-agency-traveling-flyer-banner-illustration-vector.jpg',
        caption='Airline Banner - source from google',
    )

    # adding the description
    st.write('-'*50)
    st.write('### OBJECTIVE')
    st.write('we want to create a prediction model in order to help the airline predict the satisfaction of a customer after using their services to find out whether the customer will leave a positive or negative comment. this exploratory data analysis is done in order help improve the services of the airline in order to achieve an overall 90% customer satisfaction score. This is done by finding factors that affect a customer satisfaction and improving it which will increase customer retention and positive comment which in turn will bring more customers and profits for the airline.')
    st.write('**(Please use the submenu on the left to navigate to the relevant feature that has been explored using exploratory data analysis)**')
    st.write('-'*50)

    data = pd.read_csv('Airplane Dataset.csv')
    # errors='ignore': the column is only present when the CSV was saved with
    # its index; a file without it should not crash the page.
    data = data.drop(columns='Unnamed: 0', errors='ignore')
    data['satisfaction'] = data['satisfaction'].replace(
        {'neutral or dissatisfied': 0, 'satisfied': 1}
    )

    submenu = st.sidebar.selectbox(
        'Submenu',
        ['Survey Data', 'Flight Distance', 'Customer Type', 'Age', 'Class', 'Type of Travel', 'satisfaction'],
    )

    if submenu == "Survey Data":
        # NOTE(review): nesting below each expander is reconstructed from the
        # statement order; the source view carried no indentation - confirm.
        st.write('## Data Info')
        with st.expander("Data Description"):
            st.subheader("Data Description")
            st.dataframe(data)
            st.write(f"Shape of DataFrame is:- {data.shape}")
        with st.expander("Data Types"):
            st.write('## Data Types')
            st.dataframe(data.dtypes)
        with st.expander("Descriptive Table"):
            st.write('## Descriptive Table')
            # Summarise a copy without the identifier/categorical columns
            # instead of rebinding ``data`` itself.
            described = data.drop(['id', 'Gender', 'Customer Type', 'Type of Travel', 'Class'], axis=1)
            data_describe = described.describe(include='all').fillna("").astype("str")
            st.write(data_describe)

    elif submenu == 'Flight Distance':
        st.write('### Histogram of Flight Distance')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.histplot(data['Flight Distance'], kde=True, bins=30, ax=ax)
        _render_figure(fig)
        _write_insights([
            '1. The distribution of the flight distance is highly positively skewed. meaning that the airline customer usually travel short distance',
            '2. most of the airline customers travel around 300 KM. from this data we can infer that most travel happen domestically',
            '3. customer rarely travel above 3000 km. this means that while the airline mostly does domestic travle they also do international flight but a bit rarely',
            'from the insight of this data we can see that the airline customer mostly travel a small flight rather than long flight. from this data, we can conclude that most of the airline data is mostly domestic flight with only some international flight.',
        ])

        st.write('### Barchart of Flight Distance and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='satisfaction', y='Flight Distance', ax=ax)
        # The plot has no hue, so a legend would attach the two captions to
        # arbitrary artists; explain the 0/1 codes on the axis label instead.
        ax.set_xlabel('satisfaction ({}, {})'.format(_SATISFACTION_LABELS[0], _SATISFACTION_LABELS[1]))
        _render_figure(fig)
        _write_insights([
            '1. customer with higher flight distance (internationally) at an average of 1500km usually is satisfied with the airline services',
            '2. customer with lower flight distance (domestically) at an average of 900km usually is not satisfied with the airline services',
            'from the insight of this data we can conclude that customer is far more satisfied on long flight distance or international flight rather than shorter distance or domestic flight. we can infer that this happens as the customers that travel internationally usually fly more and have experienced the quality of multiple aircraft compared to people that travel domestically.',
        ])

    elif submenu == 'Customer Type':
        st.write('### Barchart of Customer Type and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='Customer Type', y='satisfaction', ax=ax)
        _render_figure(fig)
        _write_insights([
            '1. loyal customer is usually satisfied with the airline services with an average of around 0.5 satisfaction',
            '2. disloyal customer is usually more dissatisfied with the airline Service with an average of around 0.25 satisfaction',
            'from the insight of this data we can conclude that loyal customer seems to be more satisfied with the airline Service compared to disloyal customer. This could happen as loyal customer can get benefits such as faster queueing and more baggage space.',
        ])

    elif submenu == 'Age':
        st.write('### satisfaction of age')
        # Rows: age, columns: satisfaction code, values: number of respondents.
        age_satisfaction_counts = data.groupby(['Age', 'satisfaction']).size().unstack(fill_value=0)
        fig, ax = plt.subplots(figsize=[15, 5])
        age_satisfaction_counts.plot(kind='line', ax=ax)
        _render_figure(fig)
        _write_insights([
            '1. dissatisfied customer mostly come from customers around the age 25 and become significantly lower at age 40',
            '2. satisfied customer mostly come from customers around the age 40 and become significantly lower at age 60',
            'from the insight of this data we can conclude that most of the dissatisfied review come from younger customer around the age of 25 and significantly lower at the older age of 40 where people start to give more satisfied review. this could happen as younger audience might prioritize feauture such as online booking and wifi which older audience might not care about much and prefer feature such as comfortable seat and legroom.',
        ])

    elif submenu == 'Class':
        st.write('### Barchart of Class and Satisfaction')
        fig, ax = plt.subplots(figsize=[15, 5])
        sns.barplot(data=data, x='Class', y='satisfaction', ax=ax)
        _render_figure(fig)
        _write_insights([
            '1. customer in business class significantly have higher satisfaction at an average satisfaction of 0.7 compared to both the economy class',
            # Fixed wording: the original read "economy and economy class".
            '2. customer in economy and economy plus class have a lower satisfaction at around 0.2 to 0.25',
            'from the insight of this data we can conclude that people in business class usually have higher overall satisfaction compared to both economy class. this could happen as the airline business class usually have extra benefit such as a more comfortable chair, better food and drinks, priority baggage and queue, and more compared to economy or economy plus.',
        ])

    elif submenu == 'Type of Travel':
        st.write('### The amount of Travel Type')
        fig, ax = plt.subplots(figsize=[15, 5])
        data['Type of Travel'].value_counts().plot(kind='bar', ax=ax)
        _render_figure(fig)
        _write_insights([
            '1. most of the airline customer use the airline for business travel purpose with around 70000 customers',
            '2. only some of the airline customer use the airline for personal travel purpose with around 35000 customers',
            'from the insight of this data we can conclude that most people use the airline for business travel purposes and rarely uses them for personal travel purpose. This could happen because due to the airline high price, people might pick the airline if their work pay for their travel expense.',
        ])

    elif submenu == 'satisfaction':
        st.write('### Pie chart of satisfaction')
        fig, ax = plt.subplots(figsize=[30, 10])
        satisfaction_counts = data['satisfaction'].value_counts()
        satisfaction_counts.plot(kind='pie', ax=ax)
        # value_counts() orders by frequency, not by code, so derive the legend
        # captions from the actual wedge order instead of assuming 0 then 1.
        ax.legend(labels=[
            _SATISFACTION_LABELS.get(code, str(code))
            for code in satisfaction_counts.index
        ])
        _render_figure(fig)
        _write_insights([
            '1. customer are mostly dissatisfied with the current service of the airline at a total of 56.67%',
            '2. despite most customers is dissatisfied with the airline service. the data is almost balanced between satisfied and dissatisfied',
            'from the insight of this data we can conclude that most of the airline consumers are dissatisfied with our current service. this could happen because as we can see from the flight history, most people travel domestically and most domestic flight customer is dissatisfied.',
        ])
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
# Render the page only when this file is executed as the entry script.
if __name__ == '__main__':
    run()
|