ahishamm committed on
Commit
5fbe234
1 Parent(s): 84115b3

Uploaded files

Browse files
Files changed (4) hide show
  1. WA_Fn-UseC_-Telco-Customer-Churn.csv +0 -0
  2. dashboard.py +145 -0
  3. func.py +235 -0
  4. requirements.txt +8 -0
WA_Fn-UseC_-Telco-Customer-Churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
dashboard.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from plotly.subplots import make_subplots
8
+ import warnings
9
+ import streamlit as st
10
+ warnings.filterwarnings('ignore')
11
+ from sklearn.preprocessing import StandardScaler
12
+ from sklearn.preprocessing import LabelEncoder
13
+ from sklearn.tree import DecisionTreeClassifier
14
+ from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.naive_bayes import GaussianNB
16
+ from sklearn.neighbors import KNeighborsClassifier
17
+ from sklearn.svm import SVC
18
+ from sklearn.neural_network import MLPClassifier
19
+ from sklearn.ensemble import AdaBoostClassifier
20
+ from sklearn.ensemble import GradientBoostingClassifier
21
+ from sklearn.ensemble import ExtraTreesClassifier
22
+ from sklearn.linear_model import LogisticRegression
23
+ from sklearn.model_selection import train_test_split
24
+ from sklearn.metrics import accuracy_score
25
+ from xgboost import XGBClassifier
26
+ from sklearn import metrics
27
+ from sklearn.metrics import roc_curve
28
+ from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
29
+ import func as fc
30
+ from io import StringIO
31
# Streamlit entry point: a "Data" tab that plots the uploaded churn dataset
# and an "ML" tab that trains a selected model and predicts churn for one
# hand-entered customer.
st.set_page_config(layout='wide')
tab1, tab2 = st.tabs(['Data', 'ML'])

# loading the options list from the functions file func.py
optionList = fc.OPTION_LIST
modelList = fc.MODEL_SELECTOR

# option to upload the dataframe
with tab1:
    # NOTE(review): the selected plot name is not used to filter the charts;
    # every figure is rendered below regardless of the selection.
    option = st.selectbox('Select the plot you want to visualize', optionList)
    uploaded_dataframe = st.file_uploader("Choose a file")
    if uploaded_dataframe is not None and option is not None:
        # take_input returns the figures followed by the cleaned dataframe.
        *figures, processed_df = fc.take_input(uploaded_dataframe)
        st.dataframe(processed_df)

        # Lay the figures out in rows of 3, 3, 3 and 4 columns.
        first = 0
        for row_width in (3, 3, 3, 4):
            with st.container():
                row_figures = figures[first:first + row_width]
                for column, figure in zip(st.columns(row_width), row_figures):
                    with column:
                        st.plotly_chart(figure, use_container_width=True)
            first += row_width

with tab2:
    modeloption = st.selectbox('Select an ML Model', modelList)
    uploaded_dataframe = st.file_uploader("Choose a file", key=2)
    # train_test_split rejects a float test size of 0.0 or 1.0, so the slider
    # is bounded away from both ends and starts at a usable default.
    test_size_slider = st.slider('Enter the test size: ', 0.05, 0.95, 0.2)
    random_state_input = st.number_input('Select a random seed', 0, 1000)

    if uploaded_dataframe is not None:
        acc_score, classification_rep, output_df, original_df = fc.standardize_dataframe(
            uploaded_dataframe, modeloption, test_size_slider, random_state_input)
        st.dataframe(output_df)
        st.metric(label='Accuracy Score of ' + modeloption, value=str(acc_score))
        st.markdown('```bash \t \n' + classification_rep + '```')

        st.write('Enter some information to predict the churn:')
        yes_no = ['Yes', 'No']
        internet_dependent = ['Yes', 'No', 'No internet service']
        pr_1 = st.selectbox('Select the gender:', ['Female', 'Male'])
        pr_2 = st.selectbox('Is the customer a senior citizen?', yes_no)
        pr_3 = st.selectbox('Does the customer have a partner?', yes_no)
        pr_4 = st.selectbox('Does the customer have dependents?', yes_no)
        pr_5 = st.number_input('What is the customer tenure?', 0, 100)
        pr_6 = st.selectbox('Does the customer have phone service?', yes_no)
        pr_7 = st.selectbox('Does the customer have multiple lines?', ['Yes', 'No', 'No phone service'])
        pr_8 = st.selectbox('Does the customer have internet service?', ['No', 'DSL', 'Fiber optic'])
        pr_9 = st.selectbox('Does the customer have online security?', internet_dependent)
        pr_10 = st.selectbox('Does the customer have online backup?', internet_dependent)
        pr_11 = st.selectbox('Does the customer have device protection?', internet_dependent)
        pr_12 = st.selectbox('Does the customer have tech support?', internet_dependent)
        pr_13 = st.selectbox('Does the customer have streaming TV?', internet_dependent)
        pr_14 = st.selectbox('Does the customer have streaming movies?', internet_dependent)
        pr_15 = st.selectbox('Does the customer have a contract?', ['Month-to-month', 'One year', 'Two year'])
        pr_16 = st.selectbox('Does the customer have paperless billing?', yes_no)
        pr_17 = st.selectbox('What is the payment method of the customer?',
                             ['Electronic check', 'Mailed check',
                              'Bank transfer (automatic)', 'Credit card (automatic)'])
        pr_18 = st.number_input('What are the monthly charges of the customer?')
        pr_19 = st.number_input('What are the total charges of the customer?')

        if st.button('Predict Churn'):
            # One-row dataframe in the uploaded dataset's column layout; the
            # customerID is a placeholder that func.py drops before encoding.
            feature_vector = pd.DataFrame({
                'customerID': [1],
                'gender': [pr_1],
                'SeniorCitizen': [pr_2],
                'Partner': [pr_3],
                'Dependents': [pr_4],
                'tenure': [pr_5],
                'PhoneService': [pr_6],
                'MultipleLines': [pr_7],
                'InternetService': [pr_8],
                'OnlineSecurity': [pr_9],
                'OnlineBackup': [pr_10],
                'DeviceProtection': [pr_11],
                'TechSupport': [pr_12],
                'StreamingTV': [pr_13],
                'StreamingMovies': [pr_14],
                'Contract': [pr_15],
                'PaperlessBilling': [pr_16],
                'PaymentMethod': [pr_17],
                'MonthlyCharges': [pr_18],
                'TotalCharges': [pr_19],
            })
            # passing the feature vector to be processed and predict a churn output
            response = fc.standardize_feature_vector(
                feature_vector, original_df, test_size_slider, random_state_input)
            st.metric(label='Prediction Response', value=response)
func.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from plotly.subplots import make_subplots
4
+ import plotly.graph_objects as go
5
+ import matplotlib.pyplot as plt
6
+ import plotly.express as px
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.preprocessing import LabelEncoder
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.metrics import classification_report,accuracy_score
11
+ from sklearn.neighbors import KNeighborsClassifier
12
+ from sklearn.ensemble import VotingClassifier
13
+ from sklearn.ensemble import AdaBoostClassifier
14
+ from sklearn.ensemble import GradientBoostingClassifier
15
+ from sklearn.svm import SVC
16
+ from sklearn.tree import DecisionTreeClassifier
17
+ from sklearn.ensemble import RandomForestClassifier
18
+ from sklearn.linear_model import LogisticRegression
19
# Plot names offered in the dashboard's "Data" tab drop-down.
OPTION_LIST = [
    'Gender and Churn Distribution',
    'Customer Contract Distribution',
    'Payment Method Distribution',
    'Payment Method Distribution Churn',
    'Churn Distribution w.r.t Internet Service and Gender',
    'Dependents Distribution Churn',
    'Churn Distribution w.r.t Partners',
    'Churn Distribution w.r.t Senior Citizens',
    'Churn Distribution w.r.t Online Security',
    'Churn Distribution w.r.t Paperless Billing',
    'Churn Distribution w.r.t Tech Support',
    'Churn Distribution w.r.t Phone Service',
    'Tenure vs. Churn',
]

# Model labels understood by standardize_dataframe.
MODEL_SELECTOR = [
    'KNN',
    'SVC',
    'RF',
    'LR',
    'DT',
    'Adaboost',
    'Gradient Boosting',
    'Voting Classifier',
]

# Numeric columns that are standardized before model fitting.
num_cols = ["tenure", 'MonthlyCharges', 'TotalCharges']

# Module-level scaler instance shared by the functions in this file.
scaler = StandardScaler()
28
def preprocess(df):
    """Clean a raw churn dataframe and return the cleaned copy.

    Steps, in order:
      1. drop the ``customerID`` column;
      2. convert ``TotalCharges`` to numeric, turning unparseable entries
         (for example blank strings) into NaN;
      3. remove rows whose ``tenure`` equals 0;
      4. replace remaining NaN ``TotalCharges`` values with the column mean;
      5. relabel ``SeniorCitizen`` from 0/1 to "No"/"Yes".

    The dataframe passed in is left unmodified.

    Parameters:
        df: dataframe containing at least the ``customerID``, ``tenure``,
            ``TotalCharges`` and ``SeniorCitizen`` columns.

    Returns:
        A new dataframe with the transformations above applied. The original
        row index labels of the surviving rows are kept.
    """
    df = df.drop(['customerID'], axis=1)
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

    # Keep only customers with a non-zero tenure.
    df = df[df['tenure'] != 0].copy()

    # The fillna result must be assigned back, otherwise the NaNs survive.
    # Only TotalCharges is imputed: its mean is meaningless for other columns.
    df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].mean())

    df['SeniorCitizen'] = df['SeniorCitizen'].map({0: "No", 1: "Yes"})
    return df
38
def object_to_int(dataframe_series):
    """Label-encode a column whose dtype is ``object``.

    Columns of any other dtype are handed back untouched. For object columns
    the result is whatever ``LabelEncoder().fit_transform`` returns for the
    column's values.
    """
    if dataframe_series.dtype != 'object':
        return dataframe_series
    encoder = LabelEncoder()
    return encoder.fit_transform(dataframe_series)
42
def evaluate_voter(test_feature_vector, df, test_size, random_state):
    """Train a soft-voting ensemble on ``df`` and classify one feature vector.

    The raw dataframe is cleaned with ``preprocess``, label-encoded with
    ``object_to_int`` and split with ``train_test_split``; only the training
    part is used to fit the ensemble (gradient boosting, logistic regression
    and AdaBoost with soft voting). The numeric columns listed in
    ``num_cols`` are standardized with a scaler fitted on the training part,
    and the same fitted scaler is applied to ``test_feature_vector`` so the
    query row is on the scale the model was trained on.

    Parameters:
        test_feature_vector: already label-encoded dataframe holding the row
            to classify; it must contain every feature column of ``df``.
            It is not modified.
        df: raw dataframe including the ``Churn`` target column.
        test_size: forwarded to ``train_test_split``.
        random_state: forwarded to ``train_test_split``.

    Returns:
        A human-readable sentence stating whether the customer is predicted
        to stop or to continue using the services.
    """
    df = preprocess(df)
    df = df.apply(object_to_int)
    X = df.drop(columns=['Churn'])
    y = df['Churn'].values
    # The held-out part is unused here; the split is kept so the model is
    # trained on the same subset the caller's parameters describe.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)

    # A scaler local to this call avoids sharing fitted state across calls.
    feature_scaler = StandardScaler()
    X_train = X_train.copy()
    X_train[num_cols] = feature_scaler.fit_transform(X_train[num_cols])

    clf1 = GradientBoostingClassifier()
    clf2 = LogisticRegression()
    clf3 = AdaBoostClassifier()
    eclf1 = VotingClassifier(
        estimators=[('gbc', clf1), ('lr', clf2), ('abc', clf3)], voting='soft')
    eclf1.fit(X_train, y_train)

    # Align the query columns with the training layout and scale its numeric
    # columns with the training statistics before predicting.
    query = test_feature_vector[list(X_train.columns)].copy()
    query[num_cols] = feature_scaler.transform(query[num_cols])
    predicted_y = eclf1.predict(query)

    if predicted_y[0] == 1:
        return 'Customer is likely to stop using the telecom services'
    return 'Customer is likely to continue using the telecom services'
65
+
66
+
67
# Integer code for every categorical answer the dashboard form can produce.
# Within each column the codes follow the alphabetical order of the labels.
_FEATURE_ENCODINGS = {
    'SeniorCitizen': {"No": 0, "Yes": 1},
    'gender': {'Female': 0, 'Male': 1},
    'Partner': {"No": 0, "Yes": 1},
    'Dependents': {"No": 0, "Yes": 1},
    'PhoneService': {"No": 0, "Yes": 1},
    'MultipleLines': {"No phone service": 1, "No": 0, "Yes": 2},
    'InternetService': {'DSL': 0, 'Fiber optic': 1, 'No': 2},
    'OnlineSecurity': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'OnlineBackup': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'DeviceProtection': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'TechSupport': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'StreamingTV': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'StreamingMovies': {'No': 0, 'Yes': 2, 'No internet service': 1},
    'Contract': {'Month-to-month': 0, 'One year': 1, 'Two year': 2},
    'PaperlessBilling': {"No": 0, "Yes": 1},
    'PaymentMethod': {'Electronic check': 2, 'Mailed check': 3,
                      'Bank transfer (automatic)': 0,
                      'Credit card (automatic)': 1},
}


def standardize_feature_vector(df, original_df, test_size, random_state):
    """Encode a hand-entered customer row and predict its churn outcome.

    The ``customerID`` column is dropped, ``TotalCharges`` is converted to
    numeric, every categorical column is replaced by its integer code from
    ``_FEATURE_ENCODINGS``, and the encoded row is passed to
    ``evaluate_voter`` together with the raw training dataframe.

    Parameters:
        df: dataframe with the customer's raw answers, including
            ``customerID``. It is not modified.
        original_df: raw training dataframe forwarded to ``evaluate_voter``.
        test_size: forwarded to ``evaluate_voter``.
        random_state: forwarded to ``evaluate_voter``.

    Returns:
        The prediction sentence produced by ``evaluate_voter``.

    Raises:
        ValueError: if a categorical column holds a value that has no code,
            which would otherwise reach the model as NaN.
    """
    df = df.drop(['customerID'], axis=1)
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

    # Manual label encoding: a one-row frame cannot be label-encoded on its
    # own because the codes depend on all labels of the column.
    for column, codes in _FEATURE_ENCODINGS.items():
        encoded = df[column].map(codes)
        unknown = df.loc[encoded.isna(), column]
        if not unknown.empty:
            raise ValueError(
                f"Unsupported value(s) {unknown.tolist()!r} in column {column!r}")
        df[column] = encoded

    # passing the vector as a test vector to a trained voting classifier
    return evaluate_voter(df, original_df, test_size, random_state)
93
+
94
+
95
# Factories building a fresh, unfitted classifier for each MODEL_SELECTOR
# label, so that only the requested model is ever constructed.
_MODEL_FACTORIES = {
    'KNN': lambda: KNeighborsClassifier(n_neighbors=11),
    'SVC': lambda: SVC(random_state=1),
    # max_features="sqrt" is what the removed "auto" value meant for
    # classifiers; "auto" is rejected by current scikit-learn releases.
    'RF': lambda: RandomForestClassifier(
        n_estimators=500, oob_score=True, n_jobs=-1,
        random_state=50, max_features="sqrt", max_leaf_nodes=30),
    'LR': lambda: LogisticRegression(),
    'DT': lambda: DecisionTreeClassifier(),
    'Adaboost': lambda: AdaBoostClassifier(),
    'Gradient Boosting': lambda: GradientBoostingClassifier(),
    'Voting Classifier': lambda: VotingClassifier(
        estimators=[('gbc', GradientBoostingClassifier()),
                    ('lr', LogisticRegression()),
                    ('abc', AdaBoostClassifier())],
        voting='soft'),
}


def standardize_dataframe(filepath, option, test_size, random_state):
    """Train the selected model on a churn CSV and report its test scores.

    The CSV is read, cleaned with ``preprocess`` and label-encoded with
    ``object_to_int``. The data is split with a stratified
    ``train_test_split``; the numeric columns in ``num_cols`` are
    standardized with a scaler fitted on the training part only. The model
    named by ``option`` is fitted on the training part and scored on the
    test part.

    Parameters:
        filepath: path or file-like object accepted by ``pd.read_csv``.
        option: one of the model labels in ``MODEL_SELECTOR``.
        test_size: forwarded to ``train_test_split``.
        random_state: forwarded to ``train_test_split``.

    Returns:
        A 4-tuple ``(accuracy, report, encoded_df, raw_df)``: the test
        accuracy, the text classification report, the cleaned label-encoded
        dataframe (not standardized) and the dataframe as read from the CSV.

    Raises:
        ValueError: if ``option`` is not a known model label.
    """
    try:
        build_model = _MODEL_FACTORIES[option]
    except KeyError:
        raise ValueError(
            f"Unknown model option {option!r}; "
            f"expected one of {list(_MODEL_FACTORIES)}") from None

    df = pd.read_csv(filepath)
    df_new = preprocess(df)
    # label encoding the dataframe
    df_new = df_new.apply(object_to_int)

    # inputs and target selection
    X = df_new.drop(columns=['Churn'])
    y = df_new['Churn'].values

    # train test split (the caller chooses the train/test split percentage)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Standardize using training statistics only; a scaler local to this
    # call avoids sharing fitted state across calls.
    feature_scaler = StandardScaler()
    X_train = X_train.copy()
    X_test = X_test.copy()
    X_train[num_cols] = feature_scaler.fit_transform(X_train[num_cols])
    X_test[num_cols] = feature_scaler.transform(X_test[num_cols])

    model = build_model()
    model.fit(X_train, y_train)
    predicted_y = model.predict(X_test)
    return (accuracy_score(y_test, predicted_y),
            classification_report(y_test, predicted_y),
            df_new,
            df)
156
+
157
+
158
def _churn_histogram(df, color, title, *, grouped=False, color_map=None):
    """Build a 700x500 histogram of ``Churn`` split by the ``color`` column."""
    extra = {'barmode': 'group'} if grouped else {}
    fig = px.histogram(df, x="Churn", color=color, title=title,
                       color_discrete_map=color_map, **extra)
    fig.update_layout(width=700, height=500, bargap=0.1)
    return fig


def _internet_service_figure(df):
    """Build grouped bars of customer counts per internet service.

    Each trace is one ``InternetService`` value; the bars are the four
    churn/gender combinations, counted from ``df``.
    """
    combos = [('No', 'Female'), ('No', 'Male'), ('Yes', 'Female'), ('Yes', 'Male')]
    x_axis = [[f'Churn:{churn}' for churn, _ in combos],
              [gender for _, gender in combos]]
    fig = go.Figure()
    for service, trace_name in (('DSL', 'DSL'),
                                ('Fiber optic', 'Fiber optic'),
                                ('No', 'No Internet')):
        in_service = df['InternetService'] == service
        counts = [
            int((in_service & (df['Churn'] == churn) & (df['gender'] == gender)).sum())
            for churn, gender in combos
        ]
        fig.add_trace(go.Bar(x=x_axis, y=counts, name=trace_name))
    fig.update_layout(
        title_text="<b>Churn Distribution w.r.t. Internet Service and Gender</b>")
    return fig


def visualize(df):
    """Build the thirteen dashboard figures from a cleaned churn dataframe.

    Parameters:
        df: dataframe providing the columns read below (``gender``,
            ``Churn``, ``Contract``, ``PaymentMethod``, ``InternetService``,
            ``Dependents``, ``Partner``, ``SeniorCitizen``,
            ``OnlineSecurity``, ``PaperlessBilling``, ``TechSupport``,
            ``PhoneService`` and ``tenure``).

    Returns:
        A tuple ``(fig1, ..., fig13)`` of plotly figures, in the same order
        as the names in ``OPTION_LIST``.
    """
    # Labels are taken from the value_counts index so that every slice is
    # paired with its own count, whatever the frequency order of the data.
    gender_counts = df['gender'].value_counts()
    churn_counts = df['Churn'].value_counts()
    # Create subplots: use 'domain' type for Pie subplot
    fig1 = make_subplots(rows=1, cols=2,
                         specs=[[{'type': 'domain'}, {'type': 'domain'}]])
    fig1.add_trace(go.Pie(labels=gender_counts.index.tolist(),
                          values=gender_counts.tolist(), name="Gender"),
                   1, 1)
    fig1.add_trace(go.Pie(labels=churn_counts.index.tolist(),
                          values=churn_counts.tolist(), name="Churn"),
                   1, 2)
    # Use `hole` to create a donut-like pie chart
    fig1.update_traces(hole=.4, hoverinfo="label+percent+name", textfont_size=16)
    fig1.update_layout(
        title_text="Gender and Churn Distributions",
        # Add annotations in the center of the donut pies.
        annotations=[dict(text='Gender', x=0.16, y=0.5, font_size=20, showarrow=False),
                     dict(text='Churn', x=0.84, y=0.5, font_size=20, showarrow=False)])

    fig2 = _churn_histogram(df, "Contract",
                            "<b>Customer contract distribution</b>", grouped=True)

    payment_counts = df['PaymentMethod'].value_counts()
    fig3 = go.Figure(data=[go.Pie(labels=payment_counts.index.tolist(),
                                  values=payment_counts.tolist(), hole=.3)])
    fig3.update_layout(title_text="<b>Payment Method Distribution</b>")

    fig4 = _churn_histogram(
        df, "PaymentMethod",
        "<b>Customer Payment Method distribution w.r.t. Churn</b>")

    # Counts come from the supplied dataframe rather than fixed numbers, so
    # the chart reflects whichever dataset was uploaded.
    fig5 = _internet_service_figure(df)

    pink_purple = {"Yes": "#FF97FF", "No": "#AB63FA"}
    orange_green = {"Yes": '#FFA15A', "No": '#00CC96'}
    green_lime = {"Yes": '#00CC96', "No": '#B6E880'}

    fig6 = _churn_histogram(df, "Dependents", "<b>Dependents distribution</b>",
                            grouped=True, color_map=pink_purple)
    fig7 = _churn_histogram(df, "Partner",
                            "<b>Churn distribution w.r.t. Partners</b>",
                            grouped=True, color_map=orange_green)
    fig8 = _churn_histogram(df, "SeniorCitizen",
                            "<b>Churn distribution w.r.t. Senior Citizen</b>",
                            color_map=green_lime)
    fig9 = _churn_histogram(df, "OnlineSecurity",
                            "<b>Churn distribution w.r.t Online Security</b>",
                            grouped=True, color_map=pink_purple)
    fig10 = _churn_histogram(df, "PaperlessBilling",
                             "<b>Churn distribution w.r.t. Paperless Billing</b>",
                             color_map=orange_green)
    fig11 = _churn_histogram(df, "TechSupport",
                             "<b>Churn distribution w.r.t. Tech Support</b>",
                             grouped=True)
    fig12 = _churn_histogram(df, "PhoneService",
                             "<b>Churn Distribution w.r.t. Phone Service</b>",
                             color_map=green_lime)

    fig13 = px.box(df, x='Churn', y='tenure')
    fig13.update_yaxes(title_text='Tenure (Months)', row=1, col=1)
    fig13.update_xaxes(title_text='Churn', row=1, col=1)
    fig13.update_layout(autosize=True, width=750, height=600,
                        title_font=dict(size=25, family='Courier'),
                        title='<b>Tenure vs Churn</b>',
                        )
    return (fig1, fig2, fig3, fig4, fig5, fig6, fig7,
            fig8, fig9, fig10, fig11, fig12, fig13)
230
+
231
def take_input(filepath):
    """Read a churn CSV and return its figures followed by the cleaned data.

    The file is loaded with ``pd.read_csv``, cleaned with ``preprocess`` and
    plotted with ``visualize``. The result is a flat 14-tuple: the thirteen
    figures in the order ``visualize`` returns them, then the cleaned
    dataframe as the last element.
    """
    raw_frame = pd.read_csv(filepath)
    processed_df = preprocess(raw_frame)
    # Unpack into exactly thirteen names so an unexpected figure count fails
    # here rather than in the caller.
    (first, second, third, fourth, fifth, sixth, seventh,
     eighth, ninth, tenth, eleventh, twelfth, thirteenth) = visualize(processed_df)
    return (first, second, third, fourth, fifth, sixth, seventh,
            eighth, ninth, tenth, eleventh, twelfth, thirteenth,
            processed_df)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ scikit-learn
2
+ pandas
3
+ numpy
4
+ plotly
5
+ streamlit
6
+ matplotlib
7
+ seaborn
8
+ xgboost