sanjana committed on
Commit
3f7f93d
·
1 Parent(s): 98cded4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +244 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  ! pip install gradio
 
2
  import pandas as pd
3
  import numpy as np
4
  import seaborn as sns
@@ -8,3 +9,246 @@ from sklearn import preprocessing
8
  from sklearn.preprocessing import LabelEncoder
9
  import gradio as gr
10
  from array import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party dependencies: gradio, transformers.
# NOTE: "! pip install ..." is IPython/Colab shell syntax and is a SyntaxError
# in a plain .py file, so the packages must be installed outside this script
# (for example by listing them in a requirements.txt file).
3
  import pandas as pd
4
  import numpy as np
5
  import seaborn as sns
 
9
  from sklearn.preprocessing import LabelEncoder
10
  import gradio as gr
11
  from array import *
12
+ from transformers import pipeline
13
+
14
+ #from google.colab import drive
15
+ #drive.mount('/content/drive')
16
+
17
# Read the training data into a DataFrame; the CSV path is relative to the
# current working directory.
df_train = pd.read_csv("train_ctrUa4K.csv")

# Notebook-style inspection. Outside a notebook the bare expressions are
# evaluated and discarded; only df_train.info() prints on its own.
df_train.head()
df_train.describe()
df_train.shape
df_train.info()
df_train.isnull().sum()

# Print the frequency of every value in each categorical column.
for column in ('Gender', 'Married', 'Dependents', 'Self_Employed',
               'Credit_History', 'Property_Area'):
    print(df_train[column].value_counts())
35
+
36
# Fill missing categorical values with a fixed default per column.
# NOTE(review): the defaults are presumably the most frequent value of each
# column (see the value_counts printed earlier) -- confirm against the data.
#
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df_train[column]: that chained in-place form
# operates on an intermediate object, is deprecated in recent pandas and does
# not modify df_train when Copy-on-Write is enabled.
default_values = {
    'Gender': "Male",
    'Married': "Yes",
    'Dependents': "0",
    'Self_Employed': "No",
    'Credit_History': 1.0,
}
for column, default in default_values.items():
    df_train[column] = df_train[column].fillna(default)
df_train.isnull().sum()

# Count fully duplicated rows and select them (notebook-style inspection).
duplicate = df_train.duplicated()
print(duplicate.sum())
df_train[duplicate]
46
+
47
# One grid row per numeric column: box plot on the left, distribution plot on
# the right.
fig, ax = plt.subplots(3, 2, figsize=(10, 7))
for row, column in enumerate(("ApplicantIncome", "CoapplicantIncome", "Loan_Amount_Term")):
    sns.boxplot(x=df_train[column], ax=ax[row, 0])
    sns.distplot(df_train[column], ax=ax[row, 1])
54
+
55
def remove_outlier(col):
    """Return the 1.5 * IQR outlier bounds for a column.

    Despite its name, this function removes nothing: it only computes the
    lower and upper limits, and the caller decides what to do with values
    that fall outside them.

    Args:
        col: An object whose ``quantile([0.25, 0.75])`` yields the first and
            third quartiles, e.g. a pandas Series.

    Returns:
        A ``(lower_range, upper_range)`` tuple equal to
        ``(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)``.
    """
    # quantile() does not require sorted input, so the original's discarded
    # sorted(col) call was pure overhead and has been dropped.
    q1, q3 = col.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_range = q1 - (1.5 * iqr)
    upper_range = q3 + (1.5 * iqr)
    return lower_range, upper_range
62
+
63
# Outlier bounds for each numeric column, as computed by remove_outlier().
low_AI, high_AI = remove_outlier(df_train['ApplicantIncome'])
low_CI, high_CI = remove_outlier(df_train['CoapplicantIncome'])
low_LAT, high_LAT = remove_outlier(df_train['Loan_Amount_Term'])

# Cap each column at its bounds: values above the upper bound are replaced by
# it first, then values below the lower bound are replaced by the lower bound.
for column, low, high in (
    ('ApplicantIncome', low_AI, high_AI),
    ('CoapplicantIncome', low_CI, high_CI),
    ('Loan_Amount_Term', low_LAT, high_LAT),
):
    df_train[column] = np.where(df_train[column] > high, high, df_train[column])
    df_train[column] = np.where(df_train[column] < low, low, df_train[column])

# Show one box plot per capped column.
for column in ('ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term'):
    df_train.boxplot(column=[column])
    plt.show()
83
+
84
df_train.isnull().sum()

# Fill missing loan terms with 360. The result is assigned back to the column
# because fillna(..., inplace=True) on df_train[column] is a chained in-place
# update that does not modify df_train under pandas Copy-on-Write.
df_train['Loan_Amount_Term'] = df_train['Loan_Amount_Term'].fillna(360)

# Median LoanAmount for every (Self_Employed, Education) combination.
table = df_train.pivot_table(values='LoanAmount', index='Self_Employed', columns='Education', aggfunc=np.median)
table
90
+
91
def val(x):
    """Return the module-level ``table`` entry matching one row.

    Args:
        x: A row-like mapping providing 'Self_Employed' and 'Education'.

    Returns:
        The value of ``table`` at row ``x['Self_Employed']`` and column
        ``x['Education']``.
    """
    employment = x['Self_Employed']
    education = x['Education']
    return table.loc[employment, education]
93
+
94
# Fill each missing LoanAmount with the value val() looks up for that row.
# The filled column is assigned back rather than using
# fillna(..., inplace=True) on df_train['LoanAmount'], which is a chained
# in-place update that does not modify df_train under pandas Copy-on-Write.
missing_loan_amount = df_train['LoanAmount'].isnull()
if missing_loan_amount.any():  # skip the row-wise apply when nothing is missing
    replacements = df_train[missing_loan_amount].apply(val, axis=1)
    df_train['LoanAmount'] = df_train['LoanAmount'].fillna(replacements)

# Combined income of applicant and co-applicant.
df_train['Total_income'] = df_train['ApplicantIncome'] + df_train['CoapplicantIncome']

df_train.head()
99
+
100
# `df` is a second name for the same DataFrame object, not a copy, so the
# encoding below also changes df_train.
df = df_train

# A single LabelEncoder is re-fitted for every column, so after this section
# it only holds the classes of the last column it was fitted on.
label_encoder = preprocessing.LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])

df

for column in ('Married', 'Education', 'Self_Employed', 'Property_Area', 'Dependents'):
    df[column] = label_encoder.fit_transform(df[column])

df.head()
114
+
115
# Feature columns used by the model.
x = df_train[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Total_income']]

# Target selected as a 1-D Series. The original one-column DataFrame
# (df_train[['Loan_Status']]) is a column vector, which scikit-learn
# estimators warn about and flatten on every fit.
y = df_train['Loan_Status']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
121
+
122
+ """LOGISTIC REGRESSION"""
123
+
124
+ from sklearn.metrics import classification_report, confusion_matrix
125
+ import itertools
126
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D array of counts, indexed as ``cm[i, j]``.
        classes: Tick labels used on both axes.
        normalize: When true, each row is divided by its row sum before
            printing and plotting.
        title: Title of the plot.
        cmap: Colour map passed to ``plt.imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    if normalize:
        cell_format = '.2f'
    else:
        cell_format = 'd'
    # Cells above half of the maximum get white text, the others black.
    half_max = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row, col in itertools.product(range(n_rows), range(n_cols)):
        if cm[row, col] > half_max:
            text_colour = "white"
        else:
            text_colour = "black"
        plt.text(col, row, format(cm[row, col], cell_format),
                 horizontalalignment="center",
                 color=text_colour)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
158
+
159
+ from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
160
+ from sklearn.linear_model import LogisticRegression
161
+ #from sklearn.metrics import confusion_matrix
162
# Hyper-parameter grid for the logistic-regression search.
# NOTE(review): not every solver supports every penalty (and 'elasticnet'
# additionally needs l1_ratio), so some combinations cannot be fitted; whether
# the string 'none' is accepted for `penalty` depends on the installed
# scikit-learn version -- confirm against the version in use.
parametersLR = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'C': [1, 0.5, 0.1, 0.01],
    'fit_intercept': [True, False],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    # "No seed" must be the None object: the string 'none' used originally is
    # not a valid random_state and makes every fit using it fail.
    'random_state': [10, 50, 100, None],
}
LR = LogisticRegression()
#r = RandomizedSearchCV(LR,parametersLR)
g = GridSearchCV(LR, parametersLR)
g.fit(x_train, y_train)

# Predict the held-out rows with the fitted search object.
ypred = g.predict(x_test)
ypred

print(classification_report(y_test, ypred))
177
+
178
# Smoke test: run the fitted search object on one hand-written, already
# encoded applicant.
l = dict(
    Gender=[1],
    Married=[0],
    Dependents=[0],
    Education=[0],
    Self_Employed=[0],
    LoanAmount=[130],
    Loan_Amount_Term=[360],
    Credit_History=[1],
    Property_Area=[2],
    Total_income=[5849],
)
# This rebinds the module-level name `df` to the one-row frame.
df = pd.DataFrame(l)
ans = g.predict(df)
ans2 = ans.tolist()
ans2[0]
df
194
+
195
# Integer code fed to the model for each UI choice, per form field.
# NOTE(review): these codes must agree with the label encoding applied to the
# training columns. LabelEncoder numbers classes in sorted order, so check in
# particular that "Educated" -> 1 matches the encoded Education column.
_CHOICE_CODES = {
    'Gender': {"Male": 1, "Female": 0},
    'Marital_Status': {"Married": 1, "Unmarried": 0},
    'Dependents': {"0": 0, "1": 1, "2": 2, "3+": 3},
    'Education': {"Educated": 1, "Uneducated": 0},
    'Self_Employed': {"Yes": 1, "No": 0},
    'Credit_History': {"1": 1, "0": 0},
    'Property_Area': {"0": 0, "1": 1, "2": 2},
}


def _encode_choice(field, choice):
    """Return the integer code of ``choice`` for ``field`` or raise ValueError."""
    try:
        return _CHOICE_CODES[field][choice]
    except (KeyError, TypeError):
        raise ValueError(f"Invalid value for {field}: {choice!r}") from None


def _to_number(field, text):
    """Convert a free-text numeric input to float or raise ValueError."""
    try:
        return float(text)
    except (TypeError, ValueError):
        raise ValueError(f"{field} must be a number, got {text!r}") from None


def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income):
    """Predict the loan status for one applicant described by form inputs.

    Each choice is translated to its integer code, a one-row DataFrame with
    the model's feature columns is built, and the module-level fitted search
    object ``g`` is asked for a prediction. Loan_Amount_Term is fixed at 360.

    Fixes over the original: the ``Propert_Area`` typo raised NameError when
    property area "2" was selected, and an unselected or unknown choice left
    a local variable unbound. Invalid input now raises ValueError instead.

    Args:
        Gender: "Male" or "Female".
        Marital_Status: "Married" or "Unmarried".
        Dependents: "0", "1", "2" or "3+".
        Education: "Educated" or "Uneducated".
        Self_Employed: "Yes" or "No".
        Loan_Amount: Loan amount as a number or numeric text.
        Credit_History: "1" or "0".
        Property_Area: "0", "1" or "2".
        Total_Income: Total income as a number or numeric text.

    Returns:
        "Loan Status: Approved!" when the model predicts "Y",
        "Loan Status: Disapproved" when it predicts "N", otherwise None.

    Raises:
        ValueError: If a choice is not one of the listed options or a numeric
            field cannot be converted to a number.
    """
    # Validate everything before touching the model so bad input fails fast.
    gen = _encode_choice('Gender', Gender)
    m = _encode_choice('Marital_Status', Marital_Status)
    d = _encode_choice('Dependents', Dependents)
    e = _encode_choice('Education', Education)
    se = _encode_choice('Self_Employed', Self_Employed)
    ch = _encode_choice('Credit_History', Credit_History)
    pa = _encode_choice('Property_Area', Property_Area)
    loan_amount = _to_number('Loan_Amount', Loan_Amount)
    total_income = _to_number('Total_Income', Total_Income)

    # Column names and order mirror the feature frame the model was fitted on.
    features = pd.DataFrame({
        'Gender': [gen],
        'Married': [m],
        'Dependents': [d],
        'Education': [e],
        'Self_Employed': [se],
        'LoanAmount': [loan_amount],
        'Loan_Amount_Term': [360],
        'Credit_History': [ch],
        'Property_Area': [pa],
        'Total_income': [total_income],
    })
    label = g.predict(features).tolist()[0]
    if label == "Y":
        return "Loan Status: Approved!"
    if label == "N":
        return "Loan Status: Disapproved"
    return None
249
+
250
# Input widgets, in the same order as the parameters of pred().
input_components = [
    gr.inputs.Radio(["Male", "Female"]),            # Gender
    gr.inputs.Radio(["Married", "Unmarried"]),      # Marital_Status
    gr.inputs.Radio(["0", "1", "2", "3+"]),         # Dependents
    gr.inputs.Radio(["Educated", "Uneducated"]),    # Education
    gr.inputs.Radio(["Yes", "No"]),                 # Self_Employed
    "text",                                         # Loan_Amount
    gr.inputs.Radio(["1", "0"]),                    # Credit_History
    gr.inputs.Radio(["0", "1", "2"]),               # Property_Area
    "text",                                         # Total_Income
]

# Build the web form around pred() and start serving it.
iface = gr.Interface(fn=pred, inputs=input_components, outputs="text")
iface.launch(inline=False)