Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
! pip install gradio
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import seaborn as sns
|
@@ -8,3 +9,246 @@ from sklearn import preprocessing
|
|
8 |
from sklearn.preprocessing import LabelEncoder
|
9 |
import gradio as gr
|
10 |
from array import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Third-party dependencies (gradio, transformers) must be declared in the
# Space's requirements.txt.  "! pip install ..." is IPython/notebook shell
# syntax and is a SyntaxError when this file runs as a plain Python script.
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
5 |
import seaborn as sns
|
|
|
9 |
from sklearn.preprocessing import LabelEncoder
|
10 |
import gradio as gr
|
11 |
from array import *
|
12 |
+
from transformers import pipeline
|
13 |
+
|
14 |
+
#from google.colab import drive
|
15 |
+
#drive.mount('/content/drive')
|
16 |
+
|
17 |
+
# Load the loan-application training data into a DataFrame.
df_train = pd.read_csv("train_ctrUa4K.csv")

# Print column dtypes and non-null counts to the log.
df_train.info()

# Print the category frequencies of the categorical columns.
for column in ('Gender', 'Married', 'Dependents', 'Self_Employed',
               'Credit_History', 'Property_Area'):
    print(df_train[column].value_counts())

# Fill missing categorical values with fixed defaults.  The filled Series is
# assigned back to the column: ``df[col].fillna(..., inplace=True)`` is a
# chained in-place operation on a temporary object, which is deprecated and
# leaves ``df_train`` untouched under pandas Copy-on-Write.
# NOTE(review): the defaults are presumably the most frequent value of each
# column (see the counts printed above) -- confirm against the data.
fill_defaults = {
    'Gender': "Male",
    'Married': "Yes",
    'Dependents': "0",
    'Self_Employed': "No",
    'Credit_History': 1.0,
}
for column, default in fill_defaults.items():
    df_train[column] = df_train[column].fillna(default)

# Report how many fully duplicated rows the data contains.
duplicate = df_train.duplicated()
print(duplicate.sum())

# One subplot row per numeric feature: box plot on the left, distribution
# plot on the right.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11; consider
# sns.histplot(..., kde=True) once the installed seaborn version is confirmed.
fig, ax = plt.subplots(3, 2, figsize=(10, 7))
for row, column in enumerate(('ApplicantIncome', 'CoapplicantIncome',
                              'Loan_Amount_Term')):
    sns.boxplot(x=df_train[column], ax=ax[row, 0])
    sns.distplot(df_train[column], ax=ax[row, 1])
|
54 |
+
|
55 |
+
def remove_outlier(col):
    """Return the interquartile-range outlier bounds for a numeric column.

    Despite its name, this function removes nothing: it only computes the
    bounds, and callers decide what to do with values outside them.

    Args:
        col: Object whose ``quantile([0.25, 0.75])`` yields the first and
            third quartiles, e.g. a pandas Series.

    Returns:
        Tuple ``(lower_range, upper_range)`` where
        ``lower_range = Q1 - 1.5 * IQR`` and ``upper_range = Q3 + 1.5 * IQR``.
    """
    # quantile() does not need sorted input, so no pre-sorting is done here.
    q1, q3 = col.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_range = q1 - (1.5 * iqr)
    upper_range = q3 + (1.5 * iqr)
    return lower_range, upper_range
|
62 |
+
|
63 |
+
# Clamp each numeric feature to its IQR bounds (values are capped, not
# dropped).  NaN compares False on both sides, so missing values pass
# through unchanged.
low_AI, high_AI = remove_outlier(df_train['ApplicantIncome'])
df_train['ApplicantIncome'] = np.where(df_train['ApplicantIncome'] > high_AI, high_AI, df_train['ApplicantIncome'])
df_train['ApplicantIncome'] = np.where(df_train['ApplicantIncome'] < low_AI, low_AI, df_train['ApplicantIncome'])

low_CI, high_CI = remove_outlier(df_train['CoapplicantIncome'])
df_train['CoapplicantIncome'] = np.where(df_train['CoapplicantIncome'] > high_CI, high_CI, df_train['CoapplicantIncome'])
df_train['CoapplicantIncome'] = np.where(df_train['CoapplicantIncome'] < low_CI, low_CI, df_train['CoapplicantIncome'])

low_LAT, high_LAT = remove_outlier(df_train['Loan_Amount_Term'])
df_train['Loan_Amount_Term'] = np.where(df_train['Loan_Amount_Term'] > high_LAT, high_LAT, df_train['Loan_Amount_Term'])
df_train['Loan_Amount_Term'] = np.where(df_train['Loan_Amount_Term'] < low_LAT, low_LAT, df_train['Loan_Amount_Term'])

# Re-draw one box plot per capped feature.
for column in ('ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term'):
    df_train.boxplot(column=[column])
    plt.show()

# Fill missing loan terms with 360.  The result is assigned back because a
# chained ``df[col].fillna(..., inplace=True)`` does not update ``df_train``
# under pandas Copy-on-Write.
df_train['Loan_Amount_Term'] = df_train['Loan_Amount_Term'].fillna(360)

# Median LoanAmount for every (Self_Employed, Education) combination.
# The string alias selects the same aggregation as np.median without the
# pandas FutureWarning about passing the NumPy callable.
table = df_train.pivot_table(values='LoanAmount', index='Self_Employed', columns='Education', aggfunc='median')
|
90 |
+
|
91 |
+
def val(x):
    """Return the module-level ``table`` entry matching one record.

    Args:
        x: Mapping-like record (e.g. a DataFrame row) with the keys
            'Self_Employed' and 'Education'.

    Returns:
        ``table.loc[row_label, column_label]`` where the row label is
        ``x['Self_Employed']`` and the column label is ``x['Education']``.
    """
    row_label = x['Self_Employed']
    column_label = x['Education']
    return table.loc[row_label, column_label]
|
93 |
+
|
94 |
+
# Fill each missing LoanAmount with the value val() looks up for that row.
# Writing through .loc updates df_train itself (a chained
# ``df[col].fillna(..., inplace=True)`` does not under pandas Copy-on-Write),
# and the guard skips the row-wise apply when nothing is missing.
missing_amount = df_train['LoanAmount'].isnull()
if missing_amount.any():
    df_train.loc[missing_amount, 'LoanAmount'] = df_train[missing_amount].apply(val, axis=1)

# Combined applicant + co-applicant income, used as a single feature.
df_train['Total_income'] = df_train['ApplicantIncome'] + df_train['CoapplicantIncome']

# ``df`` is an alias of df_train (same object, not a copy), so the encoding
# below also changes df_train.
df = df_train

# Replace each categorical column with integer codes.  LabelEncoder numbers
# the distinct values in sorted order; the encoder is refitted per column.
label_encoder = preprocessing.LabelEncoder()
for column in ('Gender', 'Married', 'Education', 'Self_Employed',
               'Property_Area', 'Dependents'):
    df[column] = label_encoder.fit_transform(df[column])

# Feature matrix and target column.
x = df_train[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Total_income']]

y = df_train[['Loan_Status']]

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
|
121 |
+
|
122 |
+
"""LOGISTIC REGRESSION"""
|
123 |
+
|
124 |
+
from sklearn.metrics import classification_report, confusion_matrix
|
125 |
+
import itertools
|
126 |
+
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D array of counts (must support ``astype``, ``sum``, ``max``
            and ``shape``, e.g. a NumPy array).  Rows are drawn along the
            'True label' axis, columns along the 'Predicted label' axis.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: When true, divide every row by its row sum before
            printing and plotting, and annotate cells with two decimals.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    if normalize:
        cell_format = '.2f'
    else:
        cell_format = 'd'
    # Cells above half of the maximum get white text so the annotation
    # stays readable on the darker end of the colormap.
    cutoff = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            cell = cm[row, col]
            text_color = "white" if cell > cutoff else "black"
            plt.text(col, row, format(cell, cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
|
158 |
+
|
159 |
+
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
|
160 |
+
from sklearn.linear_model import LogisticRegression
|
161 |
+
#from sklearn.metrics import confusion_matrix
|
162 |
+
# Hyper-parameter grid searched for the logistic-regression model.
# NOTE(review): several penalty/solver pairs in this grid are not supported
# by LogisticRegression (e.g. 'l1' with 'lbfgs', 'elasticnet' without
# l1_ratio), and the string 'none' penalty is only accepted by older
# scikit-learn releases.  GridSearchCV scores failed fits as NaN by default,
# so the search still completes -- consider restricting the grid to valid
# combinations once the installed scikit-learn version is confirmed.
parametersLR = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'C': [1, 0.5, 0.1, 0.01],
    'fit_intercept': [True, False],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    # The "unseeded" candidate must be the None object: the string 'none'
    # is not a valid random_state and makes every fit that uses it fail.
    'random_state': [10, 50, 100, None],
}
LR = LogisticRegression()
g = GridSearchCV(LR, parametersLR)
g.fit(x_train, y_train)

# Evaluate the refitted best estimator on the held-out split.
ypred = g.predict(x_test)

print(classification_report(y_test, ypred))

# Smoke-test prediction for a single hand-written, already-encoded applicant.
# Column names and order match the training feature matrix.
l = {
    'Gender': [1],
    'Married': [0],
    'Dependents': [0],
    'Education': [0],
    'Self_Employed': [0],
    'LoanAmount': [130],
    'Loan_Amount_Term': [360],
    'Credit_History': [1],
    'Property_Area': [2],
    'Total_income': [5849],
}
# NOTE: this rebinds the module-level name ``df`` to the one-row sample frame.
df = pd.DataFrame(l)
ans = g.predict(df)
ans2 = ans.tolist()
|
194 |
+
|
195 |
+
def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income, model=None):
    """Predict the loan status for one applicant described by form values.

    Args:
        Gender: "Male" or "Female".
        Marital_Status: "Married" or "Unmarried".
        Dependents: "0", "1", "2" or "3+".
        Education: "Educated" or "Uneducated".
        Self_Employed: "Yes" or "No".
        Loan_Amount: Loan amount as a number or numeric string.
        Credit_History: "1" or "0".
        Property_Area: "0", "1" or "2" (already-encoded area code).
        Total_Income: Total income as a number or numeric string.
        model: Optional object with a ``predict(DataFrame)`` method.  When
            omitted, the module-level fitted search ``g`` is used.

    Returns:
        "Loan Status: Approved!" when the model predicts "Y",
        "Loan Status: Disapproved" when it predicts "N", otherwise None.

    Raises:
        ValueError: If a categorical argument is not one of its listed
            choices, or a numeric argument cannot be converted to float.
    """
    # (feature column, submitted choice, choice -> integer code)
    choices = (
        ('Gender', Gender, {"Male": 1, "Female": 0}),
        ('Married', Marital_Status, {"Married": 1, "Unmarried": 0}),
        ('Dependents', Dependents, {"0": 0, "1": 1, "2": 2, "3+": 3}),
        # NOTE(review): the training columns are label-encoded in sorted
        # order, so if the raw Education values are "Graduate" and
        # "Not Graduate" then graduates are encoded as 0 and this mapping
        # is inverted -- confirm against the dataset before changing it.
        ('Education', Education, {"Educated": 1, "Uneducated": 0}),
        ('Self_Employed', Self_Employed, {"Yes": 1, "No": 0}),
        ('Credit_History', Credit_History, {"1": 1, "0": 0}),
        ('Property_Area', Property_Area, {"0": 0, "1": 1, "2": 2}),
    )
    encoded = {}
    for column, choice, codes in choices:
        if choice not in codes:
            raise ValueError(
                f"{column}: expected one of {list(codes)}, got {choice!r}"
            )
        encoded[column] = codes[choice]

    # The form delivers these two fields as free text; convert them up
    # front so the model receives numbers and bad input fails clearly.
    try:
        loan_amount = float(Loan_Amount)
        total_income = float(Total_Income)
    except (TypeError, ValueError) as err:
        raise ValueError("Loan_Amount and Total_Income must be numeric") from err

    # Column names and order must match the training feature matrix.
    # Loan_Amount_Term is not collected from the form and is fixed at 360.
    frame = pd.DataFrame({
        'Gender': [encoded['Gender']],
        'Married': [encoded['Married']],
        'Dependents': [encoded['Dependents']],
        'Education': [encoded['Education']],
        'Self_Employed': [encoded['Self_Employed']],
        'LoanAmount': [loan_amount],
        'Loan_Amount_Term': [360],
        'Credit_History': [encoded['Credit_History']],
        'Property_Area': [encoded['Property_Area']],
        'Total_income': [total_income],
    })

    if model is None:
        model = g
    label = model.predict(frame)[0]
    if label == "Y":
        return "Loan Status: Approved!"
    if label == "N":
        return "Loan Status: Disapproved"
    return None
|
249 |
+
|
250 |
+
# Web form wrapping pred(): one input widget per pred() argument, listed in
# the same order as the function signature; the result is shown as text.
iface = gr.Interface(
    inputs=[
        gr.inputs.Radio(["Male", "Female"]),          # Gender
        gr.inputs.Radio(["Married", "Unmarried"]),    # Marital_Status
        gr.inputs.Radio(["0", "1", "2", "3+"]),       # Dependents
        gr.inputs.Radio(["Educated", "Uneducated"]),  # Education
        gr.inputs.Radio(["Yes", "No"]),               # Self_Employed
        "text",                                       # Loan_Amount
        gr.inputs.Radio(["1", "0"]),                  # Credit_History
        gr.inputs.Radio(["0", "1", "2"]),             # Property_Area
        "text",                                       # Total_Income
    ],
    outputs="text",
    fn=pred,
)
iface.launch(inline=False)
|