Spaces:

sanjana
/

Loan-Prediction-Analysis

Runtime error

App Files Files Community

sanjana committed on Jan 20, 2022

Commit

3f7f93d

1 Parent(s): 98cded4

Update app.py

Browse files

Files changed (1) hide show

app.py +244 -0

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 ! pip install gradio
 import pandas as pd
 import numpy as np
 import seaborn as sns
@@ -8,3 +9,246 @@ from sklearn import preprocessing
 from sklearn.preprocessing import LabelEncoder
 import gradio as gr
 from array import *

 ! pip install gradio
+! pip install transformers
 import pandas as pd
 import numpy as np
 import seaborn as sns
 from sklearn.preprocessing import LabelEncoder
 import gradio as gr
 from array import *
+from transformers import pipeline
+#from google.colab import drive
+#drive.mount('/content/drive')
+# --- Load the training data and fill missing categorical values -------------
+df_train = pd.read_csv("train_ctrUa4K.csv") #Reading the dataset in a dataframe using Pandas
+
+# Notebook leftovers: the bare expressions below display nothing when the file
+# runs as a script; only info() and the print() calls write to stdout.
+df_train.head()
+df_train.describe()
+df_train.shape
+df_train.info()
+df_train.isnull().sum()
+for categorical_column in ('Gender', 'Married', 'Dependents',
+                           'Self_Employed', 'Credit_History', 'Property_Area'):
+    print(df_train[categorical_column].value_counts())
+
+# Fixed replacement values for missing entries (presumably the most frequent
+# value of each column in the counts printed above -- confirm against the data).
+fill_defaults = {
+    'Gender': "Male",
+    'Married': "Yes",
+    'Dependents': "0",
+    'Self_Employed': "No",
+    'Credit_History': 1.0,
+}
+# Assign the filled column back instead of calling fillna(inplace=True) on a
+# selected column: that form is chained assignment and may not update df_train
+# when pandas copy-on-write is active.
+for column_name, default_value in fill_defaults.items():
+    df_train[column_name] = df_train[column_name].fillna(default_value)
+df_train.isnull().sum()
+
+# Report how many fully duplicated rows exist.
+duplicate=df_train.duplicated()
+print(duplicate.sum())
+df_train[duplicate]
+# One grid row per numeric column: box plot on the left, distribution plot on
+# the right.
+fig, ax = plt.subplots(3, 2, figsize = (10, 7))
+for grid_row, numeric_column in enumerate(
+        ("ApplicantIncome", "CoapplicantIncome", "Loan_Amount_Term")):
+    sns.boxplot(x=df_train[numeric_column], ax=ax[grid_row, 0])
+    sns.distplot(df_train[numeric_column], ax=ax[grid_row, 1])
+def remove_outlier(col):
+    """Return the IQR-based outlier bounds of a numeric column.
+
+    Despite its name, this function removes nothing: it only computes the
+    bounds, and the caller decides what to do with values outside them.
+
+    Args:
+        col: Object whose ``quantile`` method accepts a list of quantiles and
+            returns the matching values (callers in this file pass a
+            DataFrame column).
+
+    Returns:
+        Tuple ``(lower_range, upper_range)`` equal to
+        ``(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)``.
+    """
+    # quantile() does not need sorted input; the former sorted(col) call threw
+    # its result away, so it was pure wasted work and has been removed.
+    Q1, Q3 = col.quantile([0.25, 0.75])
+    IQR = Q3 - Q1
+    lower_range = Q1 - (1.5 * IQR)
+    upper_range = Q3 + (1.5 * IQR)
+    return lower_range, upper_range
+# Cap each numeric column at the bounds returned by remove_outlier(): values
+# above the upper bound are replaced by it first, then values below the lower
+# bound are replaced by the lower bound.
+low_AI, high_AI = remove_outlier(df_train['ApplicantIncome'])
+column_values = df_train['ApplicantIncome']
+df_train['ApplicantIncome'] = np.where(column_values > high_AI, high_AI, column_values)
+column_values = df_train['ApplicantIncome']
+df_train['ApplicantIncome'] = np.where(column_values < low_AI, low_AI, column_values)
+
+low_CI, high_CI = remove_outlier(df_train['CoapplicantIncome'])
+column_values = df_train['CoapplicantIncome']
+df_train['CoapplicantIncome'] = np.where(column_values > high_CI, high_CI, column_values)
+column_values = df_train['CoapplicantIncome']
+df_train['CoapplicantIncome'] = np.where(column_values < low_CI, low_CI, column_values)
+
+low_LAT, high_LAT = remove_outlier(df_train['Loan_Amount_Term'])
+column_values = df_train['Loan_Amount_Term']
+df_train['Loan_Amount_Term'] = np.where(column_values > high_LAT, high_LAT, column_values)
+column_values = df_train['Loan_Amount_Term']
+df_train['Loan_Amount_Term'] = np.where(column_values < low_LAT, low_LAT, column_values)
+
+# Box plot of every capped column, each in its own figure.
+for capped_column in ('ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term'):
+    df_train.boxplot(column=[capped_column])
+    plt.show()
+df_train.isnull().sum()
+# Missing loan terms default to 360. The column is assigned back rather than
+# filled with inplace=True on a selected column, which is chained assignment
+# and may not update df_train when pandas copy-on-write is active.
+df_train['Loan_Amount_Term'] = df_train['Loan_Amount_Term'].fillna(360)
+# Median LoanAmount for every Self_Employed x Education combination; val()
+# reads this table to fill missing loan amounts. The aggregation is named by
+# string, the spelling pandas recommends over passing np.median.
+table = df_train.pivot_table(values='LoanAmount', index='Self_Employed', columns='Education', aggfunc='median')
+table
+def val(x):
+    """Look up the module-level ``table`` entry for one row.
+
+    Args:
+        x: Row-like object indexable by ``'Self_Employed'`` and
+            ``'Education'``.
+
+    Returns:
+        The value stored in ``table`` at that (Self_Employed, Education)
+        position.
+    """
+    employment_key = x['Self_Employed']
+    education_key = x['Education']
+    return table.loc[employment_key, education_key]
+# Fill missing LoanAmount values with the per-row value returned by val().
+# The guard skips the step when nothing is missing: apply() on an empty frame
+# does not yield a usable fill value. The column is assigned back instead of
+# using fillna(inplace=True) on a selected column, which is chained assignment.
+missing_loan_amount = df_train['LoanAmount'].isnull()
+if missing_loan_amount.any():
+    df_train['LoanAmount'] = df_train['LoanAmount'].fillna(
+        df_train[missing_loan_amount].apply(val, axis=1))
+
+# Combined household income; used as a model feature below.
+df_train['Total_income']=df_train['ApplicantIncome']+df_train['CoapplicantIncome']
+df_train.head()
+
+# df is another name for the same DataFrame object, so the encoding below
+# also changes df_train.
+df=df_train
+label_encoder = preprocessing.LabelEncoder()
+# Replace each categorical column by integer codes. The encoder is re-fitted
+# per column, so afterwards it only remembers the classes of the last one.
+for encoded_column in ('Gender', 'Married', 'Education',
+                       'Self_Employed', 'Property_Area', 'Dependents'):
+    df[encoded_column] = label_encoder.fit_transform(df[encoded_column])
+df.head()
+
+# Feature matrix and target; the column order of x is the order the model is
+# trained on.
+x=df_train[['Gender','Married','Dependents','Education','Self_Employed', 'LoanAmount','Loan_Amount_Term','Credit_History','Property_Area', 'Total_income']]
+y=df_train[['Loan_Status']]
+from sklearn.model_selection import train_test_split
+# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
+# reproducible.
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
+"""LOGISTIC REGRESSION"""
+from sklearn.metrics import classification_report, confusion_matrix
+import itertools
+def plot_confusion_matrix(cm, classes,
+                          normalize=False,
+                          title='Confusion matrix',
+                          cmap=plt.cm.Blues):
+    """
+    Print a confusion matrix and draw it as an annotated image.
+
+    With `normalize=True` every row is divided by its row sum before the
+    matrix is printed and plotted. `classes` supplies the tick labels for
+    both axes, `title` the plot title and `cmap` the colour map.
+    """
+    if normalize:
+        row_totals = cm.sum(axis=1)[:, np.newaxis]
+        cm = cm.astype('float') / row_totals
+        banner = "Normalized confusion matrix"
+    else:
+        banner = 'Confusion matrix, without normalization'
+    print(banner)
+    print(cm)
+
+    plt.imshow(cm, interpolation='nearest', cmap=cmap)
+    plt.title(title)
+    plt.colorbar()
+    positions = np.arange(len(classes))
+    plt.xticks(positions, classes, rotation=45)
+    plt.yticks(positions, classes)
+
+    # Write each cell's value on top of the image; cells above half of the
+    # maximum get white text, the others black.
+    cell_format = '.2f' if normalize else 'd'
+    half_of_max = cm.max() / 2.
+    for row in range(cm.shape[0]):
+        for col in range(cm.shape[1]):
+            text_colour = "white" if cm[row, col] > half_of_max else "black"
+            plt.text(col, row, format(cm[row, col], cell_format),
+                     horizontalalignment="center",
+                     color=text_colour)
+
+    plt.tight_layout()
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label')
+from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
+from sklearn.linear_model import LogisticRegression
+#from sklearn.metrics import confusion_matrix
+
+# Candidate hyper-parameters; GridSearchCV tries every combination.
+# NOTE(review): not every penalty/solver pair is supported by
+# LogisticRegression, so part of this grid cannot be fitted -- consider
+# splitting it into one grid per solver.
+parametersLR = {
+    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
+    'C': [1, 0.5, 0.1, 0.01],
+    'fit_intercept': [True, False],
+    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
+    # None (not the string 'none') is the value that means "no fixed seed";
+    # the string is not a valid random_state, so those candidates never fit.
+    'random_state': [10, 50, 100, None],
+}
+LR = LogisticRegression()
+#r = RandomizedSearchCV(LR,parametersLR)
+g = GridSearchCV(LR, parametersLR)
+# y_train is a one-column DataFrame; hand the estimator a 1-D array of labels.
+g.fit(x_train, y_train.values.ravel())
+ypred = g.predict(x_test)
+ypred
+print (classification_report(y_test, ypred))
+# One hand-written, already encoded applicant, used to try the fitted model
+# once. The keys follow the feature column order the model is trained on.
+l = dict(
+    Gender=[1],
+    Married=[0],
+    Dependents=[0],
+    Education=[0],
+    Self_Employed=[0],
+    LoanAmount=[130],
+    Loan_Amount_Term=[360],
+    Credit_History=[1],
+    Property_Area=[2],
+    Total_income=[5849],
+)
+# Note: this rebinds df to the single-row frame.
+df = pd.DataFrame(l)
+ans = g.predict(df)
+ans2 = ans.tolist()
+ans2[0]
+df
+def _encode_choice(field, choice, codes):
+    """Map a UI choice to its integer code, rejecting anything unexpected."""
+    try:
+        return codes[choice]
+    except (KeyError, TypeError):
+        raise ValueError(
+            f"{field}: expected one of {sorted(codes)}, got {choice!r}"
+        ) from None
+
+
+def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income):
+    """Predict the loan status for one applicant described by UI inputs.
+
+    Args:
+        Gender: "Male" or "Female".
+        Marital_Status: "Married" or "Unmarried".
+        Dependents: "0", "1", "2" or "3+".
+        Education: "Educated" or "Uneducated".
+        Self_Employed: "Yes" or "No".
+        Loan_Amount: Loan amount as a number or numeric text.
+        Credit_History: "1" or "0".
+        Property_Area: "0", "1" or "2".
+        Total_Income: Total income as a number or numeric text.
+
+    Returns:
+        "Loan Status: Approved!" when the model predicts "Y",
+        "Loan Status: Disapproved" when it predicts "N", otherwise None.
+
+    Raises:
+        ValueError: If a choice is missing or not one of the listed options,
+            or if a numeric field cannot be converted to a number.
+    """
+    # NOTE(review): these codes must match the integers produced when the
+    # training columns were label-encoded (LabelEncoder numbers the labels in
+    # sorted order). Confirm each mapping, Education in particular, against
+    # the fitted data.
+    gen = _encode_choice("Gender", Gender, {"Male": 1, "Female": 0})
+    m = _encode_choice("Marital_Status", Marital_Status, {"Married": 1, "Unmarried": 0})
+    d = _encode_choice("Dependents", Dependents, {"0": 0, "1": 1, "2": 2, "3+": 3})
+    e = _encode_choice("Education", Education, {"Educated": 1, "Uneducated": 0})
+    se = _encode_choice("Self_Employed", Self_Employed, {"Yes": 1, "No": 0})
+    ch = _encode_choice("Credit_History", Credit_History, {"1": 1, "0": 0})
+    # The original chain misspelled this variable in its last branch
+    # ("Propert_Area"), which raised NameError whenever "2" was selected.
+    pa = _encode_choice("Property_Area", Property_Area, {"0": 0, "1": 1, "2": 2})
+
+    l = {'Gender': [gen],
+         'Married': [m],
+         'Dependents': [d],
+         'Education': [e],
+         'Self_Employed': [se],
+         # Free-text fields arrive as strings; convert them explicitly so a
+         # bad value fails here with a clear error.
+         'LoanAmount': [float(Loan_Amount)],
+         # The UI has no input for the loan term, so 360 is always used.
+         'Loan_Amount_Term': [360],
+         'Credit_History': [ch],
+         'Property_Area': [pa],
+         'Total_income': [float(Total_Income)]
+         }
+    df = pd.DataFrame(l)
+    # g is the fitted search object defined at module level.
+    ans = g.predict(df)
+    ans2 = ans.tolist()
+    messages = {"Y": "Loan Status: Approved!", "N": "Loan Status: Disapproved"}
+    return messages.get(ans2[0])
+# Web form for pred(): one input component per pred() parameter, in the same
+# order, and a single text output for the returned message.
+iface = gr.Interface(
+    fn=pred,
+    inputs=[
+        gr.inputs.Radio(["Male", "Female"]),          # Gender
+        gr.inputs.Radio(["Married", "Unmarried"]),    # Marital_Status
+        gr.inputs.Radio(["0", "1","2", "3+"]),        # Dependents
+        gr.inputs.Radio(["Educated", "Uneducated"]),  # Education
+        gr.inputs.Radio(["Yes", "No"]),               # Self_Employed
+        "text",                                       # Loan_Amount
+        gr.inputs.Radio(["1", "0"]),                  # Credit_History
+        gr.inputs.Radio(["0", "1", "2"]),             # Property_Area
+        "text",                                       # Total_Income
+    ],
+    outputs="text",
+)
+iface.launch(inline=False)