Spaces:
Sleeping
Sleeping
| from sklearn.preprocessing import LabelEncoder | |
def feature_engineering(df):
    """Return a new DataFrame with derived income/loan features added.

    The input frame is never modified: the function always works on a
    fresh frame, whether or not a ``Loan_ID`` column is present.

    Columns read: ``ApplicantIncome``, ``CoapplicantIncome``,
    ``LoanAmount``, ``Loan_Amount_Term``.

    Columns added:
        Total_Income   -- ApplicantIncome + CoapplicantIncome
        EMI            -- (LoanAmount * 1000) / Loan_Amount_Term
        Balance_Income -- Total_Income - EMI

    Args:
        df: pandas DataFrame holding the columns listed above.

    Returns:
        A new DataFrame without ``Loan_ID`` (if it was present) and with
        the three derived columns appended.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Loan_ID is just an identifier, so we remove it. In both branches we
    # end up with a new object, so the column assignments below never leak
    # into the caller's DataFrame.
    if "Loan_ID" in df.columns:
        df = df.drop("Loan_ID", axis=1)
    else:
        df = df.copy()

    # create some useful new features
    df["Total_Income"] = df["ApplicantIncome"] + df["CoapplicantIncome"]
    # NOTE(review): the * 1000 suggests LoanAmount is stored in thousands --
    # confirm against the dataset. A zero Loan_Amount_Term yields inf/NaN
    # here (pandas division semantics), not an exception.
    df["EMI"] = (df["LoanAmount"] * 1000) / df["Loan_Amount_Term"]
    df["Balance_Income"] = df["Total_Income"] - df["EMI"]
    return df
def encode_data(df):
    """Label-encode the target column and the categorical feature columns.

    A separate ``LabelEncoder`` is fitted per column so each one can be
    reused later (e.g. to transform new rows or invert predictions).
    The input frame is not modified; encoding is applied to a copy.

    Args:
        df: pandas DataFrame containing ``Loan_Status`` plus the columns
            ``Gender``, ``Married``, ``Dependents``, ``Education``,
            ``Self_Employed`` and ``Property_Area``.

    Returns:
        A tuple ``(encoded_df, encoders, target_encoder)`` where
        ``encoded_df`` is the encoded copy, ``encoders`` maps each
        categorical column name to its fitted ``LabelEncoder``, and
        ``target_encoder`` is the encoder fitted on ``Loan_Status``.

    Raises:
        KeyError: if ``Loan_Status`` or any categorical column is missing.
    """
    # Work on a copy: overwriting the caller's columns in place would
    # destroy the raw values and make a second call re-encode encoded data.
    df = df.copy()

    # convert target (Y/N) into numeric
    target_encoder = LabelEncoder()
    df["Loan_Status"] = target_encoder.fit_transform(df["Loan_Status"])

    # encode categorical columns
    cols = ["Gender", "Married", "Dependents", "Education", "Self_Employed", "Property_Area"]
    encoders = {}
    for col in cols:
        le = LabelEncoder()
        # Cast to str first so the encoder sees uniformly typed values
        # (presumably guards against mixed str/NaN columns -- confirm).
        df[col] = le.fit_transform(df[col].astype(str))
        encoders[col] = le
    return df, encoders, target_encoder