Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
"""Data pre-processing functions.""" | |
import numpy | |
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, StandardScaler | |
def _get_pipeline_replace_one_hot(func, value):
    """Build a pipeline that rewrites values with ``func`` and then one-hot encodes them.

    Args:
        func: Callable applied to the incoming data by a ``FunctionTransformer``.
            It is invoked with the extra keyword argument ``value``.
        value: Object forwarded to ``func`` as its ``value`` keyword argument.

    Returns:
        An unfitted ``Pipeline`` with the steps ``"replace"`` followed by ``"one_hot"``.
    """
    # 'one-to-one' keeps the input feature names on the replace step's output.
    replace_step = FunctionTransformer(
        func,
        kw_args={"value": value},
        feature_names_out="one-to-one",
    )
    encode_step = OneHotEncoder()

    steps = [
        ("replace", replace_step),
        ("one_hot", encode_step),
    ]
    return Pipeline(steps)
def _replace_values_eq(column, value): | |
for desired_value, values_to_replace in value.items(): | |
column = numpy.where(numpy.isin(column, values_to_replace), desired_value, column) | |
return column | |
def get_pre_processors():
    """Create the unfitted column transformers for the three data sources.

    Returns:
        A tuple ``(pre_processor_applicant, pre_processor_bank,
        pre_processor_credit_bureau)`` of ``ColumnTransformer`` objects:

        * applicant: groups ``Occupation_type`` into three categories and
          one-hot encodes it, one-hot encodes the other categorical columns,
          and standard-scales the numeric columns;
        * bank: standard-scales ``Account_age``;
        * credit bureau: applies no transformer.

        All three pass any remaining columns through unchanged and do not
        prefix output feature names with the transformer name.
    """
    # Options common to every transformer built here.
    shared_options = {
        "remainder": "passthrough",
        "verbose_feature_names_out": False,
    }

    # Coarse occupation categories -> raw occupation labels they absorb.
    occupation_groups = {
        "Labor_work": [
            "Cooking Staff", "Carpenter", "Plumber", "Factory Worker", "Bus Driver",
        ],
        "Office_work": [
            "Business Owners", "Office Worker", "Accountant", "Entrepreneur", "Salesperson",
        ],
        "High_tech_work": ["Engineer", "Manager", "Consultant", "Software Developer"],
    }
    categorical_columns = ["Housing_type", "Family_status", "Education_type", "Income_type"]
    numeric_columns = ["Num_children", "Household_size", "Total_income", "Age"]

    occupation_pipeline = _get_pipeline_replace_one_hot(_replace_values_eq, occupation_groups)

    # The transformer order below determines the output column order.
    applicant_steps = [
        ("replace_occupation_type_labor", occupation_pipeline, ["Occupation_type"]),
        ("one_hot_others", OneHotEncoder(), categorical_columns),
        ("standard_scaler", StandardScaler(), numeric_columns),
    ]
    pre_processor_applicant = ColumnTransformer(transformers=applicant_steps, **shared_options)

    bank_steps = [("standard_scaler", StandardScaler(), ["Account_age"])]
    pre_processor_bank = ColumnTransformer(transformers=bank_steps, **shared_options)

    # No transformers: every credit bureau column goes through the remainder.
    pre_processor_credit_bureau = ColumnTransformer(transformers=[], **shared_options)

    return pre_processor_applicant, pre_processor_bank, pre_processor_credit_bureau