health_insurance_app / models /HealthInsurance.py
gilmar's picture
feat: health insurance app
ae43c28 verified
import pandas as pd
class HealthInsurance:
    """Inference pipeline: feature engineering, transformation and scoring."""

    # Raw ``vehicle_age`` categories mapped to the labels used downstream.
    _VEHICLE_AGE_LABELS = {
        '> 2 Years': 'over_2_years',
        '1-2 Year': 'between_1_2_year',
        '< 1 Year': 'below_1_year',
    }

    # Ordered labels assigned to the ``annual_premium`` bins.
    _PREMIUM_CATEGORIES = ['very_low', 'low', 'moderate', 'high', 'very_high']

    def __init__(self, model, column_transformer, bins_annual_premium_type):
        """Store the collaborators used by the pipeline.

        model : the already trained model; it must expose ``predict_proba``.
        column_transformer : the column transformer with all transformations;
            it must expose ``transform``.
        bins_annual_premium_type : bins handed to ``pd.cut`` to create the
            ``annual_premium_type`` feature.
        """
        self.model = model
        self.transformer = column_transformer
        self.bins_annual_premium_type = bins_annual_premium_type

    def feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame:
        """Cast column dtypes and derive the engineered features.

        NOTE: ``df`` is modified in place and the same object is returned.
        Pass a copy when the original frame must be preserved.
        """
        int_columns = ['previously_insured', 'vintage', 'age', 'driving_license']
        float_columns = ['annual_premium', 'region_code', 'policy_sales_channel']

        df[int_columns] = df[int_columns].astype(int)
        df[float_columns] = df[float_columns].astype(float)

        df['vehicle_age'] = df['vehicle_age'].apply(self.get_vehicle_age)

        # Per the pandas documentation, values falling outside the bins
        # come out of ``pd.cut`` as NaN.
        df['annual_premium_type'] = pd.cut(
            x=df['annual_premium'],
            bins=self.bins_annual_premium_type,
            labels=self._PREMIUM_CATEGORIES,
        )
        return df

    def get_vehicle_age(self, vehicle_age):
        """Return the label for a raw ``vehicle_age`` value, or None if unknown."""
        return self._VEHICLE_AGE_LABELS.get(vehicle_age)

    def data_preparation(self, df: pd.DataFrame):
        """Apply the column transformer to ``df`` and return its output."""
        return self.transformer.transform(df)

    def predict(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a scored copy of ``df``.

        The returned frame holds the engineered ``df`` plus a ``score`` column
        taken from column 1 of ``predict_proba``; the helper column
        ``annual_premium_type`` is removed from the result.

        The caller's frame is left untouched. Working on a copy keeps the
        method repeatable: re-running the pipeline on an already relabelled
        ``vehicle_age`` column would map every value to None.
        """
        scored = df.copy()
        features = (scored.pipe(self.feature_engineering)
                          .pipe(self.data_preparation))
        scored['score'] = self.model.predict_proba(features)[:, 1]
        return scored.drop(columns='annual_premium_type')