Spaces:
Sleeping
Sleeping
feat: health insurance app
Browse files- .gitignore +1 -0
- app.py +53 -4
- models/HealthInsurance.py +10 -7
- models/__pycache__/HealthInsurance.cpython-310.pyc +0 -0
- requirements.txt +50 -5
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
flagged/log.csv
|
app.py
CHANGED
@@ -1,7 +1,56 @@
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
-
def
|
4 |
-
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import joblib
|
2 |
import gradio as gr
|
3 |
+
from models import HealthInsurance
|
4 |
|
5 |
+
def load_data():
    """Load the serialized artifacts from ``parameters/`` into module globals.

    Populates ``_model``, ``_column_transformer`` and
    ``_bins_annual_premium_type`` via ``joblib.load``. Paths are relative,
    so they resolve against the current working directory.
    """
    global _model, _column_transformer, _bins_annual_premium_type

    _model = joblib.load(filename='parameters/random_forrest.gz')
    _column_transformer = joblib.load(
        filename='parameters/column_transformer.joblib'
    )
    _bins_annual_premium_type = joblib.load(
        filename='parameters/bins_annual_premium_type.joblib'
    )
|
13 |
+
|
14 |
+
def predict(df):
    """Return ``df`` as scored by a ``HealthInsurance`` built from the loaded artifacts.

    Reads the module globals ``_model``, ``_column_transformer`` and
    ``_bins_annual_premium_type``, so they must have been assigned before
    this function is called.
    """
    # NOTE(review): assumes `HealthInsurance` imported from `models` is the
    # class itself rather than the submodule -- confirm against models/__init__.py.
    scorer = HealthInsurance(
        _model,
        _column_transformer,
        _bins_annual_premium_type,
    )
    return scorer.predict(df)
|
22 |
+
|
23 |
+
def input():
    """Build the Gradio dataframe component used as the interface input.

    The component has one row and ten fixed columns, and hands its value
    to the callback as a pandas object (``type='pandas'``).
    """
    # NOTE: this name shadows the builtin `input`; it is kept because it is
    # the name callers of this module use.
    column_names = [
        'previously_insured', 'annual_premium', 'vintage', 'gender',
        'age', 'region_code', 'policy_sales_channel',
        'driving_license', 'vehicle_age', 'vehicle_damage',
    ]
    # One datatype per header, in the same order as `column_names`.
    column_types = [
        'number', 'number', 'number', 'str', 'number',
        'number', 'number', 'number', 'str', 'str',
    ]
    return gr.Dataframe(
        headers=column_names,
        datatype=column_types,
        row_count=1,
        col_count=(10, 'fixed'),
        type='pandas',
        label='Input',
    )
|
35 |
+
|
36 |
+
def output():
    """Build the Gradio dataframe component used as the interface output.

    It declares the ten input headers plus a trailing numeric ``score``
    column, and works with pandas objects (``type='pandas'``).
    """
    column_names = [
        'previously_insured', 'annual_premium', 'vintage', 'gender',
        'age', 'region_code', 'policy_sales_channel',
        'driving_license', 'vehicle_age', 'vehicle_damage',
        'score',
    ]
    # One datatype per header, in the same order as `column_names`.
    column_types = [
        'number', 'number', 'number', 'str', 'number',
        'number', 'number', 'number', 'str', 'str',
        'number',
    ]
    return gr.Dataframe(
        headers=column_names,
        datatype=column_types,
        type='pandas',
        label='Output',
    )
|
47 |
+
|
48 |
+
if __name__ == "__main__":
    # Load the serialized artifacts once, before the interface is built
    # and launched.
    load_data()

    iface = gr.Interface(
        fn=predict,
        inputs=input(),
        outputs=output(),
        title='Health Insurance App',
    )
    iface.launch()
|
models/HealthInsurance.py
CHANGED
@@ -15,9 +15,13 @@ class HealthInsurance():
|
|
15 |
|
16 |
|
17 |
def feature_engineering(self, df):
|
18 |
-
|
|
|
|
|
19 |
|
20 |
df['vehicle_age'] = df['vehicle_age'].apply(self.get_vehicle_age)
|
|
|
|
|
21 |
df['annual_premium_type'] = pd.cut(x = df['annual_premium'],
|
22 |
bins = self.bins_annual_premium_type,
|
23 |
labels = premium_categories)
|
@@ -36,16 +40,15 @@ class HealthInsurance():
|
|
36 |
def data_preparation(self, df):
|
37 |
return self.transformer.transform(df)
|
38 |
|
39 |
-
def predict(self,
|
40 |
-
|
41 |
-
|
42 |
np_array = (df.pipe(self.feature_engineering)
|
43 |
.pipe(self.data_preparation)
|
44 |
)
|
45 |
|
46 |
-
df['score'] = self.model.predict_proba(np_array)[:, 1]
|
47 |
-
|
48 |
-
return df
|
49 |
|
50 |
|
51 |
|
|
|
15 |
|
16 |
|
17 |
def feature_engineering(self, df):
|
18 |
+
|
19 |
+
df[['previously_insured','vintage','age','driving_license']] = df[['previously_insured','vintage','age','driving_license']].astype(int)
|
20 |
+
df[['annual_premium','region_code','policy_sales_channel']] = df[['annual_premium','region_code','policy_sales_channel']].astype(float)
|
21 |
|
22 |
df['vehicle_age'] = df['vehicle_age'].apply(self.get_vehicle_age)
|
23 |
+
|
24 |
+
premium_categories = ['very_low', 'low', 'moderate', 'high', 'very_high']
|
25 |
df['annual_premium_type'] = pd.cut(x = df['annual_premium'],
|
26 |
bins = self.bins_annual_premium_type,
|
27 |
labels = premium_categories)
|
|
|
40 |
def data_preparation(self, df):
    """Return ``df`` after passing it through ``self.transformer.transform``."""
    transformed = self.transformer.transform(df)
    return transformed
|
42 |
|
43 |
+
def predict(self, df):
    """Score ``df`` and return it with a ``score`` column added.

    ``df`` is passed through ``self.feature_engineering`` and then
    ``self.data_preparation``; the result is fed to
    ``self.model.predict_proba`` and column 1 of its output is stored as
    ``score``. The ``annual_premium_type`` column is removed before
    returning. The same ``df`` object is modified in place and returned.
    """
    engineered = self.feature_engineering(df)
    features = self.data_preparation(engineered)

    probabilities = self.model.predict_proba(features)
    df['score'] = probabilities[:, 1]

    # Remove the helper column in place so only the inputs and the score remain.
    del df['annual_premium_type']
    return df
|
52 |
|
53 |
|
54 |
|
models/__pycache__/HealthInsurance.cpython-310.pyc
CHANGED
Binary files a/models/__pycache__/HealthInsurance.cpython-310.pyc and b/models/__pycache__/HealthInsurance.cpython-310.pyc differ
|
|
requirements.txt
CHANGED
@@ -1,23 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
category-encoders==2.5.0
|
|
|
|
|
|
|
2 |
click==8.1.3
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
Jinja2==3.1.2
|
7 |
joblib==1.1.0
|
|
|
|
|
|
|
8 |
MarkupSafe==2.1.1
|
|
|
|
|
|
|
|
|
|
|
9 |
numpy==1.23.2
|
|
|
10 |
packaging==21.3
|
11 |
pandas==1.4.4
|
|
|
12 |
patsy==0.5.2
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
pyparsing==3.0.9
|
14 |
python-dateutil==2.8.2
|
|
|
15 |
pytz==2022.2.1
|
|
|
|
|
16 |
scikit-learn==1.1.2
|
17 |
scipy==1.9.1
|
18 |
six==1.16.0
|
19 |
sklearn==0.0
|
|
|
|
|
20 |
statsmodels==0.13.2
|
21 |
threadpoolctl==3.1.0
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.1
|
2 |
+
aiosignal==1.2.0
|
3 |
+
analytics-python==1.4.0
|
4 |
+
anyio==3.6.1
|
5 |
+
async-timeout==4.0.2
|
6 |
+
attrs==22.1.0
|
7 |
+
backoff==1.10.0
|
8 |
+
bcrypt==4.0.0
|
9 |
category-encoders==2.5.0
|
10 |
+
certifi==2022.6.15
|
11 |
+
cffi==1.15.1
|
12 |
+
charset-normalizer==2.1.1
|
13 |
click==8.1.3
|
14 |
+
cryptography==38.0.1
|
15 |
+
cycler==0.11.0
|
16 |
+
fastapi==0.82.0
|
17 |
+
ffmpy==0.3.0
|
18 |
+
fonttools==4.37.1
|
19 |
+
frozenlist==1.3.1
|
20 |
+
fsspec==2022.8.2
|
21 |
+
gradio==3.2
|
22 |
+
h11==0.12.0
|
23 |
+
httpcore==0.15.0
|
24 |
+
httpx==0.23.0
|
25 |
+
idna==3.3
|
26 |
Jinja2==3.1.2
|
27 |
joblib==1.1.0
|
28 |
+
kiwisolver==1.4.4
|
29 |
+
linkify-it-py==1.0.3
|
30 |
+
markdown-it-py==2.1.0
|
31 |
MarkupSafe==2.1.1
|
32 |
+
matplotlib==3.5.3
|
33 |
+
mdit-py-plugins==0.3.0
|
34 |
+
mdurl==0.1.2
|
35 |
+
monotonic==1.6
|
36 |
+
multidict==6.0.2
|
37 |
numpy==1.23.2
|
38 |
+
orjson==3.8.0
|
39 |
packaging==21.3
|
40 |
pandas==1.4.4
|
41 |
+
paramiko==2.11.0
|
42 |
patsy==0.5.2
|
43 |
+
Pillow==9.2.0
|
44 |
+
pycparser==2.21
|
45 |
+
pycryptodome==3.15.0
|
46 |
+
pydantic==1.10.2
|
47 |
+
pydub==0.25.1
|
48 |
+
PyNaCl==1.5.0
|
49 |
pyparsing==3.0.9
|
50 |
python-dateutil==2.8.2
|
51 |
+
python-multipart==0.0.5
|
52 |
pytz==2022.2.1
|
53 |
+
requests==2.28.1
|
54 |
+
rfc3986==1.5.0
|
55 |
scikit-learn==1.1.2
|
56 |
scipy==1.9.1
|
57 |
six==1.16.0
|
58 |
sklearn==0.0
|
59 |
+
sniffio==1.3.0
|
60 |
+
starlette==0.19.1
|
61 |
statsmodels==0.13.2
|
62 |
threadpoolctl==3.1.0
|
63 |
+
typing_extensions==4.3.0
|
64 |
+
uc-micro-py==1.0.1
|
65 |
+
urllib3==1.26.12
|
66 |
+
uvicorn==0.18.3
|
67 |
+
websockets==10.3
|
68 |
+
yarl==1.8.1
|