titanic-feature / app.py
davidt123's picture
Update app.py
f5e2057
raw
history blame contribute delete
No virus
4.17 kB
import gradio as gr
import numpy as np
#from PIL import Image
#import requests
import random
import hopsworks
import joblib
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
# Connect to Hopsworks once at import time; the feature store handle (`fs`)
# and the loaded model (`model`) are reused by every prediction request.
project = hopsworks.login()
fs = project.get_feature_store()
mr = project.get_model_registry()

# Download the registered model's files, then deserialize the pickled model
# found inside the downloaded directory.
model_dir = mr.get_model("titanic_modal_RF", version=1).download()
model = joblib.load(f"{model_dir}/titanic_model_RF.pkl")
def encode_features(df):
    """Label-encode and one-hot encode the categorical passenger features.

    The encoders are fitted on ``df`` itself, so the integer codes and the
    one-hot columns depend on the categories present in this frame.

    Args:
        df: DataFrame containing at least the columns named in
            ``non_numeric_features``, ``cat_features`` and ``drop_cols``.

    Returns:
        A new DataFrame where the raw categorical/text columns have been
        dropped and replaced by one-hot columns named ``<feature>_<k>``
        (``k`` starts at 1). The frame passed in is not modified.
    """
    # Work on a copy: the label-encoding step assigns columns, which would
    # otherwise overwrite the caller's data as a side effect.
    df = df.copy()

    non_numeric_features = ['embarked', 'sex', 'deck', 'title', 'family_size_grouped']
    for feature in non_numeric_features:
        df[feature] = LabelEncoder().fit_transform(df[feature])

    cat_features = ['pclass', 'sex', 'deck', 'embarked', 'title', 'family_size_grouped']
    encoded_features = []
    for feature in cat_features:
        encoded_feat = OneHotEncoder().fit_transform(
            df[feature].values.reshape(-1, 1)
        ).toarray()
        # Name the columns after the encoder's actual output width instead of
        # ``nunique()``, which skips NaN and could disagree with the encoder.
        cols = [f"{feature}_{k}" for k in range(1, encoded_feat.shape[1] + 1)]
        # Reuse df's index so the column-wise concat below lines rows up.
        encoded_features.append(
            pd.DataFrame(encoded_feat, columns=cols, index=df.index)
        )

    df = pd.concat([df, *encoded_features], axis=1)

    # Raw columns that are either replaced by their one-hot encodings or are
    # free text / helper columns removed before the frame is returned.
    drop_cols = ['deck', 'embarked', 'family', 'family_size', 'family_size_grouped',
                 'name', 'parch', 'pclass', 'sex', 'sibsp', 'ticket', 'title']
    return df.drop(columns=drop_cols)
def titanic(age, deck, embarked, name, family_size, family_size_grouped, fare, is_married, parch, sibsp, sex, ticket_number, title):
    """Build a one-row DataFrame describing a passenger from the form inputs.

    Args:
        age, fare, is_married, parch, sibsp, family_size: values stored as-is.
        deck: deck group string; used both as a column and to derive ``pclass``.
        embarked, sex, title, family_size_grouped: stored as-is.
        name: free-text name. With two or more words, the first word is
            treated as the given name and the second as the family name.
        ticket_number: stored in the ``ticket`` column as a string.

    Returns:
        A single-row ``pandas.DataFrame`` with the 16 passenger columns
        (including a constant ``ticket_frequency`` of 1).
    """
    # Derive the passenger class from the deck group. Decks "DE" and "FG"
    # are ambiguous between 2nd and 3rd class, so one is picked at random.
    if deck == "ABC":
        pclass = 1
    elif deck in ("DE", "FG"):
        pclass = random.choice([2, 3])
    else:
        pclass = 3

    # Split the name once. A single-word or empty name previously raised
    # IndexError; fall back to using whatever is available instead.
    name_parts = (name or "").split()
    if len(name_parts) >= 2:
        family = name_parts[1]
        full_name = family + ", " + name_parts[0]
    else:
        family = name_parts[0] if name_parts else ""
        full_name = family

    return pd.DataFrame({
        "age": [age],
        "deck": [deck],
        "embarked": [embarked],
        "family": [family],
        "family_size": [family_size],
        "family_size_grouped": [family_size_grouped],
        "fare": [fare],
        "is_married": [is_married],
        "name": [full_name],
        "parch": [parch],
        "pclass": [pclass],
        "sex": [sex],
        "sibsp": [sibsp],
        "ticket": [str(ticket_number)],
        "ticket_frequency": [1],
        "title": [title],
    })
def predictor(age, deck, embarked, name, family_size, family_size_grouped, fare, is_married, parch, sibsp, sex, ticket_number, title):
    """Predict survival for the passenger described by the form inputs.

    The passenger is turned into a one-row frame, appended to the batch data
    read from the ``titanic_modal_2`` feature view, and the combined frame is
    encoded and passed to the model. The prediction for the last row (the new
    passenger) is reported.

    Args:
        Same field values as ``titanic``; they are forwarded unchanged.

    Returns:
        A human-readable string describing the predicted outcome.

    Raises:
        ValueError: if the model returns a label other than 0 or 1.
    """
    feature_view = fs.get_feature_view(name="titanic_modal_2", version=1)
    batch_data = feature_view.get_batch_data()

    row = titanic(age, deck, embarked, name, family_size, family_size_grouped, fare, is_married, parch, sibsp, sex, ticket_number, title)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. ignore_index gives the combined frame a clean unique index
    # instead of repeating the new row's label 0.
    batch_data = pd.concat([batch_data, row], ignore_index=True)

    encoded = encode_features(batch_data)
    # The new passenger is the last row, so its prediction is the last entry.
    prediction = model.predict(encoded)[-1]

    # NOTE(review): the usual Titanic `survived` label uses 1 for "survived";
    # this mapping treats 0 as survived. Kept as-is — confirm against the
    # training pipeline before changing.
    if prediction == 0:
        return "This person survived"
    if prediction == 1:
        return "This person did not survive"
    # Previously an unexpected label fell through to an unbound local.
    raise ValueError("Unexpected prediction label: {!r}".format(prediction))
# Gradio UI: one input widget per `predictor` argument, in the same order as
# the function's positional parameters. A separate "Family name" textbox is
# not offered; the family name is taken from the "Name of person" field.
demo = gr.Interface(
    fn=predictor,
    inputs=[
        widget(label=caption)
        for widget, caption in (
            (gr.inputs.Number, "age (10-99)"),
            (gr.inputs.Textbox, "Deck (ABC, DE, FG, or M)"),
            (gr.inputs.Textbox, "Embarked (S, C, or Q)"),
            (gr.inputs.Textbox, "Name of person"),
            (gr.inputs.Number, "Size of family (integer)"),
            (gr.inputs.Textbox, "Family Size (Alone, Small, Medium or Large)"),
            (gr.inputs.Number, "Fare (Integer)"),
            (gr.inputs.Number, "Is the person married (0 for no, 1 for yes)"),
            (gr.inputs.Number, "Number of Parent/Children onboard (Integer)"),
            (gr.inputs.Number, "Number of siblings onboard (Integer)"),
            (gr.inputs.Textbox, "Sex"),
            (gr.inputs.Number, "Ticket number"),
            (gr.inputs.Textbox, "Title (Mr, Miss, etc)"),
        )
    ],
    outputs="text",
    title="Titanic Survivor Predictive Analytics",
    description="Titanic survivor experiment.",
    allow_flagging="never",
)

# Start the web server for the interface.
demo.launch()