Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
import urllib
import urllib.request

import gradio as gr
import pandas as pd
import tensorflow as tf
import tensorflow_decision_forests as tfdf
from tensorflow import keras
from tensorflow.keras import layers
# URL prefix shared by the three dataset files; ".names", ".data.gz" and
# ".test.gz" are appended to it below.
input_path = "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income"
# Name given to the last CSV column, which is used as the label.
input_column_header = "income_level"

# Load data
BASE_PATH = input_path

# Column names are derived from the ".names" file: lines starting with "|"
# are skipped, the text before the first ":" is kept with spaces turned into
# underscores, and the first two resulting entries are dropped.
# The response is opened in a ``with`` block so the connection is closed as
# soon as the header has been read.
with urllib.request.urlopen(f"{BASE_PATH}.names") as names_response:
    CSV_HEADER = [
        raw_line.decode("utf-8").split(":")[0].replace(" ", "_")
        for raw_line in names_response
        if not raw_line.startswith(b"|")
    ][2:]
CSV_HEADER.append(input_column_header)

# The data files are read without a header row; CSV_HEADER supplies the names.
train_data = pd.read_csv(f"{BASE_PATH}.data.gz", header=None, names=CSV_HEADER)
test_data = pd.read_csv(f"{BASE_PATH}.test.gz", header=None, names=CSV_HEADER)

# Subset data: keep only the four model inputs and the label, using one shared
# column list so the train and test frames cannot drift apart.
_SELECTED_COLUMNS = ["education", "sex", "capital_gains", "capital_losses", "income_level"]
train_data = train_data.loc[:, _SELECTED_COLUMNS]
test_data = test_data.loc[:, _SELECTED_COLUMNS]
def encode_df(df):
    """Replace the known text categories in ``df`` with integer codes.

    The ``sex``, ``education`` and ``income_level`` columns are processed one
    after another with ``DataFrame.replace``. Keys are matched exactly as
    written here (note the leading space in every category string); values
    that have no entry in a column's table are left as they are.

    Args:
        df: Frame holding some or all of the columns listed above.

    Returns:
        The frame produced by the successive ``replace`` calls.
    """
    # Education categories listed in code order: the first entry is code 1.
    education_levels = (
        " High school graduate",
        " Some college but no degree",
        " 10th grade",
        " Children",
        " Bachelors degree(BA AB BS)",
        " Masters degree(MA MS MEng MEd MSW MBA)",
        " Less than 1st grade",
        " Associates degree-academic program",
        " 7th and 8th grade",
        " 12th grade no diploma",
        " Associates degree-occup /vocational",
        " Prof school degree (MD DDS DVM LLB JD)",
        " 5th or 6th grade",
        " 11th grade",
        " Doctorate degree(PhD EdD)",
        " 9th grade",
        " 1st 2nd 3rd or 4th grade",
    )
    codes_by_column = {
        "sex": {" Male": 0, " Female": 1},
        "education": {
            level: code for code, level in enumerate(education_levels, start=1)
        },
        "income_level": {' - 50000.': 0, ' 50000+.': 1},
    }

    encoded = df
    for column, codes in codes_by_column.items():
        encoded = encoded.replace({column: codes})
    return encoded
# Integer-encode the text columns of both splits (train first, then test).
train_data, test_data = encode_df(train_data), encode_df(test_data)

# Declare each of the four model inputs with the CATEGORICAL semantic.
feature_a, feature_b, feature_c, feature_d = (
    tfdf.keras.FeatureUsage(
        name=column_name,
        semantic=tfdf.keras.FeatureSemantic.CATEGORICAL,
    )
    for column_name in ("education", "sex", "capital_gains", "capital_losses")
)

# Convert the dataset into a TensorFlow dataset.
train_ds, test_ds = (
    tfdf.keras.pd_dataframe_to_tf_dataset(frame, label="income_level")
    for frame in (train_data, test_data)
)
import logging

# Only let TensorFlow log records of level ERROR or above through.
logging.getLogger("tensorflow").setLevel(logging.ERROR)

# Train a GB Trees model
model = tfdf.keras.GradientBoostedTreesModel(
    # What is learned, and from which inputs.
    task=tfdf.keras.Task.CLASSIFICATION,
    loss="DEFAULT",
    features=[feature_a, feature_b, feature_c, feature_d],
    exclude_non_specified_features=True,
    # Tree construction.
    growing_strategy="BEST_FIRST_GLOBAL",
    num_trees=350,
    max_depth=7,
    min_examples=6,
    # Example sampling and validation split.
    sampling_method="GOSS",
    subsample=0.65,
    validation_ratio=0.1,
    verbose=0,
)
model.compile(
    metrics=[keras.metrics.BinaryAccuracy(name="accuracy")],
)
model.fit(train_ds)
# Evaluation on the test split is left disabled:
# model.evaluate(test_ds)
#prepare user input for the model | |
def process_inputs(education, sex, capital_gains, capital_losses):
    """Score one set of UI inputs with the trained model.

    The four values are placed in a one-row DataFrame, passed through
    ``encode_df`` (which turns known category strings into their integer
    codes and leaves other values untouched), converted to a TensorFlow
    dataset and fed to the module-level ``model``.

    Args:
        education: Education value; a category string known to ``encode_df``
            or an already-encoded integer.
        sex: Sex value; a category string known to ``encode_df`` or an
            already-encoded integer (0 for male, 1 for female).
        capital_gains: Capital gains amount.
        capital_losses: Capital losses amount.

    Returns:
        A dict mapping both class labels, ``"> $50,000"`` and
        ``"<= $50,000"``, to confidences that sum to 1, so the label widget
        can rank the two classes.
    """
    row = pd.DataFrame.from_dict(
        {
            "education": [education],
            "sex": [sex],
            "capital_gains": [capital_gains],
            "capital_losses": [capital_losses],
        }
    )
    row = encode_df(row)
    dataset = tfdf.keras.pd_dataframe_to_tf_dataset(row)
    prediction = model.predict(dataset)

    # NOTE(review): assumes the model emits a single positive-class
    # probability per input row (binary classification) - confirm.
    # Pull it out as a plain float instead of relying on the truth value of
    # an array.
    high_income_probability = float(prediction.ravel()[0])

    # Report a confidence for each class. Previously only the predicted class
    # was returned, and "<= $50,000" was reported with a confidence of 0.
    return {
        "> $50,000": high_income_probability,
        "<= $50,000": 1.0 - high_income_probability,
    }
# Web UI: four inputs are passed positionally to process_inputs and the
# result is shown in a label widget; live=True re-runs on every input change.
iface = gr.Interface(
    process_inputs,
    [
        # type="value" passes the selected category string through, so
        # encode_df can translate it with its education mapping. With
        # type="index" the position within this six-item list (0-5) was sent
        # instead, and those positions do not match the education codes.
        gr.inputs.Dropdown(
            [
                " 1st 2nd 3rd or 4th grade",
                " High school graduate",
                " Bachelors degree(BA AB BS)",
                " Masters degree(MA MS MEng MEd MSW MBA)",
                " Prof school degree (MD DDS DVM LLB JD)",
                " Doctorate degree(PhD EdD)",
            ],
            type="value",
            label="education",
        ),
        # The choice order matches the sex codes (male 0, female 1), so the
        # selected index can be used directly.
        gr.inputs.Radio([" Male", " Female"], label="Sex (M=0, F=1)", type="index"),
        gr.inputs.Slider(minimum=0, maximum=99999, label="capital gains"),
        gr.inputs.Slider(minimum=0, maximum=4608, label="capital losses"),
    ],
    gr.outputs.Label(num_top_classes=2),
    live=True,
    analytics_enabled=False,
    # Example rows hold component values, so the sex column uses the choice
    # string rather than its numeric code.
    examples=[
        [" Masters degree(MA MS MEng MEd MSW MBA)", " Male", 7298, 0],
        [" Bachelors degree(BA AB BS)", " Male", 6514, 0],
    ],
)
iface.launch(debug=True)