shivi's picture
Added all app files for gradio demo for Binary classification on structured data using GRN-VSN model
563baab
raw history blame
No virus
2.43 kB
import pandas as pd
from .preprocess import load_test_data
# Column names.
CSV_HEADER = [
"age",
"class_of_worker",
"detailed_industry_recode",
"detailed_occupation_recode",
"education",
"wage_per_hour",
"enroll_in_edu_inst_last_wk",
"marital_stat",
"major_industry_code",
"major_occupation_code",
"race",
"hispanic_origin",
"sex",
"member_of_a_labor_union",
"reason_for_unemployment",
"full_or_part_time_employment_stat",
"capital_gains",
"capital_losses",
"dividends_from_stocks",
"tax_filer_stat",
"region_of_previous_residence",
"state_of_previous_residence",
"detailed_household_and_family_stat",
"detailed_household_summary_in_household",
"instance_weight",
"migration_code-change_in_msa",
"migration_code-change_in_reg",
"migration_code-move_within_reg",
"live_in_this_house_1_year_ago",
"migration_prev_res_in_sunbelt",
"num_persons_worked_for_employer",
"family_members_under_18",
"country_of_birth_father",
"country_of_birth_mother",
"country_of_birth_self",
"citizenship",
"own_business_or_self_employed",
"fill_inc_questionnaire_for_veterans_admin",
"veterans_benefits",
"weeks_worked_in_year",
"year",
"income_level",
]
# Target feature name.
TARGET_FEATURE_NAME = "income_level"
# Weight column name.
WEIGHT_COLUMN_NAME = "instance_weight"
# Numeric feature names.
NUMERIC_FEATURE_NAMES = [
"age",
"wage_per_hour",
"capital_gains",
"capital_losses",
"dividends_from_stocks",
"num_persons_worked_for_employer",
"weeks_worked_in_year",
]
##Cols which will use "Number" component of gradio for taking user input
NUMBER_INPUT_COLS = ['age', 'num_persons_worked_for_employer','weeks_worked_in_year']
test_data = load_test_data()
CATEGORICAL_FEATURES_WITH_VOCABULARY = {
feature_name: sorted([str(value) for value in list(test_data[feature_name].unique())])
for feature_name in CSV_HEADER
if feature_name
not in list(NUMERIC_FEATURE_NAMES + [WEIGHT_COLUMN_NAME, TARGET_FEATURE_NAME])
}
# All features names.
FEATURE_NAMES = NUMERIC_FEATURE_NAMES + list(
CATEGORICAL_FEATURES_WITH_VOCABULARY.keys()
)
# Feature default values.
COLUMN_DEFAULTS = [
[0.0]
if feature_name in NUMERIC_FEATURE_NAMES + [TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME]
else ["NA"]
for feature_name in CSV_HEADER
]