Spaces:
Runtime error
Runtime error
import tensorflow as tf | |
import pandas as pd | |
from .constants import CSV_HEADER, TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME, NUMERIC_FEATURE_NAMES | |
# Helper functions for preprocessing the data.
def load_test_data():
    """Download the census-income test split and return it as a DataFrame.

    The remote file has no header row, so the column names are taken from
    ``CSV_HEADER``. Calling this performs a network request.

    Returns:
        pandas.DataFrame: the parsed rows, with columns named by ``CSV_HEADER``.
    """
    return pd.read_csv(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income.test.gz",
        header=None,
        names=CSV_HEADER,
    )
# The test split is fetched once at import time; the lookup tables below are
# derived from it.
test_data = load_test_data()

# Columns that are NOT categorical inputs: the numeric features plus the
# instance-weight and target columns.
_NON_CATEGORICAL_COLUMNS = NUMERIC_FEATURE_NAMES + [
    WEIGHT_COLUMN_NAME,
    TARGET_FEATURE_NAME,
]

# Every remaining column is treated as categorical; its vocabulary is the
# sorted list of distinct values found in the test split, rendered as strings.
CATEGORICAL_FEATURES_WITH_VOCABULARY = {
    column: sorted(map(str, test_data[column].unique()))
    for column in CSV_HEADER
    if column not in _NON_CATEGORICAL_COLUMNS
}

# All feature names: numeric features first, then the categorical ones.
FEATURE_NAMES = [*NUMERIC_FEATURE_NAMES, *CATEGORICAL_FEATURES_WITH_VOCABULARY]

# Per-column default values, in CSV_HEADER order: [0.0] for numeric, target
# and weight columns, ["NA"] for everything else.
COLUMN_DEFAULTS = [
    [0.0] if column in _NON_CATEGORICAL_COLUMNS else ["NA"]
    for column in CSV_HEADER
]
def process(features, target):
    """Prepare one batch for training: stringify categoricals, split off weights.

    Args:
        features: mutable mapping of column name to tensor. It is modified in
            place: categorical entries are replaced by their string cast and
            the ``WEIGHT_COLUMN_NAME`` entry is removed.
        target: the label tensor; returned unchanged.

    Returns:
        tuple: ``(features, target, weight)`` where ``weight`` is the value
        popped from ``features[WEIGHT_COLUMN_NAME]``.
    """
    # Collect the names first, then overwrite the matching entries.
    categorical_names = [
        name for name in features if name in CATEGORICAL_FEATURES_WITH_VOCABULARY
    ]
    for name in categorical_names:
        # Categorical feature values are cast to string.
        features[name] = tf.cast(features[name], tf.dtypes.string)

    # The instance weight travels separately from the model inputs.
    instance_weight = features.pop(WEIGHT_COLUMN_NAME)
    return features, target, instance_weight
def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):
    """Build a batched ``tf.data`` dataset from a header-less CSV file.

    Args:
        csv_file_path: path (or pattern) handed to ``make_csv_dataset``.
        shuffle: forwarded to ``make_csv_dataset``; defaults to ``False``.
        batch_size: number of records per batch; defaults to 128.

    Returns:
        The CSV dataset mapped through ``process``, so each element is a
        ``(features, target, weight)`` tuple.
    """
    reader_options = dict(
        batch_size=batch_size,
        column_names=CSV_HEADER,
        column_defaults=COLUMN_DEFAULTS,
        label_name=TARGET_FEATURE_NAME,
        num_epochs=1,
        header=False,
        shuffle=shuffle,
    )
    raw_batches = tf.data.experimental.make_csv_dataset(
        csv_file_path, **reader_options
    )
    return raw_batches.map(process)
def create_max_values_map():
    """Map ``"max_<column>"`` to the largest value of each numeric column.

    Values are read from the module-level ``test_data`` frame, one entry per
    name in ``NUMERIC_FEATURE_NAMES``.

    Returns:
        dict: keys of the form ``"max_" + column`` with that column's maximum.
    """
    return {"max_" + name: max(test_data[name]) for name in NUMERIC_FEATURE_NAMES}
def create_dropdown_default_values_map():
    """Map ``"max_<column>"`` to the maximum value of each categorical column.

    Values are read from the module-level ``test_data`` frame, one entry per
    key of ``CATEGORICAL_FEATURES_WITH_VOCABULARY``.

    Returns:
        dict: keys of the form ``"max_" + column`` with the result of
        ``Series.max()`` for that column.
    """
    return {
        "max_" + name: test_data[name].max()
        for name in CATEGORICAL_FEATURES_WITH_VOCABULARY
    }
def create_sample_test_data():
    """Return the first rows of ``test_data`` as plain lists with a 0/1 label.

    The ``income_level`` column of the sample is encoded as ``0`` when the raw
    value equals ``" - 50000."`` and ``1`` otherwise.

    The encoding is applied to a copy of the sampled rows, so the module-level
    ``test_data`` frame is left untouched. Rewriting the shared column in place
    would make a second call see only 0/1 values and label every row ``1``.

    Returns:
        list[list]: one inner list per sampled row, in column order.
    """
    # ``.loc`` slicing is label-based and inclusive: labels 0 through 20,
    # i.e. 21 rows with the default integer index.
    sample_df = test_data.loc[:20, :].copy()
    sample_df["income_level"] = sample_df["income_level"].apply(
        lambda x: 0 if x == " - 50000." else 1
    )
    return sample_df.values.tolist()