Spaces:

zama-fhe
/

encrypted_credit_scoring

Running

App Files Files Community

encrypted_credit_scoring / settings.py

romanbredehoft-zama

First working demo with multi-inputs XGB

9a997e4 8 months ago

raw

history blame

No virus

2.48 kB

	"All constants used in the project."

	from pathlib import Path
	import pandas

	# The directory of this project
	REPO_DIR = Path(__file__).parent

	# This repository's main necessary directories
	DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
	FHE_KEYS = REPO_DIR / ".fhe_keys"
	CLIENT_FILES = REPO_DIR / "client_files"
	SERVER_FILES = REPO_DIR / "server_files"

	# Paths targeting the saved pre-processor files
	PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / "pre_processor_user.pkl"
	PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / "pre_processor_third_party.pkl"

	# Create the necessary directories at import time (no-op when they already exist).
	# DEPLOYMENT_PATH is not created here.
	FHE_KEYS.mkdir(exist_ok=True)
	CLIENT_FILES.mkdir(exist_ok=True)
	SERVER_FILES.mkdir(exist_ok=True)

	# Store the server's URL
	SERVER_URL = "http://localhost:8000/"

	# Path to data file
	# It is anchored at REPO_DIR, like every other path in this module, so that the file is found
	# whatever the current working directory is. It is kept as a string, as it was before.
	# The data was previously cleaned using this notebook: https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
	# Additionally, the "ID" column has been removed and the "Total_income" has been adjusted so that
	# its median value corresponds to France's 2023 median annual salary (22050 euros)
	DATA_PATH = str(REPO_DIR / "data" / "clean_data.csv")

	# Development settings
	RANDOM_STATE = 0
	INITIAL_INPUT_SHAPE = (1, 49)

	# The parties providing inputs, in the order their inputs are laid out
	CLIENT_TYPES = ["user", "bank", "third_party"]

	# Position of each client type, i.e. its rank in CLIENT_TYPES
	INPUT_INDEXES = {client: position for position, client in enumerate(CLIENT_TYPES)}

	# Range taken by each client type; each range starts where the previous one stops and the
	# last one stops at 49, the second dimension of INITIAL_INPUT_SHAPE
	INPUT_SLICES = dict(
	    user=slice(0, 42),  # starts from 0
	    bank=slice(42, 43),  # starts from n_feature_user
	    third_party=slice(43, 49),  # starts from n_feature_user + n_feature_bank
	)

	# Load the data file once, at import time; the min/max ranges and the choice lists defined
	# further down in this module are computed from it.
	_data = pandas.read_csv(DATA_PATH, encoding="utf-8")

	def get_min_max(data: pandas.DataFrame, column: str) -> dict:
	    """Get min/max values of a column in order to input them in Gradio's API as key arguments.

	    Args:
	        data: The table to read from. Any object for which ``data[column]`` exposes ``min()``
	            and ``max()`` works, typically a pandas DataFrame.
	        column: The name of the column to inspect.

	    Returns:
	        A dictionary with the two keys "minimum" and "maximum". Both values are converted
	        with ``int``, so fractional bounds are truncated toward zero.
	    """
	    values = data[column]
	    return {
	        "minimum": int(values.min()),
	        "maximum": int(values.max()),
	    }

	# App data min and max values: one {"minimum": ..., "maximum": ...} dictionary per numerical
	# column. The names on the left are matched, in order, with the column names on the right.
	(
	    ACCOUNT_MIN_MAX,
	    CHILDREN_MIN_MAX,
	    INCOME_MIN_MAX,
	    AGE_MIN_MAX,
	    EMPLOYED_MIN_MAX,
	    FAMILY_MIN_MAX,
	) = (
	    get_min_max(_data, numerical_column)
	    for numerical_column in (
	        "Account_length",
	        "Num_children",
	        "Total_income",
	        "Age",
	        "Years_employed",
	        "Num_family",
	    )
	)

	# App data choices: the distinct values found in each of these columns, in order of first
	# appearance. Names and column names are matched in order here as well.
	(
	    INCOME_TYPES,
	    OCCUPATION_TYPES,
	    HOUSING_TYPES,
	    EDUCATION_TYPES,
	    FAMILY_STATUS,
	) = (
	    list(_data[choice_column].unique())
	    for choice_column in (
	        "Income_type",
	        "Occupation_type",
	        "Housing_type",
	        "Education_type",
	        "Family_status",
	    )
	)