Spaces:

zama-fhe
/

encrypted_credit_scoring

Running on CPU Upgrade

App Files Files Community

encrypted_credit_scoring / development /development.py

romanbredehoft-zama

Adding deployment files and updating app

c119738 about 1 year ago

raw

history blame

1.85 kB

	"A script to generate all development files necessary for the project."

	import shutil
	import numpy
	import pandas

	from sklearn.model_selection import train_test_split
	from imblearn.over_sampling import SMOTE

	from ..settings import DEPLOYMENT_PATH, RANDOM_STATE
	from client_server_interface import MultiInputsFHEModelDev
	from model import MultiInputXGBClassifier
	from development.pre_processing import pre_process_data


	# Step 1: load the raw data, feature-engineer it, re-balance the classes and build the
	# Numpy training set used to fit and compile the model
	print("Load and pre-process the data")

	data = pandas.read_csv("data/clean_data.csv", encoding="utf-8")

	# Make median annual salary similar to France (2023): from 157500 to 22050
	data["Total_income"] = data["Total_income"] * 0.14

	# Remove ID feature
	data.drop("ID", axis=1, inplace=True)

	# Feature engineer the data (the returned bins are not used further in this script)
	pre_processed_data, training_bins = pre_process_data(data)

	# Define input and target data: 'pop' removes the "Target" column from the frame, so what
	# remains in 'pre_processed_data' are the input features only
	y = pre_processed_data.pop("Target")
	x = pre_processed_data

	# The initial data-set is very imbalanced: use SMOTE to get better results. SMOTE is seeded
	# with the same RANDOM_STATE as the split below so that the resampling is reproducible
	# from one run to the next (an unseeded SMOTE would make the seeded split pointless)
	x, y = SMOTE(random_state=RANDOM_STATE).fit_resample(x, y)

	# Retrieve the training data: 70% of the samples, stratified on the target
	X_train, _, y_train, _ = train_test_split(
	    x, y, stratify=y, test_size=0.3, random_state=RANDOM_STATE
	)

	# Convert the Pandas data frames into Numpy arrays
	X_train_np = X_train.to_numpy()
	y_train_np = y_train.to_numpy()


	# Step 2: fit the multi-input classifier on the training set, then compile it
	print("Train and compile the model")

	model = MultiInputXGBClassifier(n_estimators=40, max_depth=3)
	model.fit(X_train_np, y_train_np)

	# Split the training features column-wise (axis=1) into three groups, one per model input
	multi_inputs_train = numpy.array_split(X_train_np, 3, axis=1)

	# Compile using these groups as inputs, every one of them being flagged as encrypted
	model.compile(
	    *multi_inputs_train,
	    inputs_encryption_status=["encrypted"] * len(multi_inputs_train),
	)

	# Step 3: write the deployment files into a fresh deployment folder.
	# If a deployment folder is already there, remove it together with everything it contains
	if DEPLOYMENT_PATH.is_dir():
	    shutil.rmtree(DEPLOYMENT_PATH)


	print("Save deployment files")

	# Wrap the compiled model and save the files needed for deployment under DEPLOYMENT_PATH
	MultiInputsFHEModelDev(model, DEPLOYMENT_PATH).save()

	print("Done !")