Spaces:

zama-fhe
/

encrypted_credit_scoring

Running on CPU Upgrade

App Files Files Community

encrypted_credit_scoring / development.py

romanbredehoft-zama

Rename to applicant and credit bureau

8e0d56d 10 months ago

raw

history blame

2.95 kB

	"""Train and compile the model."""

	import shutil
	import numpy
	import pandas
	import pickle

	from settings import (
	DEPLOYMENT_PATH,
	DATA_PATH,
	INPUT_SLICES,
	PRE_PROCESSOR_APPLICANT_PATH,
	PRE_PROCESSOR_BANK_PATH,
	PRE_PROCESSOR_CREDIT_BUREAU_PATH,
	APPLICANT_COLUMNS,
	BANK_COLUMNS,
	CREDIT_BUREAU_COLUMNS,
	)
	from utils.client_server_interface import MultiInputsFHEModelDev
	from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
	from utils.pre_processing import get_pre_processors


	def get_multi_inputs(data):
	    """Split the input data into one input per party, using fixed column slices.

	    The columns of each party are selected with the slices stored in ``INPUT_SLICES`` under the
	    "applicant", "bank" and "credit_bureau" keys.

	    Args:
	        data (numpy.ndarray): The input data to consider, indexed as ``data[:, columns]``.

	    Returns:
	        (Tuple[numpy.ndarray]): The applicant, bank and credit bureau inputs, in that order.
	    """
	    party_names = ("applicant", "bank", "credit_bureau")
	    return tuple(data[:, INPUT_SLICES[party_name]] for party_name in party_names)


	print("Load and pre-process the data")

	# Read the full data-set
	data = pandas.read_csv(DATA_PATH, encoding="utf-8")

	# Split the data-set into input features and the "Target" column (kept as a data-frame)
	data_x = data.copy()
	target_column = data_x.pop("Target")
	data_y = target_column.copy().to_frame()

	# Extract each party's own columns
	data_applicant = data_x[APPLICANT_COLUMNS].copy()
	data_bank = data_x[BANK_COLUMNS].copy()
	data_credit_bureau = data_x[CREDIT_BUREAU_COLUMNS].copy()

	# Feature engineering: fit one pre-processor per party and transform that party's data with it
	(
	    pre_processor_applicant,
	    pre_processor_bank,
	    pre_processor_credit_bureau,
	) = get_pre_processors()

	preprocessed_data_applicant = pre_processor_applicant.fit_transform(data_applicant)
	preprocessed_data_bank = pre_processor_bank.fit_transform(data_bank)
	preprocessed_data_credit_bureau = pre_processor_credit_bureau.fit_transform(data_credit_bureau)

	# Put the parties' features side by side, in the applicant / bank / credit bureau order
	preprocessed_parts = (
	    preprocessed_data_applicant,
	    preprocessed_data_bank,
	    preprocessed_data_credit_bureau,
	)
	preprocessed_data_x = numpy.concatenate(preprocessed_parts, axis=1)


	print("\nTrain and compile the model")

	# fit_benchmark returns a pair; its first item replaces the freshly built model
	# NOTE(review): presumably (fitted FHE-ready model, fitted scikit-learn model) - confirm
	model, sklearn_model = MultiInputDecisionTreeClassifier().fit_benchmark(
	    preprocessed_data_x, data_y
	)

	# Compile using the training inputs of the three parties, each one flagged as encrypted
	multi_inputs_train = get_multi_inputs(preprocessed_data_x)
	encryption_status = ["encrypted"] * 3
	model.compile(*multi_inputs_train, inputs_encryption_status=encryption_status)

	print("\nSave deployment files")

	# Start from a clean state: remove the deployment folder and its content if it already exists
	if DEPLOYMENT_PATH.is_dir():
	    shutil.rmtree(DEPLOYMENT_PATH)

	# Save files needed for deployment (and enable cross-platform deployment)
	fhe_model_dev = MultiInputsFHEModelDev(DEPLOYMENT_PATH, model)
	fhe_model_dev.save(via_mlir=True)

	# Pickle each fitted pre-processor into its own file (all files are opened before any dump)
	with (
	    PRE_PROCESSOR_APPLICANT_PATH.open("wb") as file_applicant,
	    PRE_PROCESSOR_BANK_PATH.open("wb") as file_bank,
	    PRE_PROCESSOR_CREDIT_BUREAU_PATH.open("wb") as file_credit_bureau,
	):
	    pickle_targets = (
	        (pre_processor_applicant, file_applicant),
	        (pre_processor_bank, file_bank),
	        (pre_processor_credit_bureau, file_credit_bureau),
	    )
	    for pre_processor, pre_processor_file in pickle_targets:
	        pickle.dump(pre_processor, pre_processor_file)

	print("\nDone !")