Spaces:

zama-fhe
/

encrypted_credit_scoring

Running

App Files Files Community

romanbredehoft-zama committed on Jan 11

Commit

8e0d56d

•

1 Parent(s): 61cd73f

Rename to applicant and credit bureau

Browse files

Files changed (11) hide show

app.py +27 -26
backend.py +34 -33
deployment_files/model/{pre_processor_user.pkl → pre_processor_applicant.pkl} +0 -0
deployment_files/model/{pre_processor_cs_agency.pkl → pre_processor_credit_bureau.pkl} +0 -0
deployment_files/{pre_processor_user.pkl → pre_processor_applicant.pkl} +0 -0
deployment_files/{pre_processor_cs_agency.pkl → pre_processor_credit_bureau.pkl} +0 -0
development.py +16 -16
server.py +2 -2
settings.py +10 -10
utils/client_server_interface.py +2 -2
utils/pre_processing.py +3 -3

app.py CHANGED Viewed

@@ -22,9 +22,9 @@ from settings import (
 )
 from backend import (
     keygen_send,
-    pre_process_encrypt_send_user,
     pre_process_encrypt_send_bank,
-    pre_process_encrypt_send_cs_agency,
     run_fhe,
     get_output_and_decrypt,
     explain_encrypt_run_decrypt,
@@ -61,7 +61,7 @@ with demo:
         """
     )
-    gr.Markdown("# Client, Bank and Credit Scoring Agency setup")
     gr.Markdown("## Step 1: Generate the keys.")
     gr.Markdown(
@@ -90,10 +90,11 @@ with demo:
         """
         Select the information that corresponds to the profile you want to evaluate. Three sources
         of information are represented in this model:
-        - a user's personal information in order to evaluate his/her credit card eligibility;
-        - the user’s bank account history, which provides any type of information on the user's
-        banking information relevant to the decision (here, we consider duration of account);
-        - and credit scoring agency information, which represents any other information (here,
         employment history) that could provide additional insight relevant to the decision.
         Please always encrypt and send the values (through the buttons on the right) once updated
@@ -103,7 +104,7 @@ with demo:
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### User")
             bool_inputs = gr.CheckboxGroup(
                 ["Car", "Property", "Mobile phone"],
                 label="Which of the following do you actively hold or own?"
@@ -167,15 +168,15 @@ with demo:
             )
         with gr.Column():
-            encrypt_button_user = gr.Button("Encrypt the inputs and send to server.")
-            encrypted_input_user = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
     with gr.Row():
         with gr.Column(scale=2):
-            gr.Markdown("### Bank ")
             account_age = gr.Slider(
                 **ACCOUNT_MIN_MAX,
                 step=1,
@@ -192,7 +193,7 @@ with demo:
     with gr.Row():
         with gr.Column(scale=2):
-            gr.Markdown("### Credit Scoring Agency ")
             employed = gr.Radio(["Yes", "No"], label="Is the person employed ?", value="Yes")
             years_employed = gr.Dropdown(
                 choices=YEARS_EMPLOYED_BINS,
@@ -202,19 +203,19 @@ with demo:
             )
         with gr.Column():
-            encrypt_button_cs_agency = gr.Button("Encrypt the inputs and send to server.")
-            encrypted_input_cs_agency = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
-    # Button to pre-process, generate the key, encrypt and send the user inputs from the client
     # side to the server
-    encrypt_button_user.click(
-        pre_process_encrypt_send_user,
         inputs=[client_id, bool_inputs, num_children, household_size, total_income, age, \
                 income_type, education_type, family_status, occupation_type, housing_type],
-        outputs=[encrypted_input_user],
     )
     # Button to pre-process, generate the key, encrypt and send the bank inputs from the client
@@ -225,12 +226,12 @@ with demo:
         outputs=[encrypted_input_bank],
     )
-    # Button to pre-process, generate the key, encrypt and send the credit scoring agency inputs from the
     # client side to the server
-    encrypt_button_cs_agency.click(
-        pre_process_encrypt_send_cs_agency,
         inputs=[client_id, years_employed, employed],
-        outputs=[encrypted_input_cs_agency],
     )
     gr.Markdown("# Server side")
@@ -253,10 +254,10 @@ with demo:
     # Button to send the encodings to the server using post method
     execute_fhe_button.click(run_fhe, inputs=[client_id], outputs=[fhe_execution_time])
-    gr.Markdown("# Client, Bank and Credit Scoring Agency decryption")
     gr.Markdown(
         """
-        Once the server completed the inference, the encrypted output is returned to the user.
         The three entities that provide the information to compute the credit score are the only
         ones that can decrypt the result. They take part in a decryption protocol that allows to
@@ -269,7 +270,7 @@ with demo:
         """
         The first value displayed below is a shortened byte representation of the actual encrypted
         output.
-        The user is then able to decrypt the value using its private key.
         """
     )
@@ -291,7 +292,7 @@ with demo:
     gr.Markdown("## Step 6 (optional): Explain the prediction.")
     gr.Markdown(
         """
-        In case the credit card is likely to be denied, the user can ask for how many years of
         employment would most likely be required in order to increase the chance of getting a
         credit card approval.

 )
 from backend import (
     keygen_send,
+    pre_process_encrypt_send_applicant,
     pre_process_encrypt_send_bank,
+    pre_process_encrypt_send_credit_bureau,
     run_fhe,
     get_output_and_decrypt,
     explain_encrypt_run_decrypt,
         """
     )
+    gr.Markdown("# Applicant, Bank and Credit bureau setup")
     gr.Markdown("## Step 1: Generate the keys.")
     gr.Markdown(
         """
         Select the information that corresponds to the profile you want to evaluate. Three sources
         of information are represented in this model:
+        - the applicant's personal information in order to evaluate his/her credit card eligibility;
+        - the applicant bank account history, which provides any type of information on the
+        applicant's banking information relevant to the decision (here, we consider duration of
+        account);
+        - and credit bureau information, which represents any other information (here,
         employment history) that could provide additional insight relevant to the decision.
         Please always encrypt and send the values (through the buttons on the right) once updated
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### Applicant information")
             bool_inputs = gr.CheckboxGroup(
                 ["Car", "Property", "Mobile phone"],
                 label="Which of the following do you actively hold or own?"
             )
         with gr.Column():
+            encrypt_button_applicant = gr.Button("Encrypt the inputs and send to server.")
+            encrypted_input_applicant = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
     with gr.Row():
         with gr.Column(scale=2):
+            gr.Markdown("### Bank information")
             account_age = gr.Slider(
                 **ACCOUNT_MIN_MAX,
                 step=1,
     with gr.Row():
         with gr.Column(scale=2):
+            gr.Markdown("### Credit bureau information ")
             employed = gr.Radio(["Yes", "No"], label="Is the person employed ?", value="Yes")
             years_employed = gr.Dropdown(
                 choices=YEARS_EMPLOYED_BINS,
             )
         with gr.Column():
+            encrypt_button_credit_bureau = gr.Button("Encrypt the inputs and send to server.")
+            encrypted_input_credit_bureau = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
+    # Button to pre-process, generate the key, encrypt and send the applicant inputs from the client
     # side to the server
+    encrypt_button_applicant.click(
+        pre_process_encrypt_send_applicant,
         inputs=[client_id, bool_inputs, num_children, household_size, total_income, age, \
                 income_type, education_type, family_status, occupation_type, housing_type],
+        outputs=[encrypted_input_applicant],
     )
     # Button to pre-process, generate the key, encrypt and send the bank inputs from the client
         outputs=[encrypted_input_bank],
     )
+    # Button to pre-process, generate the key, encrypt and send the credit bureau inputs from the
     # client side to the server
+    encrypt_button_credit_bureau.click(
+        pre_process_encrypt_send_credit_bureau,
         inputs=[client_id, years_employed, employed],
+        outputs=[encrypted_input_credit_bureau],
     )
     gr.Markdown("# Server side")
     # Button to send the encodings to the server using post method
     execute_fhe_button.click(run_fhe, inputs=[client_id], outputs=[fhe_execution_time])
+    gr.Markdown("# Applicant, Bank and Credit bureau decryption")
     gr.Markdown(
         """
+        Once the server completed the inference, the encrypted output is returned to the applicant.
         The three entities that provide the information to compute the credit score are the only
         ones that can decrypt the result. They take part in a decryption protocol that allows to
         """
         The first value displayed below is a shortened byte representation of the actual encrypted
         output.
+        The applicant is then able to decrypt the value using its private key.
         """
     )
     gr.Markdown("## Step 6 (optional): Explain the prediction.")
     gr.Markdown(
         """
+        In case the credit card is likely to be denied, the applicant can ask for how many years of
         employment would most likely be required in order to increase the chance of getting a
         credit card approval.

backend.py CHANGED Viewed

@@ -18,13 +18,13 @@ from settings import (
     PROCESSED_INPUT_SHAPE,
     INPUT_INDEXES,
     INPUT_SLICES,
-    PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
-    PRE_PROCESSOR_CS_AGENCY_PATH,
     CLIENT_TYPES,
-    USER_COLUMNS,
     BANK_COLUMNS,
-    CS_AGENCY_COLUMNS,
     YEARS_EMPLOYED_BINS,
     YEARS_EMPLOYED_BIN_NAME_TO_INDEX,
 )
@@ -37,13 +37,13 @@ DENIED_MESSAGE = "Credit card is likely to be denied ❌"
 # Load pre-processor instances
 with (
-    PRE_PROCESSOR_USER_PATH.open('rb') as file_user,
     PRE_PROCESSOR_BANK_PATH.open('rb') as file_bank,
-    PRE_PROCESSOR_CS_AGENCY_PATH.open('rb') as file_cs_agency,
 ):
-    PRE_PROCESSOR_USER = pickle.load(file_user)
     PRE_PROCESSOR_BANK = pickle.load(file_bank)
-    PRE_PROCESSOR_CS_AGENCY = pickle.load(file_cs_agency)
 def shorten_bytes_object(bytes_object, limit=500):
@@ -114,8 +114,8 @@ def _get_client_file_path(name, client_id, client_type=None):
         name (str): The desired file name (either 'evaluation_key', 'encrypted_inputs' or
             'encrypted_outputs').
         client_id (int): The client ID to consider.
-        client_type (Optional[str]): The type of user to consider (either 'user', 'bank',
-            'cs_agency' or None). Default to None, which is used for evaluation key and output.
     Returns:
         pathlib.Path: The file path.
@@ -135,8 +135,8 @@ def _send_to_server(client_id, client_type, file_name):
     Args:
         client_id (int): The client ID to consider.
-        client_type (Optional[str]): The type of client to consider (either 'user', 'bank',
-            'cs_agency' or None).
         file_name (str): File name to send (either 'evaluation_key' or 'encrypted_inputs').
     """
     # Get the paths to the encrypted inputs
@@ -208,7 +208,8 @@ def _encrypt_send(client_id, inputs, client_type):
     Args:
         client_id (str): The current client ID to consider.
         inputs (numpy.ndarray): The inputs to encrypt.
-        client_type (str): The type of client to consider (either 'user', 'bank' or 'cs_agency').
     Returns:
         encrypted_inputs_short (str): A short representation of the encrypted input to send in hex.
@@ -244,8 +245,8 @@ def _encrypt_send(client_id, inputs, client_type):
     return encrypted_inputs_short
-def pre_process_encrypt_send_user(client_id, *inputs):
-    """Pre-process, encrypt and send the user inputs for a specific client to the server.
     Args:
         client_id (str): The current client ID to consider.
@@ -262,7 +263,7 @@ def pre_process_encrypt_send_user(client_id, *inputs):
     own_property = "Property" in bool_inputs
     mobile_phone = "Mobile phone" in bool_inputs
-    user_inputs = pandas.DataFrame({
         "Own_car": [own_car],
         "Own_property": [own_property],
         "Mobile_phone": [mobile_phone],
@@ -277,11 +278,11 @@ def pre_process_encrypt_send_user(client_id, *inputs):
         "Housing_type": [housing_type],
     })
-    user_inputs = user_inputs.reindex(USER_COLUMNS, axis=1)
-    preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
-    return _encrypt_send(client_id, preprocessed_user_inputs, "user")
 def pre_process_encrypt_send_bank(client_id, *inputs):
@@ -307,8 +308,8 @@ def pre_process_encrypt_send_bank(client_id, *inputs):
     return _encrypt_send(client_id, preprocessed_bank_inputs, "bank")
-def pre_process_encrypt_send_cs_agency(client_id, *inputs):
-    """Pre-process, encrypt and send the credit scoring agency inputs for a specific client to the server.
     Args:
         client_id (str): The current client ID to consider.
@@ -322,15 +323,15 @@ def pre_process_encrypt_send_cs_agency(client_id, *inputs):
     years_employed = YEARS_EMPLOYED_BIN_NAME_TO_INDEX[years_employed_bin]
     is_employed = employed == "Yes"
-    cs_agency_inputs = pandas.DataFrame({
         "Years_employed": [years_employed],
         "Employed": [is_employed],
     })
-    cs_agency_inputs = cs_agency_inputs.reindex(CS_AGENCY_COLUMNS, axis=1)
-    preprocessed_cs_agency_inputs = PRE_PROCESSOR_CS_AGENCY.transform(cs_agency_inputs)
-    return _encrypt_send(client_id, preprocessed_cs_agency_inputs, "cs_agency")
 def run_fhe(client_id):
@@ -426,7 +427,7 @@ def explain_encrypt_run_decrypt(client_id, prediction_output, *inputs):
             "Explaining the prediction can only be done if the credit card is likely to be denied."
         )
-    # Retrieve the credit scoring agency inputs
     years_employed, employed = inputs
     # Years_employed is divided into several ordered bins. Here, we retrieve the index representing
@@ -435,14 +436,14 @@ def explain_encrypt_run_decrypt(client_id, prediction_output, *inputs):
     # If the bin is not the last (representing the most years of employment), we run the model in
     # FHE for each bins "older" or equal to the given bin, in order. Then, we retrieve the first
-    # bin that changes the model's prediction to "approval" and display it to the user.
     if bin_index != len(YEARS_EMPLOYED_BINS) - 1:
         # Loop over the bins starting with "older" or equal to the given bin
         for years_employed_bin in YEARS_EMPLOYED_BINS[bin_index:]:
             # Send the new encrypted input
-            pre_process_encrypt_send_cs_agency(client_id, years_employed_bin, employed)
             # Run the model in FHE
             run_fhe(client_id)
@@ -450,16 +451,16 @@ def explain_encrypt_run_decrypt(client_id, prediction_output, *inputs):
             # Retrieve the new prediction
             output_prediction = get_output_and_decrypt(client_id)
-            # If the bin made the model predict an approval, share it to the user
             if "approved" in output_prediction[0]:
-                # If the approval was made using the given input, that means the user most likely
-                # tried the bin suggested in a previous explainability run. In that case, we
                 # confirm that the credit card is likely to be approved
                 if years_employed_bin == years_employed:
                     return APPROVED_MESSAGE
-                # Else, that means the users is looking for some explainability. We therefore
                 # suggest to try the obtained bin
                 return (
                     DENIED_MESSAGE + f" However, having at least {years_employed_bin} years of "
@@ -474,7 +475,7 @@ def explain_encrypt_run_decrypt(client_id, prediction_output, *inputs):
             "bigger impact in this particular case."
         )
-    # In case the user tried the "oldest" bin (but still got denied), explain why
     return (
         DENIED_MESSAGE + " Unfortunately, you already have the maximum amount of years of "
         f"employment ({years_employed} years). Other inputs like the income or the account's age "

     PROCESSED_INPUT_SHAPE,
     INPUT_INDEXES,
     INPUT_SLICES,
+    PRE_PROCESSOR_APPLICANT_PATH,
     PRE_PROCESSOR_BANK_PATH,
+    PRE_PROCESSOR_CREDIT_BUREAU_PATH,
     CLIENT_TYPES,
+    APPLICANT_COLUMNS,
     BANK_COLUMNS,
+    CREDIT_BUREAU_COLUMNS,
     YEARS_EMPLOYED_BINS,
     YEARS_EMPLOYED_BIN_NAME_TO_INDEX,
 )
 # Load pre-processor instances
 with (
+    PRE_PROCESSOR_APPLICANT_PATH.open('rb') as file_applicant,
     PRE_PROCESSOR_BANK_PATH.open('rb') as file_bank,
+    PRE_PROCESSOR_CREDIT_BUREAU_PATH.open('rb') as file_credit_bureau,
 ):
+    PRE_PROCESSOR_APPLICANT = pickle.load(file_applicant)
     PRE_PROCESSOR_BANK = pickle.load(file_bank)
+    PRE_PROCESSOR_CREDIT_BUREAU = pickle.load(file_credit_bureau)
 def shorten_bytes_object(bytes_object, limit=500):
         name (str): The desired file name (either 'evaluation_key', 'encrypted_inputs' or
             'encrypted_outputs').
         client_id (int): The client ID to consider.
+        client_type (Optional[str]): The type of client to consider (either 'applicant', 'bank',
+            'credit_bureau' or None). Defaults to None, which is used for evaluation key and output.
     Returns:
         pathlib.Path: The file path.
     Args:
         client_id (int): The client ID to consider.
+        client_type (Optional[str]): The type of client to consider (either 'applicant', 'bank',
+            'credit_bureau' or None).
         file_name (str): File name to send (either 'evaluation_key' or 'encrypted_inputs').
     """
     # Get the paths to the encrypted inputs
     Args:
         client_id (str): The current client ID to consider.
         inputs (numpy.ndarray): The inputs to encrypt.
+        client_type (str): The type of client to consider (either 'applicant', 'bank' or
+            'credit_bureau').
     Returns:
         encrypted_inputs_short (str): A short representation of the encrypted input to send in hex.
     return encrypted_inputs_short
+def pre_process_encrypt_send_applicant(client_id, *inputs):
+    """Pre-process, encrypt and send the applicant inputs for a specific client to the server.
     Args:
         client_id (str): The current client ID to consider.
     own_property = "Property" in bool_inputs
     mobile_phone = "Mobile phone" in bool_inputs
+    applicant_inputs = pandas.DataFrame({
         "Own_car": [own_car],
         "Own_property": [own_property],
         "Mobile_phone": [mobile_phone],
         "Housing_type": [housing_type],
     })
+    applicant_inputs = applicant_inputs.reindex(APPLICANT_COLUMNS, axis=1)
+    preprocessed_applicant_inputs = PRE_PROCESSOR_APPLICANT.transform(applicant_inputs)
+    return _encrypt_send(client_id, preprocessed_applicant_inputs, "applicant")
 def pre_process_encrypt_send_bank(client_id, *inputs):
     return _encrypt_send(client_id, preprocessed_bank_inputs, "bank")
+def pre_process_encrypt_send_credit_bureau(client_id, *inputs):
+    """Pre-process, encrypt and send the credit bureau inputs for a specific client to the server.
     Args:
         client_id (str): The current client ID to consider.
     years_employed = YEARS_EMPLOYED_BIN_NAME_TO_INDEX[years_employed_bin]
     is_employed = employed == "Yes"
+    credit_bureau_inputs = pandas.DataFrame({
         "Years_employed": [years_employed],
         "Employed": [is_employed],
     })
+    credit_bureau_inputs = credit_bureau_inputs.reindex(CREDIT_BUREAU_COLUMNS, axis=1)
+    preprocessed_credit_bureau_inputs = PRE_PROCESSOR_CREDIT_BUREAU.transform(credit_bureau_inputs)
+    return _encrypt_send(client_id, preprocessed_credit_bureau_inputs, "credit_bureau")
 def run_fhe(client_id):
             "Explaining the prediction can only be done if the credit card is likely to be denied."
         )
+    # Retrieve the credit bureau inputs
     years_employed, employed = inputs
     # Years_employed is divided into several ordered bins. Here, we retrieve the index representing
     # If the bin is not the last (representing the most years of employment), we run the model in
     # FHE for each bins "older" or equal to the given bin, in order. Then, we retrieve the first
+    # bin that changes the model's prediction to "approval" and display it to the applicant.
     if bin_index != len(YEARS_EMPLOYED_BINS) - 1:
         # Loop over the bins starting with "older" or equal to the given bin
         for years_employed_bin in YEARS_EMPLOYED_BINS[bin_index:]:
             # Send the new encrypted input
+            pre_process_encrypt_send_credit_bureau(client_id, years_employed_bin, employed)
             # Run the model in FHE
             run_fhe(client_id)
             # Retrieve the new prediction
             output_prediction = get_output_and_decrypt(client_id)
+            # If the bin made the model predict an approval, share it with the applicant
             if "approved" in output_prediction[0]:
+                # If the approval was made using the given input, that means the applicant most
+                # likely tried the bin suggested in a previous explainability run. In that case, we
                 # confirm that the credit card is likely to be approved
                 if years_employed_bin == years_employed:
                     return APPROVED_MESSAGE
+                # Else, that means the applicant is looking for some explainability. We therefore
                 # suggest to try the obtained bin
                 return (
                     DENIED_MESSAGE + f" However, having at least {years_employed_bin} years of "
             "bigger impact in this particular case."
         )
+    # In case the applicant tried the "oldest" bin (but still got denied), explain why
     return (
         DENIED_MESSAGE + " Unfortunately, you already have the maximum amount of years of "
         f"employment ({years_employed} years). Other inputs like the income or the account's age "

deployment_files/model/{pre_processor_user.pkl → pre_processor_applicant.pkl} RENAMED Viewed

File without changes

deployment_files/model/{pre_processor_cs_agency.pkl → pre_processor_credit_bureau.pkl} RENAMED Viewed

File without changes

deployment_files/{pre_processor_user.pkl → pre_processor_applicant.pkl} RENAMED Viewed

File without changes

deployment_files/{pre_processor_cs_agency.pkl → pre_processor_credit_bureau.pkl} RENAMED Viewed

File without changes

development.py CHANGED Viewed

@@ -9,12 +9,12 @@ from settings import (
     DEPLOYMENT_PATH,
     DATA_PATH,
     INPUT_SLICES,
-    PRE_PROCESSOR_USER_PATH,
     PRE_PROCESSOR_BANK_PATH,
-    PRE_PROCESSOR_CS_AGENCY_PATH,
-    USER_COLUMNS,
     BANK_COLUMNS,
-    CS_AGENCY_COLUMNS,
 )
 from utils.client_server_interface import MultiInputsFHEModelDev
 from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
@@ -31,9 +31,9 @@ def get_multi_inputs(data):
         (Tuple[numpy.ndarray]): The inputs for all three parties.
     """
     return (
-        data[:, INPUT_SLICES["user"]],
         data[:, INPUT_SLICES["bank"]],
-        data[:, INPUT_SLICES["cs_agency"]]
     )
@@ -47,18 +47,18 @@ data_x = data.copy()
 data_y = data_x.pop("Target").copy().to_frame()
 # Get data from all parties
-data_user = data_x[USER_COLUMNS].copy()
 data_bank = data_x[BANK_COLUMNS].copy()
-data_cs_agency = data_x[CS_AGENCY_COLUMNS].copy()
 # Feature engineer the data
-pre_processor_user, pre_processor_bank, pre_processor_cs_agency = get_pre_processors()
-preprocessed_data_user = pre_processor_user.fit_transform(data_user)
 preprocessed_data_bank = pre_processor_bank.fit_transform(data_bank)
-preprocessed_data_cs_agency = pre_processor_cs_agency.fit_transform(data_cs_agency)
-preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_data_bank, preprocessed_data_cs_agency), axis=1)
 print("\nTrain and compile the model")
@@ -83,12 +83,12 @@ fhe_model_dev.save(via_mlir=True)
 # Save pre-processors
 with (
-    PRE_PROCESSOR_USER_PATH.open('wb') as file_user,
     PRE_PROCESSOR_BANK_PATH.open('wb') as file_bank,
-    PRE_PROCESSOR_CS_AGENCY_PATH.open('wb') as file_cs_agency,
 ):
-    pickle.dump(pre_processor_user, file_user)
     pickle.dump(pre_processor_bank, file_bank)
-    pickle.dump(pre_processor_cs_agency, file_cs_agency)
 print("\nDone !")

     DEPLOYMENT_PATH,
     DATA_PATH,
     INPUT_SLICES,
+    PRE_PROCESSOR_APPLICANT_PATH,
     PRE_PROCESSOR_BANK_PATH,
+    PRE_PROCESSOR_CREDIT_BUREAU_PATH,
+    APPLICANT_COLUMNS,
     BANK_COLUMNS,
+    CREDIT_BUREAU_COLUMNS,
 )
 from utils.client_server_interface import MultiInputsFHEModelDev
 from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
         (Tuple[numpy.ndarray]): The inputs for all three parties.
     """
     return (
+        data[:, INPUT_SLICES["applicant"]],
         data[:, INPUT_SLICES["bank"]],
+        data[:, INPUT_SLICES["credit_bureau"]]
     )
 data_y = data_x.pop("Target").copy().to_frame()
 # Get data from all parties
+data_applicant = data_x[APPLICANT_COLUMNS].copy()
 data_bank = data_x[BANK_COLUMNS].copy()
+data_credit_bureau = data_x[CREDIT_BUREAU_COLUMNS].copy()
 # Feature engineer the data
+pre_processor_applicant, pre_processor_bank, pre_processor_credit_bureau = get_pre_processors()
+preprocessed_data_applicant = pre_processor_applicant.fit_transform(data_applicant)
 preprocessed_data_bank = pre_processor_bank.fit_transform(data_bank)
+preprocessed_data_credit_bureau = pre_processor_credit_bureau.fit_transform(data_credit_bureau)
+preprocessed_data_x = numpy.concatenate((preprocessed_data_applicant, preprocessed_data_bank, preprocessed_data_credit_bureau), axis=1)
 print("\nTrain and compile the model")
 # Save pre-processors
 with (
+    PRE_PROCESSOR_APPLICANT_PATH.open('wb') as file_applicant,
     PRE_PROCESSOR_BANK_PATH.open('wb') as file_bank,
+    PRE_PROCESSOR_CREDIT_BUREAU_PATH.open('wb') as file_credit_bureau,
 ):
+    pickle.dump(pre_processor_applicant, file_applicant)
     pickle.dump(pre_processor_bank, file_bank)
+    pickle.dump(pre_processor_credit_bureau, file_credit_bureau)
 print("\nDone !")

server.py CHANGED Viewed

@@ -19,8 +19,8 @@ def _get_server_file_path(name, client_id, client_type=None):
         name (str): The desired file name (either 'evaluation_key', 'encrypted_inputs' or
             'encrypted_outputs').
         client_id (int): The client ID to consider.
-        client_type (Optional[str]): The type of user to consider (either 'user', 'bank',
-            'cs_agency' or None). Default to None, which is used for evaluation key and output.
     Returns:
         pathlib.Path: The file path.

         name (str): The desired file name (either 'evaluation_key', 'encrypted_inputs' or
             'encrypted_outputs').
         client_id (int): The client ID to consider.
+        client_type (Optional[str]): The type of client to consider (either 'applicant', 'bank',
+            'credit_bureau' or None). Defaults to None, which is used for evaluation key and output.
     Returns:
         pathlib.Path: The file path.

settings.py CHANGED Viewed

@@ -16,9 +16,9 @@ SERVER_FILES = REPO_DIR / "server_files"
 DEPLOYMENT_PATH = DEPLOYMENT_PATH / "model"
 # Path targeting pre-processor saved files
-PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
 PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / 'pre_processor_bank.pkl'
-PRE_PROCESSOR_CS_AGENCY_PATH = DEPLOYMENT_PATH / 'pre_processor_cs_agency.pkl'
 # Create the necessary directories
 FHE_KEYS.mkdir(exist_ok=True)
@@ -34,26 +34,26 @@ DATA_PATH = "data/data.csv"
 # Development settings
 PROCESSED_INPUT_SHAPE = (1, 39)
-CLIENT_TYPES = ["user", "bank", "cs_agency"]
 INPUT_INDEXES = {
-    "user": 0,
     "bank": 1,
-    "cs_agency": 2,
 }
 INPUT_SLICES = {
-    "user": slice(0, 36),  # First position: start from 0
-    "bank": slice(36, 37),  # Second position: start from n_feature_user
-    "cs_agency": slice(37, 39),  # Third position: start from n_feature_user + n_feature_bank
 }
 # Fix column order for pre-processing steps
-USER_COLUMNS = [
     'Own_car', 'Own_property', 'Mobile_phone', 'Num_children', 'Household_size',
     'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
     'Occupation_type',
 ]
 BANK_COLUMNS = ["Account_age"]
-CS_AGENCY_COLUMNS = ["Years_employed", "Employed"]
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")

 DEPLOYMENT_PATH = DEPLOYMENT_PATH / "model"
 # Path targeting pre-processor saved files
+PRE_PROCESSOR_APPLICANT_PATH = DEPLOYMENT_PATH / 'pre_processor_applicant.pkl'
 PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / 'pre_processor_bank.pkl'
+PRE_PROCESSOR_CREDIT_BUREAU_PATH = DEPLOYMENT_PATH / 'pre_processor_credit_bureau.pkl'
 # Create the necessary directories
 FHE_KEYS.mkdir(exist_ok=True)
 # Development settings
 PROCESSED_INPUT_SHAPE = (1, 39)
+CLIENT_TYPES = ["applicant", "bank", "credit_bureau"]
 INPUT_INDEXES = {
+    "applicant": 0,
     "bank": 1,
+    "credit_bureau": 2,
 }
 INPUT_SLICES = {
+    "applicant": slice(0, 36),  # First position: start from 0
+    "bank": slice(36, 37),  # Second position: start from n_feature_applicant
+    "credit_bureau": slice(37, 39),  # Third position: start from n_feature_applicant + n_feature_bank
 }
 # Fix column order for pre-processing steps
+APPLICANT_COLUMNS = [
     'Own_car', 'Own_property', 'Mobile_phone', 'Num_children', 'Household_size',
     'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
     'Occupation_type',
 ]
 BANK_COLUMNS = ["Account_age"]
+CREDIT_BUREAU_COLUMNS = ["Years_employed", "Employed"]
 _data = pandas.read_csv(DATA_PATH, encoding="utf-8")

utils/client_server_interface.py CHANGED Viewed

@@ -46,8 +46,8 @@ class MultiInputsFHEModelClient(FHEModelClient):
         Args:
             x (numpy.ndarray): The input to consider. Here, the input should only represent a
                 single party.
-            input_index (int): The index representing the type of model (0: "user", 1: "bank",
-                2: "cs_agency")
             processed_input_shape (Tuple[int]): The total input shape (all parties combined) after
                 pre-processing.
             input_slice (slice): The slices to consider for the given party.

         Args:
             x (numpy.ndarray): The input to consider. Here, the input should only represent a
                 single party.
+            input_index (int): The index representing the type of model (0: "applicant", 1: "bank",
+                2: "credit_bureau")
             processed_input_shape (Tuple[int]): The total input shape (all parties combined) after
                 pre-processing.
             input_slice (slice): The slices to consider for the given party.

utils/pre_processing.py CHANGED Viewed

@@ -22,7 +22,7 @@ def _replace_values_eq(column, value):
     return column
 def get_pre_processors():
-    pre_processor_user = ColumnTransformer(
         transformers=[
             (
                 "replace_occupation_type_labor",
@@ -55,10 +55,10 @@ def get_pre_processors():
         verbose_feature_names_out=False,
     )
-    pre_processor_cs_agency = ColumnTransformer(
         transformers=[],
         remainder='passthrough',
         verbose_feature_names_out=False,
     )
-    return pre_processor_user, pre_processor_bank, pre_processor_cs_agency

     return column
 def get_pre_processors():
+    pre_processor_applicant = ColumnTransformer(
         transformers=[
             (
                 "replace_occupation_type_labor",
         verbose_feature_names_out=False,
     )
+    pre_processor_credit_bureau = ColumnTransformer(
         transformers=[],
         remainder='passthrough',
         verbose_feature_names_out=False,
     )
+    return pre_processor_applicant, pre_processor_bank, pre_processor_credit_bureau