Spaces:

zama-fhe
/

encrypted_credit_scoring

Running

App Files Files Community

romanbredehoft-zama committed on Nov 22, 2023

Commit

9a997e4

•

1 Parent(s): 4337a72

First working demo with multi-inputs XGB

Browse files

Files changed (15) hide show

app.py +96 -365
backend.py +411 -0
data/clean_data.csv +0 -0
deployment_files/client.zip +2 -2
deployment_files/pre_processor_third_party.pkl +3 -0
deployment_files/pre_processor_user.pkl +3 -0
deployment_files/server.zip +2 -2
development.py +97 -0
development/development.py +0 -67
development/pre_processing.py +0 -122
server.py +29 -20
settings.py +41 -5
{development → utils}/client_server_interface.py +8 -7
{development → utils}/model.py +43 -0
utils/pre_processing.py +85 -0

app.py CHANGED Viewed

@@ -1,318 +1,39 @@
-"""A local gradio app that filters images using FHE."""
-import os
-import shutil
 import subprocess
 import time
 import gradio as gr
-import numpy
-import requests
-from itertools import chain
 from settings import (
     REPO_DIR,
-    SERVER_URL,
-    FHE_KEYS,
-    CLIENT_FILES,
-    SERVER_FILES,
-    DEPLOYMENT_PATH,
-    INITIAL_INPUT_SHAPE,
-    INPUT_INDEXES,
-    START_POSITIONS,
 )
-from development.client_server_interface import MultiInputsFHEModelClient
 subprocess.Popen(["uvicorn", "server:app"], cwd=REPO_DIR)
 time.sleep(3)
-def shorten_bytes_object(bytes_object, limit=500):
-    """Shorten the input bytes object to a given length.
-    Encrypted data is too large for displaying it in the browser using Gradio. This function
-    provides a shorten representation of it.
-    Args:
-        bytes_object (bytes): The input to shorten
-        limit (int): The length to consider. Default to 500.
-    Returns:
-        str: Hexadecimal string shorten representation of the input byte object.
-    """
-    # Define a shift for better display
-    shift = 100
-    return bytes_object[shift : limit + shift].hex()
-def get_client(client_id, client_type):
-    """Get the client API.
-    Args:
-        client_id (int): The client ID to consider.
-        client_type (str): The type of user to consider (either 'user', 'bank' or 'third_party').
-    Returns:
-        FHEModelClient: The client API.
-    """
-    key_dir = FHE_KEYS / f"{client_type}_{client_id}"
-    return MultiInputsFHEModelClient(DEPLOYMENT_PATH, key_dir=key_dir)
-def get_client_file_path(name, client_id, client_type):
-    """Get the correct temporary file path for the client.
-    Args:
-        name (str): The desired file name (either 'evaluation_key' or 'encrypted_inputs').
-        client_id (int): The client ID to consider.
-        client_type (str): The type of user to consider (either 'user', 'bank' or 'third_party').
-    Returns:
-        pathlib.Path: The file path.
-    """
-    return CLIENT_FILES / f"{name}_{client_type}_{client_id}"
-def clean_temporary_files(n_keys=20):
-    """Clean keys and encrypted images.
-    A maximum of n_keys keys and associated temporary files are allowed to be stored. Once this
-    limit is reached, the oldest files are deleted.
-    Args:
-        n_keys (int): The maximum number of keys and associated files to be stored. Default to 20.
-    """
-    # Get the oldest key files in the key directory
-    key_dirs = sorted(FHE_KEYS.iterdir(), key=os.path.getmtime)
-    # If more than n_keys keys are found, remove the oldest
-    user_ids = []
-    if len(key_dirs) > n_keys:
-        n_keys_to_delete = len(key_dirs) - n_keys
-        for key_dir in key_dirs[:n_keys_to_delete]:
-            user_ids.append(key_dir.name)
-            shutil.rmtree(key_dir)
-    # Get all the encrypted objects in the temporary folder
-    client_files = CLIENT_FILES.iterdir()
-    server_files = SERVER_FILES.iterdir()
-    # Delete all files related to the ids whose keys were deleted
-    for file in chain(client_files, server_files):
-        for user_id in user_ids:
-            if user_id in file.name:
-                file.unlink()
-def keygen(client_id, client_type):
-    """Generate the private key associated to a filter.
-    Args:
-        client_id (int): The client ID to consider.
-        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
-    """
-    # Clean temporary files
-    clean_temporary_files()
-    # Retrieve the client instance
-    client = get_client(client_id, client_type)
-    # Generate a private key
-    client.generate_private_and_evaluation_keys(force=True)
-    # Retrieve the serialized evaluation key. In this case, as circuits are fully leveled, this
-    # evaluation key is empty. However, for software reasons, it is still needed for proper FHE
-    # execution
-    evaluation_key = client.get_serialized_evaluation_keys()
-    # Save evaluation_key as bytes in a file as it is too large to pass through regular Gradio
-    # buttons (see https://github.com/gradio-app/gradio/issues/1877)
-    evaluation_key_path = get_client_file_path("evaluation_key", client_id, client_type)
-    with evaluation_key_path.open("wb") as evaluation_key_file:
-        evaluation_key_file.write(evaluation_key)
-def send_input(client_id, client_type):
-    """Send the encrypted input image as well as the evaluation key to the server.
-    Args:
-        client_id (int): The client ID to consider.
-        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
-    """
-    # Get the paths to the evaluation key and encrypted inputs
-    evaluation_key_path = get_client_file_path("evaluation_key", client_id, client_type)
-    encrypted_input_path = get_client_file_path("encrypted_inputs", client_id, client_type)
-    # Define the data and files to post
-    data = {
-        "client_id": client_id,
-        "client_type": client_type,
-    }
-    files = [
-        ("files", open(encrypted_input_path, "rb")),
-        ("files", open(evaluation_key_path, "rb")),
-    ]
-    # Send the encrypted input image and evaluation key to the server
-    url = SERVER_URL + "send_input"
-    with requests.post(
-        url=url,
-        data=data,
-        files=files,
-    ) as response:
-        return response.ok
-def keygen_encrypt_send(inputs, client_type):
-    """Encrypt the given inputs for a specific client.
-    Args:
-        inputs (numpy.ndarray): The inputs to encrypt.
-        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
-    Returns:
-    """
-    # Create an ID for the current client to consider
-    client_id = numpy.random.randint(0, 2**32)
-    keygen(client_id, client_type)
-    # Retrieve the client instance
-    client = get_client(client_id, client_type)
-    # TODO : pre-process the data first
-    # Quantize, encrypt and serialize the inputs
-    encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
-        inputs,
-        input_index=INPUT_INDEXES[client_type],
-        initial_input_shape=INITIAL_INPUT_SHAPE,
-        start_position=START_POSITIONS[client_type],
-    )
-    # Save encrypted_inputs to bytes in a file, since too large to pass through regular Gradio
-    # buttons, https://github.com/gradio-app/gradio/issues/1877
-    encrypted_inputs_path = get_client_file_path("encrypted_inputs", client_id, client_type)
-    with encrypted_inputs_path.open("wb") as encrypted_inputs_file:
-        encrypted_inputs_file.write(encrypted_inputs)
-    # Create a truncated version of the encrypted image for display
-    encrypted_inputs_short = shorten_bytes_object(encrypted_inputs)
-    send_input(client_id, client_type)
-    # TODO: also return private key representation if possible
-    return encrypted_inputs_short
-def run_fhe(client_id):
-    """Run the model on the encrypted inputs previously sent using FHE.
-    Args:
-        client_id (int): The client ID to consider.
-    """
-    # TODO : add a warning for users to send all client types' inputs
-    data = {
-        "client_id": client_id,
-    }
-    # Trigger the FHE execution on the encrypted inputs previously sent
-    url = SERVER_URL + "run_fhe"
-    with requests.post(
-        url=url,
-        data=data,
-    ) as response:
-        if response.ok:
-            return response.json()
-        else:
-            raise gr.Error("Please wait for the inputs to be sent to the server.")
-def get_output(client_id):
-    """Retrieve the encrypted output.
-    Args:
-        client_id (int): The client ID to consider.
-    Returns:
-        output_encrypted_representation (numpy.ndarray): A representation of the encrypted output.
-    """
-    data = {
-        "client_id": client_id,
-    }
-    # Retrieve the encrypted output image
-    url = SERVER_URL + "get_output"
-    with requests.post(
-        url=url,
-        data=data,
-    ) as response:
-        if response.ok:
-            encrypted_output = response.content
-            # Save the encrypted output to bytes in a file as it is too large to pass through regular
-            # Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
-            # TODO : check if output to user is relevant
-            encrypted_output_path = get_client_file_path("encrypted_output", client_id, "user")
-            with encrypted_output_path.open("wb") as encrypted_output_file:
-                encrypted_output_file.write(encrypted_output)
-            # TODO
-            # Decrypt the output using a different (wrong) key for display
-            # output_encrypted_representation = decrypt_output_with_wrong_key(encrypted_output, client_type)
-            # return output_encrypted_representation
-            return None
-        else:
-            raise gr.Error("Please wait for the FHE execution to be completed.")
-def decrypt_output(client_id, client_type):
-    """Decrypt the result.
-    Args:
-        client_id (int): The client ID to consider.
-        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
-    Returns:
-        output(numpy.ndarray): The decrypted output
-    """
-    # Get the encrypted output path
-    encrypted_output_path = get_client_file_path("encrypted_output", client_id, client_type)
-    if not encrypted_output_path.is_file():
-        raise gr.Error("Please run the FHE execution first.")
-    # Load the encrypted output as bytes
-    with encrypted_output_path.open("rb") as encrypted_output_file:
-        encrypted_output_proba = encrypted_output_file.read()
-    # Retrieve the client API
-    client = get_client(client_id, client_type)
-    # Deserialize, decrypt and post-process the encrypted output
-    output_proba = client.deserialize_decrypt_post_process(encrypted_output_proba)
-    # Determine the predicted class
-    output = numpy.argmax(output_proba, axis=1)
-    return output
 demo = gr.Blocks()
@@ -330,60 +51,68 @@ with demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown("### User")
-            # TODO : change infos
-            choice_1 = gr.Dropdown(choices=["Yes, No"], label="Choose", interactive=True)
-            slide_1 = gr.Slider(2, 20, value=4, label="Count", info="Choose between 2 and 20")
         with gr.Column():
             gr.Markdown("### Bank ")
-            # TODO : change infos
-            checkbox_1 = gr.CheckboxGroup(["USA", "Japan", "Pakistan"], label="Countries", info="Where are they from?")
         with gr.Column():
-            gr.Markdown("### Third Party ")
-            # TODO : change infos
-            radio_1 = gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?")
     gr.Markdown("### Step 2: Keygen, encrypt  using FHE and send the inputs to the server.")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### User")
             encrypt_button_user = gr.Button("Encrypt the inputs and send to server.")
-            keys_user = gr.Textbox(
-                label="Keys representation:", max_lines=2, interactive=False
-            )
             encrypted_input_user = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
-            user_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
         with gr.Column():
             gr.Markdown("### Bank ")
             encrypt_button_bank = gr.Button("Encrypt the inputs and send to server.")
-            keys_bank = gr.Textbox(
-                label="Keys representation:", max_lines=2, interactive=False
-            )
             encrypted_input_bank = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
-            bank_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
         with gr.Column():
             gr.Markdown("### Third Party ")
             encrypt_button_third_party = gr.Button("Encrypt the inputs and send to server.")
-            keys_3 = gr.Textbox(
-                label="Keys representation:", max_lines=2, interactive=False
-            )
-            encrypted_input__third_party = gr.Textbox(
-                label="Encrypted input representation:", max_lines=2, interactive=False
-            )
             third_party_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
     gr.Markdown("## Server side")
     gr.Markdown(
@@ -412,9 +141,9 @@ with demo:
     )
     get_output_button = gr.Button("Receive the encrypted output from the server.")
-    encrypted_output_representation = gr.Textbox(
-        label="Encrypted output representation: ", max_lines=1, interactive=False
-    )
     gr.Markdown("### Step 8: Decrypt the output.")
     decrypt_button = gr.Button("Decrypt the output")
@@ -423,48 +152,50 @@ with demo:
         label="Credit card approval decision: ", max_lines=1, interactive=False
     )
-    # Button to encrypt inputs on the client side
-    # encrypt_button_user.click(
-    #     encrypt,
-    #     inputs=[user_id, input_image, filter_name],
-    #     outputs=[original_image, encrypted_input],
-    # )
-    # # Button to encrypt inputs on the client side
-    # encrypt_button_bank.click(
-    #     encrypt,
-    #     inputs=[user_id, input_image, filter_name],
-    #     outputs=[original_image, encrypted_input],
-    # )
-    # # Button to encrypt inputs on the client side
-    # encrypt_button_third_party.click(
-    #     encrypt,
-    #     inputs=[user_id, input_image, filter_name],
-    #     outputs=[original_image, encrypted_input],
-    # )
-    # # Button to send the encodings to the server using post method
-    # send_input_button.click(
-    #     send_input, inputs=[user_id, filter_name], outputs=[send_input_checkbox]
-    # )
-    # # Button to send the encodings to the server using post method
-    # execute_fhe_button.click(run_fhe, inputs=[user_id, filter_name], outputs=[fhe_execution_time])
-    # # Button to send the encodings to the server using post method
-    # get_output_button.click(
-    #     get_output,
-    #     inputs=[user_id, filter_name],
-    #     outputs=[encrypted_output_representation]
-    # )
-    # # Button to decrypt the output on the client side
-    # decrypt_button.click(
-    #     decrypt_output,
-    #     inputs=[user_id, filter_name],
-    #     outputs=[output_image, keygen_checkbox, send_input_checkbox],
-    # )
     gr.Markdown(
         "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "

+"""A gradio app for credit card approval prediction using FHE."""
 import subprocess
 import time
 import gradio as gr
 from settings import (
     REPO_DIR,
+    ACCOUNT_MIN_MAX,
+    CHILDREN_MIN_MAX,
+    INCOME_MIN_MAX,
+    AGE_MIN_MAX,
+    EMPLOYED_MIN_MAX,
+    FAMILY_MIN_MAX,
+    INCOME_TYPES,
+    OCCUPATION_TYPES,
+    HOUSING_TYPES,
+    EDUCATION_TYPES,
+    FAMILY_STATUS,
+)
+from backend import (
+    shorten_bytes_object,
+    clean_temporary_files,
+    pre_process_keygen_encrypt_send_user,
+    pre_process_keygen_encrypt_send_bank,
+    pre_process_keygen_encrypt_send_third_party,
+    run_fhe,
+    get_output,
+    decrypt_output,
 )
 subprocess.Popen(["uvicorn", "server:app"], cwd=REPO_DIR)
 time.sleep(3)
 demo = gr.Blocks()
     with gr.Row():
         with gr.Column():
             gr.Markdown("### User")
+            gender = gr.Radio(["Female", "Male"], label="Gender")
+            bool_inputs = gr.CheckboxGroup(["Car", "Property", "Work phone", "Phone", "Email"], label="What do you own ?")
+            num_children = gr.Slider(**CHILDREN_MIN_MAX, step=1, label="Number of children", info="How many children do you have (0 to 19) ?")
+            num_family = gr.Slider(**FAMILY_MIN_MAX, step=1, label="Family", info="How many members does your family have? (1 to 20) ?")
+            total_income = gr.Slider(**INCOME_MIN_MAX, label="Income", info="What's you total yearly income (in euros, 3780 to 220500) ?")
+            age = gr.Slider(**AGE_MIN_MAX, step=1, label="Age", info="How old are you (20 to 68) ?")
+            income_type = gr.Dropdown(choices=INCOME_TYPES, label="Income type", info="What is your main type of income ?")
+            education_type = gr.Dropdown(choices=EDUCATION_TYPES, label="Education", info="What is your education background ?")
+            family_status = gr.Dropdown(choices=FAMILY_STATUS, label="Family", info="What is your family status ?")
+            occupation_type = gr.Dropdown(choices=OCCUPATION_TYPES, label="Occupation", info="What is your main occupation ?")
+            housing_type = gr.Dropdown(choices=HOUSING_TYPES, label="Housing", info="In what type of housing do you live ?")
         with gr.Column():
             gr.Markdown("### Bank ")
+            account_length = gr.Slider(**ACCOUNT_MIN_MAX, step=1, label="Account length", info="How long have this person had this account (in months, 0 to 60) ?")
         with gr.Column():
+            gr.Markdown("### Third party ")
+            employed = gr.Radio(["Yes", "No"], label="Is the person employed ?")
+            years_employed = gr.Slider(**EMPLOYED_MIN_MAX, step=1, label="Years of employment", info="How long have this person been employed (in years, 0 to 43) ?")
     gr.Markdown("### Step 2: Keygen, encrypt  using FHE and send the inputs to the server.")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### User")
             encrypt_button_user = gr.Button("Encrypt the inputs and send to server.")
+            user_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
             encrypted_input_user = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
+            # keys_user = gr.Textbox(
+            #     label="Keys representation:", max_lines=2, interactive=False
+            # )
         with gr.Column():
             gr.Markdown("### Bank ")
             encrypt_button_bank = gr.Button("Encrypt the inputs and send to server.")
+            bank_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
             encrypted_input_bank = gr.Textbox(
                 label="Encrypted input representation:", max_lines=2, interactive=False
             )
+            # keys_bank = gr.Textbox(
+            #     label="Keys representation:", max_lines=2, interactive=False
+            # )
         with gr.Column():
             gr.Markdown("### Third Party ")
             encrypt_button_third_party = gr.Button("Encrypt the inputs and send to server.")
             third_party_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
+            encrypted_input_third_party = gr.Textbox(
+                label="Encrypted input representation:", max_lines=2, interactive=False
+            )
+            # keys_3 = gr.Textbox(
+            #     label="Keys representation:", max_lines=2, interactive=False
+            # )
     gr.Markdown("## Server side")
     gr.Markdown(
     )
     get_output_button = gr.Button("Receive the encrypted output from the server.")
+    # encrypted_output_representation = gr.Textbox(
+    #     label="Encrypted output representation: ", max_lines=1, interactive=False
+    # )
     gr.Markdown("### Step 8: Decrypt the output.")
     decrypt_button = gr.Button("Decrypt the output")
         label="Credit card approval decision: ", max_lines=1, interactive=False
     )
+    # Button to pre-process, generate the key, encrypt and send the user inputs from the client
+    # side to the server
+    encrypt_button_user.click(
+        pre_process_keygen_encrypt_send_user,
+        inputs=[gender, bool_inputs, num_children, num_family, total_income, age, income_type, \
+                education_type, family_status, occupation_type, housing_type],
+        outputs=[user_id, encrypted_input_user],
+    )
+    # Button to pre-process, generate the key, encrypt and send the bank inputs from the client
+    # side to the server
+    encrypt_button_bank.click(
+        pre_process_keygen_encrypt_send_bank,
+        inputs=[account_length],
+        outputs=[bank_id, encrypted_input_bank],
+    )
+    # Button to pre-process, generate the key, encrypt and send the third party inputs from the
+    # client side to the server
+    encrypt_button_third_party.click(
+        pre_process_keygen_encrypt_send_third_party,
+        inputs=[employed, years_employed],
+        outputs=[third_party_id, encrypted_input_third_party],
+    )
+    # TODO : ID should be unique
+    # Button to send the encodings to the server using post method
+    execute_fhe_button.click(run_fhe, inputs=[user_id, bank_id, third_party_id], outputs=[fhe_execution_time])
+    # TODO : ID should be unique
+    # Button to send the encodings to the server using post method
+    get_output_button.click(
+        get_output,
+        inputs=[user_id, bank_id, third_party_id],
+        # outputs=[encrypted_output_representation]
+    )
+    # TODO : ID should be unique
+    # Button to decrypt the output as the user
+    decrypt_button.click(
+        decrypt_output,
+        inputs=[user_id, bank_id, third_party_id],
+        outputs=[prediction_output],
+    )
     gr.Markdown(
         "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "

backend.py ADDED Viewed

	@@ -0,0 +1,411 @@

+"""Backend functions used in the app."""
+import os
+import shutil
+import gradio as gr
+import numpy
+import requests
+import pickle
+import pandas
+from itertools import chain
+from settings import (
+    SERVER_URL,
+    FHE_KEYS,
+    CLIENT_FILES,
+    SERVER_FILES,
+    DEPLOYMENT_PATH,
+    INITIAL_INPUT_SHAPE,
+    INPUT_INDEXES,
+    INPUT_SLICES,
+    PRE_PROCESSOR_USER_PATH,
+    PRE_PROCESSOR_THIRD_PARTY_PATH,
+    CLIENT_TYPES,
+)
+from utils.client_server_interface import MultiInputsFHEModelClient
+# Load the pre-processor instances once, at import time, so that the functions below can reuse
+# them as module-level objects
+# NOTE(review): pickle.load can execute arbitrary code from the file it reads. This is only safe
+# as long as both paths point to files shipped with the app - confirm they can never be
+# user-supplied
+with PRE_PROCESSOR_USER_PATH.open('rb') as file:
+    PRE_PROCESSOR_USER = pickle.load(file)
+with PRE_PROCESSOR_THIRD_PARTY_PATH.open('rb') as file:
+    PRE_PROCESSOR_THIRD_PARTY = pickle.load(file)
+def shorten_bytes_object(bytes_object, limit=500):
+    """Shorten the input bytes object to a given length.
+    Encrypted data is too large to be displayed in the browser using Gradio. This function
+    provides a shortened representation of it.
+    Args:
+        bytes_object (bytes): The input to shorten.
+        limit (int): The maximum number of bytes to keep. Default to 500.
+    Returns:
+        str: Hexadecimal string representation of at most `limit` bytes of the input, taken after
+            skipping its first 100 bytes. The string is empty if the input is 100 bytes long or
+            shorter.
+    """
+    # Skip the first bytes of the object for a better display
+    shift = 100
+    return bytes_object[shift : limit + shift].hex()
+def clean_temporary_files(n_keys=20):
+    """Clean keys and encrypted images.
+    A maximum of n_keys keys and associated temporary files are allowed to be stored. Once this
+    limit is reached, the oldest files are deleted.
+    Args:
+        n_keys (int): The maximum number of keys and associated files to be stored. Default to 20.
+    """
+    # Get the oldest key files in the key directory
+    key_dirs = sorted(FHE_KEYS.iterdir(), key=os.path.getmtime)
+    # If more than n_keys keys are found, remove the oldest
+    user_ids = []
+    if len(key_dirs) > n_keys:
+        n_keys_to_delete = len(key_dirs) - n_keys
+        for key_dir in key_dirs[:n_keys_to_delete]:
+            user_ids.append(key_dir.name)
+            shutil.rmtree(key_dir)
+    # Get all the encrypted objects in the temporary folder
+    client_files = CLIENT_FILES.iterdir()
+    server_files = SERVER_FILES.iterdir()
+    # Delete all files related to the ids whose keys were deleted
+    for file in chain(client_files, server_files):
+        for user_id in user_ids:
+            if user_id in file.name:
+                file.unlink()
+def _get_client(client_id, client_type):
+    """Get the client API.
+    Args:
+        client_id (int): The client ID to consider.
+        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
+    Returns:
+        MultiInputsFHEModelClient: The client API, instantiated with a key directory specific to
+            the given client type and ID.
+    """
+    # Each (client type, client ID) pair gets its own key directory
+    key_dir = FHE_KEYS / f"{client_type}_{client_id}"
+    return MultiInputsFHEModelClient(DEPLOYMENT_PATH, key_dir=key_dir, nb_inputs=len(CLIENT_TYPES))
+def _keygen(client_id, client_type):
+    """Generate the keys of a client and save its serialized evaluation key to a file.
+    Args:
+        client_id (int): The client ID to consider.
+        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
+    """
+    # Clean temporary files
+    clean_temporary_files()
+    # Retrieve the client instance
+    client = _get_client(client_id, client_type)
+    # Generate the private and evaluation keys
+    client.generate_private_and_evaluation_keys(force=True)
+    # Retrieve the serialized evaluation key, which is needed for the FHE execution
+    # NOTE(review): this key used to be described here as empty (fully leveled circuits), while
+    # the comment below describes it as too large for Gradio buttons - confirm which one holds
+    # for this model
+    evaluation_key = client.get_serialized_evaluation_keys()
+    # Save evaluation_key as bytes in a file as it is too large to pass through regular Gradio
+    # buttons (see https://github.com/gradio-app/gradio/issues/1877)
+    evaluation_key_path = _get_client_file_path("evaluation_key", client_id, client_type)
+    with evaluation_key_path.open("wb") as evaluation_key_file:
+        evaluation_key_file.write(evaluation_key)
+def _send_input(client_id, client_type):
+    """Send the encrypted input image as well as the evaluation key to the server.
+    Args:
+        client_id (int): The client ID to consider.
+        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
+    """
+    # Get the paths to the evaluation key and encrypted inputs
+    evaluation_key_path = _get_client_file_path("evaluation_key", client_id, client_type)
+    encrypted_input_path = _get_client_file_path("encrypted_inputs", client_id, client_type)
+    # Define the data and files to post
+    data = {
+        "client_id": client_id,
+        "client_type": client_type,
+    }
+    files = [
+        ("files", open(encrypted_input_path, "rb")),
+        ("files", open(evaluation_key_path, "rb")),
+    ]
+    # Send the encrypted input image and evaluation key to the server
+    url = SERVER_URL + "send_input"
+    with requests.post(
+        url=url,
+        data=data,
+        files=files,
+    ) as response:
+        return response.ok
+def _get_client_file_path(name, client_id, client_type):
+    """Get the correct temporary file path for the client.
+    Args:
+        name (str): The desired file name (for example 'evaluation_key' or 'encrypted_inputs').
+        client_id (int): The client ID to consider.
+        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
+    Returns:
+        pathlib.Path: The file path, built as "<name>_<client_type>_<client_id>" inside the
+            client files directory.
+    """
+    return CLIENT_FILES / f"{name}_{client_type}_{client_id}"
+def _keygen_encrypt_send(inputs, client_type):
+    """Encrypt the given inputs for a specific client.
+    Args:
+        inputs (numpy.ndarray): The inputs to encrypt.
+        client_type (str): The type of client to consider (either 'user', 'bank' or 'third_party').
+    Returns:
+        client_id, encrypted_inputs_short (int, bytes): Integer ID representing the current client
+            and a byte short representation of the encrypted input to send.
+    """
+    # Create an ID for the current client to consider
+    client_id = numpy.random.randint(0, 2**32)
+    _keygen(client_id, client_type)
+    # Retrieve the client instance
+    client = _get_client(client_id, client_type)
+    # TODO : pre-process the data first
+    # Quantize, encrypt and serialize the inputs
+    encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
+        inputs,
+        input_index=INPUT_INDEXES[client_type],
+        initial_input_shape=INITIAL_INPUT_SHAPE,
+        input_slice=INPUT_SLICES[client_type],
+    )
+    # Save encrypted_inputs to bytes in a file, since too large to pass through regular Gradio
+    # buttons, https://github.com/gradio-app/gradio/issues/1877
+    encrypted_inputs_path = _get_client_file_path("encrypted_inputs", client_id, client_type)
+    with encrypted_inputs_path.open("wb") as encrypted_inputs_file:
+        encrypted_inputs_file.write(encrypted_inputs)
+    # Create a truncated version of the encrypted image for display
+    encrypted_inputs_short = shorten_bytes_object(encrypted_inputs)
+    _send_input(client_id, client_type)
+    # TODO: also return private key representation if possible
+    return client_id, encrypted_inputs_short
+def pre_process_keygen_encrypt_send_user(*inputs):
+    """Pre-process the user's inputs, then generate the keys, encrypt and send them.
+    Args:
+        *inputs: The eleven user values, in the following order: gender, bool_inputs,
+            num_children, num_family, total_income, age, income_type, education_type,
+            family_status, occupation_type and housing_type. 'gender' is compared to the "Male"
+            string and 'bool_inputs' is the collection of owned items.
+    Returns:
+        (int, str): Integer ID representing the current client and a short hexadecimal
+            representation of the encrypted inputs, as returned by `_keygen_encrypt_send`.
+    """
+    gender, bool_inputs, num_children, num_family, total_income, age, income_type, education_type, \
+        family_status, occupation_type, housing_type = inputs
+    # Encoding given in https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
+    # for "Gender" is M ('Male') -> 1 and F ('Female') -> 0
+    gender = gender == "Male"
+    # Retrieve boolean values
+    own_car = "Car" in bool_inputs
+    own_property = "Property" in bool_inputs
+    work_phone = "Work phone" in bool_inputs
+    phone = "Phone" in bool_inputs
+    email = "Email" in bool_inputs
+    # Build a single-row data-frame: the list values set its length to one and the scalar values
+    # are broadcast to that single row
+    user_inputs = pandas.DataFrame({
+        "Gender": [gender],
+        "Own_car": [own_car],
+        "Own_property": [own_property],
+        "Work_phone": [work_phone],
+        "Phone": [phone],
+        "Email": [email],
+        "Num_children": num_children,
+        "Num_family": num_family,
+        "Total_income": total_income,
+        "Age": age,
+        "Income_type": income_type,
+        "Education_type": education_type,
+        "Family_status": family_status,
+        "Occupation_type": occupation_type,
+        "Housing_type": housing_type,
+    })
+    # Apply the user pre-processor loaded at import time
+    preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
+    return _keygen_encrypt_send(preprocessed_user_inputs, "user")
+def pre_process_keygen_encrypt_send_bank(*inputs):
+    """Generate the keys, encrypt and send the bank's inputs.
+    Args:
+        *inputs: The bank values. Only the first one, the account length, is used.
+    Returns:
+        (int, str): Integer ID representing the current client and a short hexadecimal
+            representation of the encrypted inputs, as returned by `_keygen_encrypt_send`.
+    """
+    account_length = inputs[0]
+    # NOTE(review): unlike the user and third party inputs, the account length is forwarded as
+    # is, without any pre-processor or array conversion - confirm the client API accepts it
+    return _keygen_encrypt_send(account_length, "bank")
+def pre_process_keygen_encrypt_send_third_party(*inputs):
+    """Pre-process the third party's inputs, then generate the keys, encrypt and send them.
+    Args:
+        *inputs: The two third party values, in the following order: employed (compared to the
+            "No" string) and years_employed.
+    Returns:
+        (int, str): Integer ID representing the current client and a short hexadecimal
+            representation of the encrypted inputs, as returned by `_keygen_encrypt_send`.
+    """
+    employed, years_employed = inputs
+    # Original dataset contains an "unemployed" feature instead of "employed"
+    unemployed = employed == "No"
+    # Build a single-row data-frame with the column names used below by the pre-processor
+    third_party_inputs = pandas.DataFrame({
+        "Unemployed": [unemployed],
+        "Years_employed": [years_employed],
+    })
+    # Apply the third party pre-processor loaded at import time
+    preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
+    return _keygen_encrypt_send(preprocessed_third_party_inputs, "third_party")
+def run_fhe(user_id, bank_id, third_party_id):
+    """Run the model on the encrypted inputs previously sent using FHE.
+    Posts the three client IDs to the server's "run_fhe" endpoint.
+    Args:
+        user_id (int): The user ID to consider.
+        bank_id (int): The bank ID to consider.
+        third_party_id (int): The third party ID to consider.
+    Returns:
+        The decoded JSON body of the server's response (presumably the FHE execution time;
+            confirm against the server's `run_fhe` endpoint).
+    Raises:
+        gr.Error: If the server answers with a non-OK status.
+    """
+    # TODO : add a warning for users to send all client types' inputs
+    data = {
+        "user_id": user_id,
+        "bank_id": bank_id,
+        "third_party_id": third_party_id,
+    }
+    # Trigger the FHE execution on the encrypted inputs previously sent
+    url = SERVER_URL + "run_fhe"
+    with requests.post(
+        url=url,
+        data=data,
+    ) as response:
+        if response.ok:
+            return response.json()
+        else:
+            # Any non-OK status ends up here, whatever its actual cause
+            raise gr.Error("Please wait for the inputs to be sent to the server.")
+def get_output(user_id, bank_id, third_party_id):
+    """Retrieve the encrypted output.
+    Posts the three client IDs to the server's "get_output" endpoint and writes the raw response
+    body to the client-side "encrypted_output" file.
+    Args:
+        user_id (int): The user ID to consider.
+        bank_id (int): The bank ID to consider.
+        third_party_id (int): The third party ID to consider.
+    Returns:
+        None: Nothing is displayed for now (see the TODO below).
+    Raises:
+        gr.Error: If the server answers with a non-OK status.
+    """
+    data = {
+        "user_id": user_id,
+        "bank_id": bank_id,
+        "third_party_id": third_party_id,
+    }
+    # Retrieve the encrypted output
+    url = SERVER_URL + "get_output"
+    with requests.post(
+        url=url,
+        data=data,
+    ) as response:
+        if response.ok:
+            encrypted_output = response.content
+            # Save the encrypted output to bytes in a file as it is too large to pass through regular
+            # Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
+            # TODO : check if output to user is relevant
+            # NOTE(review): the file key is `user_id + bank_id + third_party_id`, which sums if the
+            # IDs are integers and concatenates if they are strings - confirm which type Gradio passes
+            encrypted_output_path = _get_client_file_path("encrypted_output", user_id + bank_id + third_party_id, "output")
+            with encrypted_output_path.open("wb") as encrypted_output_file:
+                encrypted_output_file.write(encrypted_output)
+            # TODO
+            # Decrypt the output using a different (wrong) key for display
+            # output_encrypted_representation = decrypt_output_with_wrong_key(encrypted_output, client_type)
+            # return output_encrypted_representation
+            return None
+        else:
+            # Any non-OK status ends up here, whatever its actual cause
+            raise gr.Error("Please wait for the FHE execution to be completed.")
+def decrypt_output(user_id, bank_id, third_party_id):
+    """Decrypt the result.
+    Reads the client-side "encrypted_output" file, decrypts it with the client API retrieved
+    for the user and turns the resulting probabilities into a class prediction.
+    Args:
+        user_id (int): The user ID to consider.
+        bank_id (int): The bank ID to consider.
+        third_party_id (int): The third party ID to consider.
+    Returns:
+        output(numpy.ndarray): The predicted class indices (argmax of the decrypted
+            probabilities along axis 1).
+    Raises:
+        gr.Error: If the encrypted output file does not exist.
+    """
+    # Get the encrypted output path (same key expression as the one used in `get_output`)
+    encrypted_output_path = _get_client_file_path("encrypted_output", user_id + bank_id + third_party_id, "output")
+    if not encrypted_output_path.is_file():
+        raise gr.Error("Please run the FHE execution first.")
+    # Load the encrypted output as bytes
+    with encrypted_output_path.open("rb") as encrypted_output_file:
+        encrypted_output_proba = encrypted_output_file.read()
+    # Retrieve the client API (the user's client is the one used for decryption)
+    client = _get_client(user_id, "user")
+    # Deserialize, decrypt and post-process the encrypted output
+    output_proba = client.deserialize_decrypt_dequantize(encrypted_output_proba)
+    # Determine the predicted class
+    output = numpy.argmax(output_proba, axis=1)
+    return output

data/clean_data.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

deployment_files/client.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b42d1dff3521c2e7462994c6eafb072bf004108d27c838e690a6702d775c0b5
-size 35673

 version https://git-lfs.github.com/spec/v1
+oid sha256:06c7bd8264089eb169342aa5c3f638b11d894c54d054511a91523bfdfab69487
+size 76130

deployment_files/pre_processor_third_party.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee39c00c8ca119a4e61f6905687c9bb540352b5ce4005aaba125290679722587
+size 1590

deployment_files/pre_processor_user.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af3db3f40e0e38febb8efb858e07df1f432458cc66f2edb38bedbd4d35520802
+size 6207

deployment_files/server.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75b15663431ff4f3788b380c100ea87c1bf97959234aeefb51ae734bed7514c4
-size 10953

 version https://git-lfs.github.com/spec/v1
+oid sha256:04c3f1de7261abe6ad075f6cc13885677ddf4ca0b03d6a31f26a60f94d5aa2ae
+size 10975

development.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""Train and compile the model."""
+import shutil
+import numpy
+import pandas
+import pickle
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from imblearn.over_sampling import SMOTE
+from settings import DEPLOYMENT_PATH, RANDOM_STATE, DATA_PATH, INPUT_SLICES, PRE_PROCESSOR_USER_PATH, PRE_PROCESSOR_THIRD_PARTY_PATH
+from utils.client_server_interface import MultiInputsFHEModelDev
+from utils.model import MultiInputXGBClassifier
+from utils.pre_processing import get_pre_processors, select_and_pop_features
+def get_processed_multi_inputs(data):
+    """Split the pre-processed data column-wise into the user, bank and third party inputs.
+    Args:
+        data: Two-dimensional array indexed as `data[:, columns]`, whose columns follow the
+            layout described by `INPUT_SLICES`.
+    Returns:
+        tuple: The user, bank and third party column blocks, in that order.
+    """
+    return (
+        data[:, INPUT_SLICES["user"]],
+        data[:, INPUT_SLICES["bank"]],
+        data[:, INPUT_SLICES["third_party"]]
+    )
+print("Load and pre-process the data")
+data = pandas.read_csv(DATA_PATH, encoding="utf-8")
+# Define input and target data
+data_y = data.pop("Target").copy()
+data_x = data.copy()
+# Get data from all parties
+data_third_party = select_and_pop_features(data_x, ["Years_employed", "Unemployed"])
+data_bank = select_and_pop_features(data_x, ["Account_length"])
+data_user = data_x.copy()
+# Feature engineer the data
+pre_processor_user, pre_processor_third_party = get_pre_processors()
+preprocessed_data_user = pre_processor_user.fit_transform(data_user)
+preprocessed_data_bank = data_bank.to_numpy()
+preprocessed_data_third_party = pre_processor_third_party.fit_transform(data_third_party)
+preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_data_bank, preprocessed_data_third_party), axis=1)
+# The initial data-set is very imbalanced: use SMOTE to get better results
+x, y = SMOTE().fit_resample(preprocessed_data_x, data_y)
+# Retrieve the training and testing data
+X_train, X_test, y_train, y_test = train_test_split(
+    x, y, stratify=y, test_size=0.3, random_state=RANDOM_STATE
+)
+print("\nTrain and compile the model")
+model = MultiInputXGBClassifier(max_depth=3, n_estimators=40)
+model, sklearn_model = model.fit_benchmark(X_train, y_train)
+multi_inputs_train = get_processed_multi_inputs(X_train)
+model.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
+# Delete the deployment folder and its content if it already exists
+if DEPLOYMENT_PATH.is_dir():
+    shutil.rmtree(DEPLOYMENT_PATH)
+print("\nEvaluate the models")
+y_pred_sklearn = sklearn_model.predict(X_test)
+print(f"Sklearn accuracy score : {accuracy_score(y_test, y_pred_sklearn )*100:.2f}%")
+multi_inputs_test = get_processed_multi_inputs(X_test)
+y_pred_simulated = model.predict_multi_inputs(*multi_inputs_test, simulate=True)
+print(f"Concrete ML accuracy score (simulated) : {accuracy_score(y_test, y_pred_simulated)*100:.2f}%")
+print("\nSave deployment files")
+# Save files needed for deployment
+fhe_dev = MultiInputsFHEModelDev(DEPLOYMENT_PATH, model)
+fhe_dev.save()
+# Save pre-processors
+with PRE_PROCESSOR_USER_PATH.open('wb') as file:
+    pickle.dump(pre_processor_user, file)
+with PRE_PROCESSOR_THIRD_PARTY_PATH.open('wb') as file:
+    pickle.dump(pre_processor_third_party, file)
+print("\nDone !")

development/development.py DELETED Viewed

@@ -1,67 +0,0 @@
-"A script to generate all development files necessary for the project."
-import shutil
-import numpy
-import pandas
-from sklearn.model_selection import train_test_split
-from imblearn.over_sampling import SMOTE
-from ..settings import DEPLOYMENT_PATH, RANDOM_STATE
-from client_server_interface import MultiInputsFHEModelDev
-from model import MultiInputXGBClassifier
-from development.pre_processing import pre_process_data
-print("Load and pre-process the data")
-data = pandas.read_csv("data/clean_data.csv", encoding="utf-8")
-# Make median annual salary similar to France (2023): from 157500 to 22050
-data["Total_income"] = data["Total_income"] * 0.14
-# Remove ID feature
-data.drop("ID", axis=1, inplace=True)
-# Feature engineer the data
-pre_processed_data, training_bins = pre_process_data(data)
-# Define input and target data
-y = pre_processed_data.pop("Target")
-x = pre_processed_data
-# The initial data-set is very imbalanced: use SMOTE to get better results
-x, y = SMOTE().fit_resample(x, y)
-# Retrieve the training data
-X_train, _, y_train, _ = train_test_split(
-    x, y, stratify=y, test_size=0.3, random_state=RANDOM_STATE
-)
-# Convert the Pandas data frames into Numpy arrays
-X_train_np = X_train.to_numpy()
-y_train_np = y_train.to_numpy()
-print("Train and compile the model")
-model = MultiInputXGBClassifier(max_depth=3, n_estimators=40)
-model.fit(X_train_np, y_train_np)
-multi_inputs_train = numpy.array_split(X_train_np, 3, axis=1)
-model.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
-# Delete the deployment folder and its content if it already exists
-if DEPLOYMENT_PATH.is_dir():
-    shutil.rmtree(DEPLOYMENT_PATH)
-print("Save deployment files")
-# Save the files needed for deployment
-fhe_dev = MultiInputsFHEModelDev(model, DEPLOYMENT_PATH)
-fhe_dev.save()
-print("Done !")

development/pre_processing.py DELETED Viewed

@@ -1,122 +0,0 @@
-import pandas
-from copy import deepcopy
-def convert_dummy(df, feature):
-    pos = pandas.get_dummies(df[feature], prefix=feature)
-    df.drop([feature], axis=1, inplace=True)
-    df = df.join(pos)
-    return df
-def get_category(df, col, labels, qcut=False, binsnum=None, bins=None, retbins=False):
-    assert binsnum is not None or bins is not None
-    if qcut and binsnum is not None:
-        localdf, bin_edges = pandas.qcut(df[col], q=binsnum, labels=labels, retbins=True)  # quantile cut
-    else:
-        input_bins = bins if bins is not None else binsnum
-        localdf, bin_edges = pandas.cut(df[col], bins=input_bins, labels=labels, retbins=True)  # equal-length cut
-    df.drop(col, axis=1, inplace=True)
-    localdf = pandas.DataFrame(localdf)
-    df = df.join(localdf[col])
-    if retbins:
-        return df, bin_edges
-    return df
-def pre_process_data(input_data, bins=None, columns=None):
-    assert bins is None or ("bin_edges_income" in bins and "bin_edges_age" in bins and "bin_edges_years_employed" in bins and columns is not None)
-    training_bins = {}
-    input_data = deepcopy(input_data)
-    bins = deepcopy(bins) if bins is not None else None
-    input_data.loc[input_data["Num_children"] >= 2, "Num_children"] = "2_or_more"
-    input_data = convert_dummy(input_data, "Num_children")
-    if bins is None:
-        input_data, bin_edges_income = get_category(input_data, "Total_income", ["low", "medium", "high"], qcut=True, binsnum=3, retbins=True)
-        training_bins["bin_edges_income"] = bin_edges_income
-    else:
-        input_data = get_category(input_data, "Total_income", ["low", "medium", "high"], bins=bins["bin_edges_income"])
-    input_data = convert_dummy(input_data, "Total_income")
-    if bins is None:
-        input_data, bin_edges_age = get_category(input_data, "Age", ["lowest", "low", "medium", "high", "highest"], binsnum=5, retbins=True)
-        training_bins["bin_edges_age"] = bin_edges_age
-    else:
-        input_data = get_category(input_data, "Age", ["lowest", "low", "medium", "high", "highest"], bins=bins["bin_edges_age"])
-    input_data = convert_dummy(input_data, "Age")
-    if bins is None:
-        input_data, bin_edges_years_employed = get_category(input_data, "Years_employed", ["lowest", "low", "medium", "high", "highest"], binsnum=5, retbins=True)
-        training_bins["bin_edges_years_employed"] = bin_edges_years_employed
-    else:
-        input_data = get_category(input_data, "Years_employed", ["lowest", "low", "medium", "high", "highest"], bins=bins["bin_edges_years_employed"])
-    input_data = convert_dummy(input_data, "Years_employed")
-    input_data.loc[input_data["Num_family"] >= 3, "Num_family"] = "3_or_more"
-    input_data = convert_dummy(input_data, "Num_family")
-    input_data.loc[input_data["Income_type"] == "Pensioner", "Income_type"] = "State servant"
-    input_data.loc[input_data["Income_type"] == "Student", "Income_type"] = "State servant"
-    input_data = convert_dummy(input_data, "Income_type")
-    input_data.loc[
-        (input_data["Occupation_type"] == "Cleaning staff")
-        | (input_data["Occupation_type"] == "Cooking staff")
-        | (input_data["Occupation_type"] == "Drivers")
-        | (input_data["Occupation_type"] == "Laborers")
-        | (input_data["Occupation_type"] == "Low-skill Laborers")
-        | (input_data["Occupation_type"] == "Security staff")
-        | (input_data["Occupation_type"] == "Waiters/barmen staff"),
-        "Occupation_type",
-    ] = "Labor_work"
-    input_data.loc[
-        (input_data["Occupation_type"] == "Accountants")
-        | (input_data["Occupation_type"] == "Core staff")
-        | (input_data["Occupation_type"] == "HR staff")
-        | (input_data["Occupation_type"] == "Medicine staff")
-        | (input_data["Occupation_type"] == "Private service staff")
-        | (input_data["Occupation_type"] == "Realty agents")
-        | (input_data["Occupation_type"] == "Sales staff")
-        | (input_data["Occupation_type"] == "Secretaries"),
-        "Occupation_type",
-    ] = "Office_work"
-    input_data.loc[
-        (input_data["Occupation_type"] == "Managers")
-        | (input_data["Occupation_type"] == "High skill tech staff")
-        | (input_data["Occupation_type"] == "IT staff"),
-        "Occupation_type",
-    ] = "High_tech_work"
-    input_data = convert_dummy(input_data, "Occupation_type")
-    input_data = convert_dummy(input_data, "Housing_type")
-    input_data.loc[input_data["Education_type"] == "Academic degree", "Education_type"] = "Higher education"
-    input_data = convert_dummy(input_data, "Education_type")
-    input_data = convert_dummy(input_data, "Family_status")
-    input_data = input_data.astype("int")
-    if training_bins:
-        return input_data, training_bins
-    input_data = input_data.reindex(columns=columns, fill_value=0)
-    return input_data

server.py CHANGED Viewed

@@ -6,12 +6,13 @@ from fastapi import FastAPI, File, Form, UploadFile
 from fastapi.responses import JSONResponse, Response
 from settings import DEPLOYMENT_PATH, SERVER_FILES, CLIENT_TYPES
-from development.client_server_interface import MultiInputsFHEModelServer
 # Load the server objects related to all currently available filters once and for all
 FHE_SERVER = MultiInputsFHEModelServer(DEPLOYMENT_PATH)
-def get_server_file_path(name, client_id, client_type):
     """Get the correct temporary file path for the server.
     Args:
@@ -42,8 +43,8 @@ def send_input(
 ):
     """Send the inputs to the server."""
     # Retrieve the encrypted inputs and the evaluation key paths
-    encrypted_inputs_path = get_server_file_path("encrypted_inputs", client_id, client_type)
-    evaluation_key_path = get_server_file_path("evaluation_key", client_id, client_type)
     # Write the files using the above paths
     with encrypted_inputs_path.open("wb") as encrypted_inputs, evaluation_key_path.open(
@@ -55,23 +56,30 @@ def send_input(
 @app.post("/run_fhe")
 def run_fhe(
-    client_id: str = Form(),
 ):
     """Execute the model on the encrypted inputs using FHE."""
-    # Retrieve the evaluation key
-    evaluation_key_path = get_server_file_path("evaluation_key", client_id, "user")
-    # Get the evaluation key
-    with evaluation_key_path.open("rb") as evaluation_key_file:
         evaluation_key = evaluation_key_file.read()
-    # Get the encrypted inputs
-    encrypted_inputs = []
-    for client_type in CLIENT_TYPES:
-        encrypted_inputs_path = get_server_file_path("encrypted_inputs", client_id, client_type)
-        with encrypted_inputs_path.open("rb") as encrypted_inputs_file:
-            encrypted_input = encrypted_inputs_file.read()
-            encrypted_inputs.append(encrypted_input)
     # Run the FHE execution
     start = time.time()
@@ -79,7 +87,7 @@ def run_fhe(
     fhe_execution_time = round(time.time() - start, 2)
     # Retrieve the encrypted output path
-    encrypted_output_path = get_server_file_path("encrypted_output", client_id, client_type)
     # Write the file using the above path
     with encrypted_output_path.open("wb") as output_file:
@@ -90,12 +98,13 @@ def run_fhe(
 @app.post("/get_output")
 def get_output(
-    client_id: str = Form(),
-    client_type: str = Form(),
 ):
     """Retrieve the encrypted output."""
     # Retrieve the encrypted output path
-    encrypted_output_path = get_server_file_path("encrypted_output", client_id, client_type)
     # Read the file using the above path
     with encrypted_output_path.open("rb") as encrypted_output_file:

 from fastapi.responses import JSONResponse, Response
 from settings import DEPLOYMENT_PATH, SERVER_FILES, CLIENT_TYPES
+from utils.client_server_interface import MultiInputsFHEModelServer
 # Load the server objects related to all currently available filters once and for all
 FHE_SERVER = MultiInputsFHEModelServer(DEPLOYMENT_PATH)
+def _get_server_file_path(name, client_id, client_type):
     """Get the correct temporary file path for the server.
     Args:
 ):
     """Send the inputs to the server."""
     # Retrieve the encrypted inputs and the evaluation key paths
+    encrypted_inputs_path = _get_server_file_path("encrypted_inputs", client_id, client_type)
+    evaluation_key_path = _get_server_file_path("evaluation_key", client_id, client_type)
     # Write the files using the above paths
     with encrypted_inputs_path.open("wb") as encrypted_inputs, evaluation_key_path.open(
 @app.post("/run_fhe")
 def run_fhe(
+    user_id: str = Form(),
+    bank_id: str = Form(),
+    third_party_id: str = Form(),
 ):
     """Execute the model on the encrypted inputs using FHE."""
+    # Retrieve the evaluation key (from the user, as all evaluation keys should be the same)
+    evaluation_key_path = _get_server_file_path("evaluation_key", user_id, "user")
+    # Get the encrypted inputs
+    encrypted_user_inputs_path = _get_server_file_path("encrypted_inputs", user_id, "user")
+    encrypted_bank_inputs_path = _get_server_file_path("encrypted_inputs", bank_id, "bank")
+    encrypted_third_party_inputs_path = _get_server_file_path("encrypted_inputs", third_party_id, "third_party")
+    with (
+        evaluation_key_path.open("rb") as evaluation_key_file,
+        encrypted_user_inputs_path.open("rb") as encrypted_user_inputs_file,
+        encrypted_bank_inputs_path.open("rb") as encrypted_bank_inputs_file,
+        encrypted_third_party_inputs_path.open("rb") as encrypted_third_party_inputs_file,
+    ):
         evaluation_key = evaluation_key_file.read()
+        encrypted_user_input = encrypted_user_inputs_file.read()
+        encrypted_bank_input = encrypted_bank_inputs_file.read()
+        encrypted_third_party_input = encrypted_third_party_inputs_file.read()
+    encrypted_inputs = (encrypted_user_input, encrypted_bank_input, encrypted_third_party_input)
     # Run the FHE execution
     start = time.time()
     fhe_execution_time = round(time.time() - start, 2)
     # Retrieve the encrypted output path
+    encrypted_output_path = _get_server_file_path("encrypted_output", user_id + bank_id + third_party_id, "output")
     # Write the file using the above path
     with encrypted_output_path.open("wb") as output_file:
 @app.post("/get_output")
 def get_output(
+    user_id: str = Form(),
+    bank_id: str = Form(),
+    third_party_id: str = Form(),
 ):
     """Retrieve the encrypted output."""
     # Retrieve the encrypted output path
+    encrypted_output_path = _get_server_file_path("encrypted_output", user_id + bank_id + third_party_id, "output")
     # Read the file using the above path
     with encrypted_output_path.open("rb") as encrypted_output_file:

settings.py CHANGED Viewed

@@ -1,6 +1,7 @@
 "All constants used in the project."
 from pathlib import Path
 # The directory of this project
 REPO_DIR = Path(__file__).parent
@@ -11,6 +12,10 @@ FHE_KEYS = REPO_DIR / ".fhe_keys"
 CLIENT_FILES = REPO_DIR / "client_files"
 SERVER_FILES = REPO_DIR / "server_files"
 # Create the necessary directories
 FHE_KEYS.mkdir(exist_ok=True)
 CLIENT_FILES.mkdir(exist_ok=True)
@@ -19,8 +24,14 @@ SERVER_FILES.mkdir(exist_ok=True)
 # Store the server's URL
 SERVER_URL = "http://localhost:8000/"
-RANDOM_STATE = 0
 INITIAL_INPUT_SHAPE = (1, 49)
 CLIENT_TYPES = ["user", "bank", "third_party"]
@@ -29,8 +40,33 @@ INPUT_INDEXES = {
     "bank": 1,
     "third_party": 2,
 }
-START_POSITIONS = {
-    "user": 0,  # First position: start from 0
-    "bank": 17,  # Second position: start from len(input_user)
-    "third_party": 33,  # Third position: start from len(input_user) + len(input_bank)
 }

 "All constants used in the project."
 from pathlib import Path
+import pandas
 # The directory of this project
 REPO_DIR = Path(__file__).parent
 CLIENT_FILES = REPO_DIR / "client_files"
 SERVER_FILES = REPO_DIR / "server_files"
+# Path targeting pre-processor saved files
+PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
+PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / 'pre_processor_third_party.pkl'
 # Create the necessary directories
 FHE_KEYS.mkdir(exist_ok=True)
 CLIENT_FILES.mkdir(exist_ok=True)
 # Store the server's URL
 SERVER_URL = "http://localhost:8000/"
+# Path to data file
+# The data was previously cleaned using this notebook : https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
+# Additionally, the "ID" columns has been removed and the "Total_income" has been adjusted so that
+# its median value corresponds to France's 2023 median annual salary (22050 euros)
+DATA_PATH = "data/clean_data.csv"
+# Development settings
+RANDOM_STATE = 0
 INITIAL_INPUT_SHAPE = (1, 49)
 CLIENT_TYPES = ["user", "bank", "third_party"]
     "bank": 1,
     "third_party": 2,
 }
+# Column range of each client type's features within the full pre-processed input
+# (42 + 1 + 6 = 49 columns, the second dimension of INITIAL_INPUT_SHAPE)
+INPUT_SLICES = {
+    "user": slice(0, 42),  # First position: start from 0
+    "bank": slice(42, 43),  # Second position: start from n_feature_user
+    "third_party": slice(43, 49),  # Third position: start from n_feature_user + n_feature_bank
 }
+# The data set is loaded once, at import time, only to derive the app's bounds and choices below
+_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
+def get_min_max(data, column):
+    """Get min/max values of a column in order to input them in Gradio's API as key arguments.
+    Args:
+        data: Data frame (or any object supporting `data[column].min()` / `.max()`).
+        column: Name of the column to inspect.
+    Returns:
+        dict: The "minimum" and "maximum" values of the column, both converted with `int()`.
+    """
+    return {
+        "minimum": int(data[column].min()),
+        "maximum": int(data[column].max()),
+    }
+# App data min and max values (each one is a {"minimum": ..., "maximum": ...} dictionary)
+ACCOUNT_MIN_MAX = get_min_max(_data, "Account_length")
+CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
+INCOME_MIN_MAX = get_min_max(_data, "Total_income")
+AGE_MIN_MAX = get_min_max(_data, "Age")
+EMPLOYED_MIN_MAX = get_min_max(_data, "Years_employed")
+FAMILY_MIN_MAX = get_min_max(_data, "Num_family")
+# App data choices (distinct values found in the data set's categorical columns)
+INCOME_TYPES = list(_data["Income_type"].unique())
+OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
+HOUSING_TYPES = list(_data["Housing_type"].unique())
+EDUCATION_TYPES = list(_data["Education_type"].unique())
+FAMILY_STATUS = list(_data["Family_status"].unique())

{development → utils}/client_server_interface.py RENAMED Viewed

@@ -1,3 +1,5 @@
 import numpy
 import copy
@@ -25,22 +27,21 @@ class MultiInputsFHEModelClient(FHEModelClient):
         super().__init__(*args, **kwargs)
-    def quantize_encrypt_serialize_multi_inputs(self, x: numpy.ndarray, input_index, initial_input_shape, start_position) -> bytes:
         x_padded = numpy.zeros(initial_input_shape)
-        end = start_position + x.shape[1]
-        x_padded[:, start_position:end] = x
         q_x_padded = self.model.quantize_input(x_padded)
-        q_x = q_x_padded[:, start_position:end]
-        q_x_padded = [None for _ in range(self.nb_inputs)]
-        q_x_padded[input_index] = q_x
         # Encrypt the values
-        q_x_enc = self.client.encrypt(*q_x_padded)
         # Serialize the encrypted values to be sent to the server
         q_x_enc_ser = q_x_enc[input_index].serialize()

+"""Modified classes for the client-server interface with multi-input circuits."""
 import numpy
 import copy
         super().__init__(*args, **kwargs)
+    def quantize_encrypt_serialize_multi_inputs(self, x: numpy.ndarray, input_index, initial_input_shape, input_slice) -> bytes:
         x_padded = numpy.zeros(initial_input_shape)
+        x_padded[:, input_slice] = x
         q_x_padded = self.model.quantize_input(x_padded)
+        q_x = q_x_padded[:, input_slice]
+        q_x_inputs = [None for _ in range(self.nb_inputs)]
+        q_x_inputs[input_index] = q_x
         # Encrypt the values
+        q_x_enc = self.client.encrypt(*q_x_inputs)
         # Serialize the encrypted values to be sent to the server
         q_x_enc_ser = q_x_enc[input_index].serialize()

{development → utils}/model.py RENAMED Viewed

@@ -1,4 +1,7 @@
 import numpy
 from typing import Optional, Sequence, Union
 from concrete.fhe.compilation.compiler import Compiler, Configuration, DebugArtifacts, Circuit
@@ -128,3 +131,43 @@ class MultiInputXGBClassifier(ConcreteXGBClassifier):
         )
         return compiler

+"""Modified model class that handles multi-input circuits."""
 import numpy
+import time
 from typing import Optional, Sequence, Union
 from concrete.fhe.compilation.compiler import Compiler, Configuration, DebugArtifacts, Circuit
         )
         return compiler
+    def predict_multi_inputs(self, *multi_inputs, simulate=True):
+        """Run the inference with multiple inputs, with simulation or in FHE.
+        Samples are processed one at a time: the i-th rows of all inputs are quantized together,
+        executed on the circuit, then de-quantized and post-processed.
+        Args:
+            *multi_inputs (numpy.ndarray): One array per circuit input, iterated in lockstep
+                along their first axis.
+            simulate (bool): If True, use the circuit's simulation. Otherwise, generate keys and
+                run the actual encrypt / run / decrypt steps. Default to True.
+        Returns:
+            numpy.ndarray: The per-sample argmax predictions, stacked in input order (each entry
+                is the argmax of a single-sample batch, so presumably of shape (n_samples, 1);
+                confirm).
+        """
+        assert all(isinstance(inputs, numpy.ndarray) for inputs in multi_inputs)
+        # Keys are only needed for actual FHE execution
+        if not simulate:
+            self.fhe_circuit.keygen()
+        y_preds = []
+        execution_times = []
+        for inputs in zip(*multi_inputs):
+            # Add back a leading batch axis of size 1 to each row
+            # NOTE(review): `input` shadows the builtin within this generator expression
+            inputs = tuple(numpy.expand_dims(input, axis=0) for input in inputs)
+            q_inputs = self.quantize_input(*inputs)
+            if simulate:
+                q_y_proba = self.fhe_circuit.simulate(*q_inputs)
+            else:
+                q_inputs_enc = self.fhe_circuit.encrypt(*q_inputs)
+                # Only the circuit's run is timed, not the encryption and decryption steps
+                start = time.time()
+                q_y_proba_enc = self.fhe_circuit.run(*q_inputs_enc)
+                end = time.time() - start  # elapsed time in seconds, despite the name
+                execution_times.append(end)
+                q_y_proba = self.fhe_circuit.decrypt(q_y_proba_enc)
+            y_proba = self.dequantize_output(q_y_proba)
+            y_proba = self.post_processing(y_proba)
+            y_pred = numpy.argmax(y_proba, axis=1)
+            y_preds.append(y_pred)
+        if not simulate:
+            # ".2" without a type character means two significant digits, not two decimals
+            print(f"FHE execution time per inference: {numpy.mean(execution_times) :.2}s")
+        return numpy.array(y_preds)

utils/pre_processing.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""Data pre-processing functions."""
+import numpy
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, KBinsDiscretizer
+def _get_pipeline_replace_one_hot(func, value):
+    """Build a pipeline that replaces values with `func`, then one-hot encodes the result.
+    Args:
+        func (Callable): Replacement function, called by the transformer as `func(X, value=value)`.
+        value: Argument forwarded to `func` through its `value` keyword.
+    Returns:
+        Pipeline: The two-step ("replace", "one_hot") pipeline.
+    """
+    return Pipeline([
+        ("replace", FunctionTransformer(
+            func,
+            kw_args={"value": value},
+            feature_names_out='one-to-one',
+        )),
+        ("one_hot", OneHotEncoder(),),
+    ])
+def _replace_values_geq(column, value):
+    """Replace every entry greater than or equal to `value` by the label "<value>_or_more".
+    Other entries are kept. As the output mixes the string label with the original entries,
+    the resulting array presumably ends up with a string dtype (confirm).
+    """
+    return numpy.where(column >= value, f"{value}_or_more", column)
+def _replace_values_eq(column, value):
+    """Merge groups of values into a single label.
+    Args:
+        column: The values to process.
+        value (dict): Mapping from the label to use to the list of values it replaces.
+    Returns:
+        The values, with every entry found in one of the lists replaced by the matching label.
+    """
+    # Replacements are applied one group after the other, each on the previous group's output
+    for desired_value, values_to_replace in value.items():
+        column = numpy.where(numpy.isin(column, values_to_replace), desired_value, column)
+    return column
+def get_pre_processors():
+    """Build the (unfitted) pre-processors for the user and third party data.
+    Returns:
+        (ColumnTransformer, ColumnTransformer): The user pre-processor and the third party
+            pre-processor, in that order. Both pass through the columns they do not transform.
+    """
+    pre_processor_user = ColumnTransformer(
+        transformers=[
+            # Counts: cap at "2_or_more" / "3_or_more", then one-hot encode
+            (
+                "replace_num_children",
+                _get_pipeline_replace_one_hot(_replace_values_geq, 2),
+                ['Num_children']
+            ),
+            (
+                "replace_num_family",
+                _get_pipeline_replace_one_hot(_replace_values_geq, 3),
+                ['Num_family']
+            ),
+            # Categories: merge some values into a single label, then one-hot encode
+            (
+                "replace_income_type",
+                _get_pipeline_replace_one_hot(_replace_values_eq, {"State servant": ["Pensioner", "Student"]}),
+                ['Income_type']
+            ),
+            (
+                "replace_education_type",
+                _get_pipeline_replace_one_hot(_replace_values_eq, {"Higher education": ["Academic degree"]}),
+                ['Education_type']
+            ),
+            # Despite its name, this step handles all three occupation groups, not only labor
+            (
+                "replace_occupation_type_labor",
+                _get_pipeline_replace_one_hot(
+                    _replace_values_eq,
+                        {
+                            "Labor_work": ["Cleaning staff", "Cooking staff", "Drivers", "Laborers", "Low-skill Laborers", "Security staff", "Waiters/barmen staff"],
+                            "Office_work": ["Accountants", "Core staff", "HR staff", "Medicine staff", "Private service staff", "Realty agents", "Sales staff", "Secretaries"],
+                            "High_tech_work": ["Managers", "High skill tech staff", "IT staff"],
+                        },
+                ),
+                ['Occupation_type']
+            ),
+            ('one_hot_housing_fam_status', OneHotEncoder(), ['Housing_type', 'Family_status']),
+            # Numerical features: 3 quantile bins for the income, 5 uniform-width bins for the age
+            ('qbin_total_income', KBinsDiscretizer(n_bins=3, strategy='quantile', encode="onehot"), ['Total_income']),
+            ('bin_age', KBinsDiscretizer(n_bins=5, strategy='uniform', encode="onehot"), ['Age']),
+        ],
+        remainder='passthrough',
+        verbose_feature_names_out=False,
+    )
+    pre_processor_third_party = ColumnTransformer(
+        transformers=[
+            # 5 uniform-width bins for the number of years employed
+            ('bin_years_employed', KBinsDiscretizer(n_bins=5, strategy='uniform', encode="onehot"), ['Years_employed'])
+        ],
+        remainder='passthrough',
+        verbose_feature_names_out=False,
+    )
+    return pre_processor_user, pre_processor_third_party
+def select_and_pop_features(data, columns):
+    """Extract the given columns from a data frame.
+    Args:
+        data: The data frame to extract the columns from. It is modified in place: the
+            selected columns are dropped from it.
+        columns (list): Names of the columns to extract.
+    Returns:
+        A copy of the selected columns.
+    """
+    new_data = data[columns].copy()
+    data.drop(columns, axis=1, inplace=True)
+    return new_data