romanbredehoft-zama
committed on
Commit
•
747c295
1
Parent(s):
0e9fc02
Add descriptions and fix comments
Browse files- app.py +50 -20
- development.py +2 -2
- utils/pre_processing.py +8 -1
app.py
CHANGED
@@ -44,23 +44,43 @@ with demo:
|
|
44 |
"""
|
45 |
)
|
46 |
|
47 |
-
gr.Markdown("
|
48 |
|
49 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
keygen_button = gr.Button("Generate the keys and send evaluation key to the server.")
|
51 |
evaluation_key = gr.Textbox(
|
52 |
label="Evaluation key representation:", max_lines=2, interactive=False
|
53 |
)
|
54 |
client_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
|
55 |
|
56 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
with gr.Row():
|
58 |
with gr.Column():
|
59 |
gr.Markdown("### User")
|
60 |
gender = gr.Radio(["Female", "Male"], label="Gender", value="Female")
|
61 |
bool_inputs = gr.CheckboxGroup(["Car", "Property", "Work phone", "Phone", "Email"], label="What do you own ?")
|
62 |
num_children = gr.Slider(**CHILDREN_MIN_MAX, step=1, label="Number of children", info="How many children do you have ?")
|
63 |
-
household_size = gr.Slider(**FAMILY_MIN_MAX, step=1, label="Household size", info="How many members does your
|
64 |
total_income = gr.Slider(**INCOME_MIN_MAX, label="Income", info="What's you total yearly income (in euros) ?")
|
65 |
age = gr.Slider(**AGE_MIN_MAX, step=1, label="Age", info="How old are you ?")
|
66 |
income_type = gr.Dropdown(choices=INCOME_TYPES, value=INCOME_TYPES[0], label="Income type", info="What is your main type of income ?")
|
@@ -79,7 +99,7 @@ with demo:
|
|
79 |
years_salaried = gr.Slider(**SALARIED_MIN_MAX, step=1, label="Years of employment", info="How long have this person been salaried (in years) ?")
|
80 |
|
81 |
|
82 |
-
gr.Markdown("
|
83 |
with gr.Row():
|
84 |
with gr.Column():
|
85 |
gr.Markdown("### User")
|
@@ -107,40 +127,50 @@ with demo:
|
|
107 |
label="Encrypted input representation:", max_lines=2, interactive=False
|
108 |
)
|
109 |
|
110 |
-
gr.Markdown("
|
111 |
gr.Markdown(
|
112 |
-
"
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
)
|
116 |
|
117 |
-
gr.Markdown("
|
118 |
execute_fhe_button = gr.Button("Run FHE execution.")
|
119 |
fhe_execution_time = gr.Textbox(
|
120 |
label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
|
121 |
)
|
122 |
|
123 |
-
gr.Markdown("
|
124 |
gr.Markdown(
|
125 |
-
"
|
126 |
-
|
|
|
127 |
)
|
128 |
|
129 |
-
gr.Markdown("
|
130 |
gr.Markdown(
|
131 |
-
"
|
132 |
-
|
133 |
-
"
|
134 |
)
|
135 |
-
get_output_button = gr.Button("Receive the encrypted output from the server.")
|
136 |
|
|
|
137 |
encrypted_output_representation = gr.Textbox(
|
138 |
label="Encrypted output representation: ", max_lines=2, interactive=False
|
139 |
)
|
140 |
|
141 |
-
gr.Markdown("
|
142 |
-
|
|
|
|
|
|
|
|
|
143 |
|
|
|
144 |
prediction_output = gr.Textbox(
|
145 |
label="Prediction", max_lines=1, interactive=False
|
146 |
)
|
|
|
44 |
"""
|
45 |
)
|
46 |
|
47 |
+
gr.Markdown("# Client side")
|
48 |
|
49 |
+
gr.Markdown("## Step 1: Generate the keys.")
|
50 |
+
gr.Markdown(
|
51 |
+
"""
|
52 |
+
- The private key is used to encrypt and decrypt the data and shall never be shared.
|
53 |
+
- The evaluation key is a public key that the server needs to process encrypted data. It is
|
54 |
+
therefore transmitted to the server for further processing as well.
|
55 |
+
"""
|
56 |
+
)
|
57 |
keygen_button = gr.Button("Generate the keys and send evaluation key to the server.")
|
58 |
evaluation_key = gr.Textbox(
|
59 |
label="Evaluation key representation:", max_lines=2, interactive=False
|
60 |
)
|
61 |
client_id = gr.Textbox(label="", max_lines=2, interactive=False, visible=False)
|
62 |
|
63 |
+
gr.Markdown("## Step 2: Fill in some information.")
|
64 |
+
gr.Markdown(
|
65 |
+
"""
|
66 |
+
Select any information that corresponds to the profile you want to evaluate. Three
|
67 |
+
dissociated parties are represented :
|
68 |
+
- the user, which provides some personal information in order to evaluate its credit card
|
69 |
+
eligibility
|
70 |
+
- the user's bank, which provides any of the user's banking information relevant to the
|
71 |
+
decision
|
72 |
+
- a third party, which represents any other party (here, the user's employer) that could
|
73 |
+
provide any information relevant to the decision
|
74 |
+
"""
|
75 |
+
)
|
76 |
+
|
77 |
with gr.Row():
|
78 |
with gr.Column():
|
79 |
gr.Markdown("### User")
|
80 |
gender = gr.Radio(["Female", "Male"], label="Gender", value="Female")
|
81 |
bool_inputs = gr.CheckboxGroup(["Car", "Property", "Work phone", "Phone", "Email"], label="What do you own ?")
|
82 |
num_children = gr.Slider(**CHILDREN_MIN_MAX, step=1, label="Number of children", info="How many children do you have ?")
|
83 |
+
household_size = gr.Slider(**FAMILY_MIN_MAX, step=1, label="Household size", info="How many members does your household have? ?")
|
84 |
total_income = gr.Slider(**INCOME_MIN_MAX, label="Income", info="What's you total yearly income (in euros) ?")
|
85 |
age = gr.Slider(**AGE_MIN_MAX, step=1, label="Age", info="How old are you ?")
|
86 |
income_type = gr.Dropdown(choices=INCOME_TYPES, value=INCOME_TYPES[0], label="Income type", info="What is your main type of income ?")
|
|
|
99 |
years_salaried = gr.Slider(**SALARIED_MIN_MAX, step=1, label="Years of employment", info="How long have this person been salaried (in years) ?")
|
100 |
|
101 |
|
102 |
+
gr.Markdown("## Step 3: Encrypt the inputs using FHE and send them to the server.")
|
103 |
with gr.Row():
|
104 |
with gr.Column():
|
105 |
gr.Markdown("### User")
|
|
|
127 |
label="Encrypted input representation:", max_lines=2, interactive=False
|
128 |
)
|
129 |
|
130 |
+
gr.Markdown("# Server side")
|
131 |
gr.Markdown(
|
132 |
+
"""
|
133 |
+
Once the server receives the encrypted inputs, it can compute the prediction without ever
|
134 |
+
needing to decrypt any value.
|
135 |
+
|
136 |
+
This server employs an [XGBoost](https://github.com/dmlc/xgboost) classifier model that has
|
137 |
+
been trained on [this credit card data-set](https://www.kaggle.com/datasets/rikdifos/credit-card-approval-prediction/data).
|
138 |
+
"""
|
139 |
)
|
140 |
|
141 |
+
gr.Markdown("## Step 4: Run FHE execution.")
|
142 |
execute_fhe_button = gr.Button("Run FHE execution.")
|
143 |
fhe_execution_time = gr.Textbox(
|
144 |
label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
|
145 |
)
|
146 |
|
147 |
+
gr.Markdown("# Client side")
|
148 |
gr.Markdown(
|
149 |
+
"""
|
150 |
+
Once the server completed the inference, the encrypted output is returned to the user.
|
151 |
+
"""
|
152 |
)
|
153 |
|
154 |
+
gr.Markdown("## Step 5: Receive the encrypted output from the server.")
|
155 |
gr.Markdown(
|
156 |
+
"""
|
157 |
+
The value displayed below is a shortened byte representation of the actual encrypted output.
|
158 |
+
"""
|
159 |
)
|
|
|
160 |
|
161 |
+
get_output_button = gr.Button("Receive the encrypted output from the server.")
|
162 |
encrypted_output_representation = gr.Textbox(
|
163 |
label="Encrypted output representation: ", max_lines=2, interactive=False
|
164 |
)
|
165 |
|
166 |
+
gr.Markdown("## Step 6: Decrypt the output.")
|
167 |
+
gr.Markdown(
|
168 |
+
"""
|
169 |
+
The user is able to decrypt the prediction using its private key.
|
170 |
+
"""
|
171 |
+
)
|
172 |
|
173 |
+
decrypt_button = gr.Button("Decrypt the output")
|
174 |
prediction_output = gr.Textbox(
|
175 |
label="Prediction", max_lines=1, interactive=False
|
176 |
)
|
development.py
CHANGED
@@ -30,8 +30,8 @@ print("Load and pre-process the data")
|
|
30 |
# https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
|
31 |
# A few additional pre-processing steps have been applied to this data set as well :
|
32 |
# - "ID" column has been removed
|
33 |
-
# - "Total_income" values have been multiplied by 0.14 to make its median
|
34 |
-
#
|
35 |
data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
36 |
|
37 |
# Define input and target data
|
|
|
30 |
# https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
|
31 |
# A few additional pre-processing steps have been applied to this data set as well :
|
32 |
# - "ID" column has been removed
|
33 |
+
# - "Total_income" values have been multiplied by 0.14 to make its median match France's annual
|
34 |
+
# median salary from 2023 (in euros)
|
35 |
data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
36 |
|
37 |
# Define input and target data
|
utils/pre_processing.py
CHANGED
@@ -1,4 +1,11 @@
|
|
1 |
-
"""Data pre-processing functions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
import numpy
|
4 |
from sklearn.compose import ColumnTransformer
|
|
|
1 |
+
"""Data pre-processing functions.
|
2 |
+
|
3 |
+
The pre-processing steps are heavily inspired by the following notebook :
|
4 |
+
https://www.kaggle.com/code/rikdifos/credit-card-approval-prediction-using-ml
|
5 |
+
|
6 |
+
Additional steps, mostly including renaming some values or features, were added for better user
|
7 |
+
experience.
|
8 |
+
"""
|
9 |
|
10 |
import numpy
|
11 |
from sklearn.compose import ColumnTransformer
|