AeternumS committed on
Commit
0489425
·
1 Parent(s): 097c2b7
Files changed (3) hide show
  1. app.py +141 -14
  2. heart_disease_dt_model.pkl +0 -0
  3. server.py +0 -149
app.py CHANGED
@@ -1,22 +1,149 @@
1
  import numpy as np
2
  import pandas as pd
3
- import joblib # for loading the saved model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from sklearn.tree import DecisionTreeClassifier #using sklearn decisiontreeclassifier
 
5
 
6
- #from concrete.ml.sklearn.xgb import DecisionTreeClassifier
 
 
7
 
8
- # Load the saved model
9
- dt = joblib.load('heart_disease_dt_model.pkl')
10
 
11
- #fhe_circuit =
12
- # Make prediction on the first row of data
13
- #prediction = dt.predict(sample_data, fhe="execute")
14
- prediction = dt.predict(sample_data) # cleartext (non-FHE) prediction
15
 
16
- # Display the prediction result
17
- print(prediction)
18
- if prediction == 1:
19
- print("Prediction: The patient is likely to have heart disease.")
20
- else:
21
- print("Prediction: The patient is unlikely to have heart disease.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import joblib
6
+
7
+ import os
8
+ import shutil
9
+
10
+ # Define the directory for FHE client/server files
11
+ fhe_directory = '/tmp/fhe_client_server_files/'
12
+
13
+ # Create the directory if it does not exist
14
+ if not os.path.exists(fhe_directory):
15
+ os.makedirs(fhe_directory)
16
+ else:
17
+ # If it exists, delete its contents
18
+ shutil.rmtree(fhe_directory)
19
+ os.makedirs(fhe_directory)
20
+
21
+ data=pd.read_csv('data/heart.xls')
22
+
23
+ data.info() #checking the info
24
+
25
+ data_corr=data.corr()
26
+
27
+ plt.figure(figsize=(20,20))
28
+ sns.heatmap(data=data_corr,annot=True)
29
+ #Heatmap for data
30
+
31
+ feature_value=np.array(data_corr['output'])
32
+ for i in range(len(feature_value)):
33
+ if feature_value[i]<0:
34
+ feature_value[i]=-feature_value[i]
35
+
36
+ print(feature_value)
37
+
38
+ features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
39
+
40
+ feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
41
+
42
+ feature_selected=feature_sorted.index
43
+
44
+ feature_selected #selected features, ordered from most to least correlated with 'output' (by absolute value)
45
+
46
+ clean_data=data[feature_selected]
47
+
48
  from sklearn.tree import DecisionTreeClassifier #using sklearn decisiontreeclassifier
49
+ from sklearn.model_selection import train_test_split
50
 
51
+ #making input and output dataset
52
+ X=clean_data.iloc[:,1:]
53
+ Y=clean_data['output']
54
 
55
+ x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
 
56
 
57
+ print(x_train.shape,y_train.shape,x_test.shape,y_test.shape) #data is split into training and testing datasets
 
 
 
58
 
59
+ # feature scaling
60
+ from sklearn.preprocessing import StandardScaler
61
+ sc=StandardScaler()
62
+ x_train=sc.fit_transform(x_train)
63
+ x_test=sc.transform(x_test)
64
+
65
+ #training our model
66
+ dt=DecisionTreeClassifier(criterion='entropy',max_depth=6)
67
+ dt.fit(x_train,y_train)
68
+ #dt.compile(x_trqin)
69
+
70
+ #predicting the value on testing data
71
+ y_pred=dt.predict(x_test)
72
+
73
+ #evaluating the model: confusion matrix and accuracy on the testing data
74
+ from sklearn.metrics import confusion_matrix
75
+ conf_mat=confusion_matrix(y_test,y_pred)
76
+ print(conf_mat)
77
+ accuracy=dt.score(x_test,y_test)
78
+ print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
79
+
80
+ joblib.dump(dt, 'heart_disease_dt_model.pkl')
81
+
82
+ from concrete.ml.sklearn.tree import DecisionTreeClassifier
83
+
84
+ fhe_compatible = DecisionTreeClassifier.from_sklearn_model(dt, x_train, n_bits = 10)
85
+ fhe_compatible.compile(x_train)
86
+
87
+
88
+
89
+
90
+
91
+
92
+ #### server
93
+ from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
94
+
95
+ # Setup the development environment
96
+ dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
97
+ dev.save()
98
+
99
+ # Setup the server
100
+ server = FHEModelServer(path_dir=fhe_directory)
101
+ server.load()
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+ ####### client
110
+
111
+ from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
112
+
113
+ # Setup the client
114
+ client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
115
+ serialized_evaluation_keys = client.get_serialized_evaluation_keys()
116
+
117
+
118
+ # Load the dataset and select the relevant features
119
+ data = pd.read_csv('data/heart.xls')
120
+
121
+ # Perform the correlation analysis
122
+ data_corr = data.corr()
123
+
124
+ # Select features based on correlation with 'output'
125
+ feature_value = np.array(data_corr['output'])
126
+ for i in range(len(feature_value)):
127
+ if feature_value[i] < 0:
128
+ feature_value[i] = -feature_value[i]
129
+
130
+ features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
131
+ feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
132
+ feature_selected = feature_sorted.index
133
+
134
+ # Clean the data by selecting the most correlated features
135
+ clean_data = data[feature_selected]
136
+
137
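+ # Extract the first row of raw feature data for prediction (excluding 'output' column). NOTE(review): the model was fit and compiled on StandardScaler-transformed inputs, so this sample probably needs sc.transform() before encryption - confirm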
138
+ sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # Reshape to 2D array for model input
139
+
140
+ encrypted_data = client.quantize_encrypt_serialize(sample_data)
141
+
142
+
143
+
144
+ ##### end client
145
+
146
+ encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
147
 
148
+ result = client.deserialize_decrypt_dequantize(encrypted_result)
149
+ print(result)
heart_disease_dt_model.pkl CHANGED
Binary files a/heart_disease_dt_model.pkl and b/heart_disease_dt_model.pkl differ
 
server.py DELETED
@@ -1,149 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import seaborn as sns
4
- import matplotlib.pyplot as plt
5
- import joblib
6
-
7
- import os
8
- import shutil
9
-
10
- # Define the directory for FHE client/server files
11
- fhe_directory = '/tmp/fhe_client_server_files/'
12
-
13
- # Create the directory if it does not exist
14
- if not os.path.exists(fhe_directory):
15
- os.makedirs(fhe_directory)
16
- else:
17
- # If it exists, delete its contents
18
- shutil.rmtree(fhe_directory)
19
- os.makedirs(fhe_directory)
20
-
21
- data=pd.read_csv('data/heart.xls')
22
-
23
- data.info() #checking the info
24
-
25
- data_corr=data.corr()
26
-
27
- plt.figure(figsize=(20,20))
28
- sns.heatmap(data=data_corr,annot=True)
29
- #Heatmap for data
30
-
31
- feature_value=np.array(data_corr['output'])
32
- for i in range(len(feature_value)):
33
- if feature_value[i]<0:
34
- feature_value[i]=-feature_value[i]
35
-
36
- print(feature_value)
37
-
38
- features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
39
-
40
- feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
41
-
42
- feature_selected=feature_sorted.index
43
-
44
- feature_selected #selected features, ordered from most to least correlated with 'output' (by absolute value)
45
-
46
- clean_data=data[feature_selected]
47
-
48
- from sklearn.tree import DecisionTreeClassifier #using sklearn decisiontreeclassifier
49
- from sklearn.model_selection import train_test_split
50
-
51
- #making input and output dataset
52
- X=clean_data.iloc[:,1:]
53
- Y=clean_data['output']
54
-
55
- x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
56
-
57
- print(x_train.shape,y_train.shape,x_test.shape,y_test.shape) #data is split into training and testing datasets
58
-
59
- # feature scaling
60
- from sklearn.preprocessing import StandardScaler
61
- sc=StandardScaler()
62
- x_train=sc.fit_transform(x_train)
63
- x_test=sc.transform(x_test)
64
-
65
- #training our model
66
- dt=DecisionTreeClassifier(criterion='entropy',max_depth=6)
67
- dt.fit(x_train,y_train)
68
- #dt.compile(x_trqin)
69
-
70
- #predicting the value on testing data
71
- y_pred=dt.predict(x_test)
72
-
73
- #evaluating the model: confusion matrix and accuracy on the testing data
74
- from sklearn.metrics import confusion_matrix
75
- conf_mat=confusion_matrix(y_test,y_pred)
76
- print(conf_mat)
77
- accuracy=dt.score(x_test,y_test)
78
- print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
79
-
80
- joblib.dump(dt, 'heart_disease_dt_model.pkl')
81
-
82
- from concrete.ml.sklearn.tree import DecisionTreeClassifier
83
-
84
- fhe_compatible = DecisionTreeClassifier.from_sklearn_model(dt, x_train, n_bits = 10)
85
- fhe_compatible.compile(x_train)
86
-
87
-
88
-
89
-
90
-
91
-
92
- #### server
93
- from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
94
-
95
- # Setup the development environment
96
- dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
97
- dev.save()
98
-
99
- # Setup the server
100
- server = FHEModelServer(path_dir=fhe_directory)
101
- server.load()
102
-
103
-
104
-
105
-
106
-
107
-
108
-
109
- ####### client
110
-
111
- from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
112
-
113
- # Setup the client
114
- client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
115
- serialized_evaluation_keys = client.get_serialized_evaluation_keys()
116
-
117
-
118
- # Load the dataset and select the relevant features
119
- data = pd.read_csv('data/heart.xls')
120
-
121
- # Perform the correlation analysis
122
- data_corr = data.corr()
123
-
124
- # Select features based on correlation with 'output'
125
- feature_value = np.array(data_corr['output'])
126
- for i in range(len(feature_value)):
127
- if feature_value[i] < 0:
128
- feature_value[i] = -feature_value[i]
129
-
130
- features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
131
- feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
132
- feature_selected = feature_sorted.index
133
-
134
- # Clean the data by selecting the most correlated features
135
- clean_data = data[feature_selected]
136
-
137
- # Extract the first row of feature data for prediction (excluding 'output' column)
138
- sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # Reshape to 2D array for model input
139
-
140
- encrypted_data = client.quantize_encrypt_serialize(sample_data)
141
-
142
-
143
-
144
- ##### end client
145
-
146
- encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
147
-
148
- result = client.deserialize_decrypt_dequantize(encrypted_result)
149
- print(result)