Spaces:
Sleeping
Sleeping
Update src/bin/function_predictor.py
Browse files
src/bin/function_predictor.py
CHANGED
@@ -83,8 +83,16 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
|
|
83 |
rc_we_cv.append(np.round(recall_score(y.iloc[fold_test_index, :], y_pred[fold_test_index], average="weighted"), decimals=5))
|
84 |
hamm_cv.append(np.round(hamming_loss(y.iloc[fold_test_index, :], y_pred[fold_test_index]), decimals=5))
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
return {
|
87 |
"cv_results": [representation_name + "_" + dataset, acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv],
|
|
|
|
|
88 |
"predictions": y_pred
|
89 |
}
|
90 |
|
@@ -101,6 +109,8 @@ def ProtDescModel():
|
|
101 |
filtered_datasets = [dataset for dataset in datasets if aspect_type in dataset and dataset_type in dataset]
|
102 |
|
103 |
cv_results = []
|
|
|
|
|
104 |
|
105 |
for dt in tqdm(filtered_datasets, total=len(filtered_datasets)):
|
106 |
print(f"Protein function prediction is started for the dataset: {dt.split('.')[0]}")
|
@@ -119,17 +129,26 @@ def ProtDescModel():
|
|
119 |
|
120 |
if model is not None:
|
121 |
cv_results.append(model["cv_results"])
|
|
|
|
|
122 |
|
123 |
return {
|
124 |
-
"cv_results": cv_results
|
|
|
|
|
125 |
}
|
126 |
|
127 |
def pred_output():
|
128 |
model = ProtDescModel()
|
129 |
cv_result = model["cv_results"]
|
130 |
|
|
|
|
|
|
|
131 |
return {
|
132 |
-
"cv_result": cv_result
|
|
|
|
|
133 |
}
|
134 |
|
135 |
# Example call to the function
|
|
|
83 |
rc_we_cv.append(np.round(recall_score(y.iloc[fold_test_index, :], y_pred[fold_test_index], average="weighted"), decimals=5))
|
84 |
hamm_cv.append(np.round(hamming_loss(y.iloc[fold_test_index, :], y_pred[fold_test_index]), decimals=5))
|
85 |
|
86 |
+
means = list(np.mean([acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv], axis=1))
|
87 |
+
means = [np.round(i, decimals=5) for i in means]
|
88 |
+
|
89 |
+
stds = list(np.std([acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv], axis=1))
|
90 |
+
stds = [np.round(i, decimals=5) for i in stds]
|
91 |
+
|
92 |
return {
|
93 |
"cv_results": [representation_name + "_" + dataset, acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv],
|
94 |
+
"means": [representation_name + "_" + dataset] + means,
|
95 |
+
"stds": [representation_name + "_" + dataset] + stds,
|
96 |
"predictions": y_pred
|
97 |
}
|
98 |
|
|
|
109 |
filtered_datasets = [dataset for dataset in datasets if aspect_type in dataset and dataset_type in dataset]
|
110 |
|
111 |
cv_results = []
|
112 |
+
cv_mean_results = []
|
113 |
+
cv_std_results = []
|
114 |
|
115 |
for dt in tqdm(filtered_datasets, total=len(filtered_datasets)):
|
116 |
print(f"Protein function prediction is started for the dataset: {dt.split('.')[0]}")
|
|
|
129 |
|
130 |
if model is not None:
|
131 |
cv_results.append(model["cv_results"])
|
132 |
+
cv_mean_results.append(model["means"])
|
133 |
+
cv_std_results.append(model["stds"])
|
134 |
|
135 |
return {
|
136 |
+
"cv_results": cv_results,
|
137 |
+
"cv_mean_results": cv_mean_results,
|
138 |
+
"cv_std_results": cv_std_results
|
139 |
}
|
140 |
|
141 |
def pred_output():
|
142 |
model = ProtDescModel()
|
143 |
cv_result = model["cv_results"]
|
144 |
|
145 |
+
cv_mean_result = model["cv_mean_results"]
|
146 |
+
cv_std_result = model["cv_std_results"]
|
147 |
+
|
148 |
return {
|
149 |
+
"cv_result": cv_result,
|
150 |
+
"cv_mean_result": cv_mean_result,
|
151 |
+
"cv_std_result": cv_std_result
|
152 |
}
|
153 |
|
154 |
# Example call to the function
|