Spaces:
Running
Running
File size: 10,484 Bytes
5b94380 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
import pandas as pd
import os
import sys
# Absolute path of the directory containing this file. It is not referenced
# again in the visible part of the module.
script_dir = os.path.dirname(os.path.abspath(__file__))
# Extend the import search path with the parent and the current directory.
# NOTE(review): both entries are relative strings, so they presumably resolve
# against the process working directory rather than script_dir - confirm that
# this is intended.
sys.path.append('..')
sys.path.append('.')
def save_similarity_output(output_dict, method_name, leaderboard_path="./data/leaderboard_results.csv", similarity_path="./data/similarity_results.csv"):
    """Store the similarity scores of one method in two CSV files.

    For every dataset in ('sparse', '200', '500') and every aspect in
    ('MF', 'BP', 'CC') the keys ``{dataset}_{aspect}_correlation`` and
    ``{dataset}_{aspect}_pvalue`` are looked up in ``output_dict``. Each key
    that is present is written to the row of ``method_name`` in the
    similarity table (same column name) and in the leaderboard table (column
    name prefixed with ``sim_``). When all three aspects of a dataset provide
    a value, their mean is stored as ``{dataset}_Ave_correlation`` /
    ``{dataset}_Ave_pvalue`` (again ``sim_``-prefixed in the leaderboard).

    Both tables are read from their CSV path when the file exists, otherwise
    they start empty; a row for ``method_name`` is appended when missing.
    Both files are rewritten at the end.

    Args:
        output_dict: Mapping from score key to numeric value.
        method_name: Value of the 'Method' column identifying the row to update.
        leaderboard_path: CSV path of the leaderboard table.
        similarity_path: CSV path of the detailed similarity table.

    Returns:
        Always 0.
    """
    tables = []
    for csv_path in (leaderboard_path, similarity_path):
        if os.path.exists(csv_path):
            table = pd.read_csv(csv_path)
        else:
            table = pd.DataFrame(columns=['Method'])
        if 'Method' not in table.columns:
            # A file written without a 'Method' column (e.g. a leaderboard
            # produced by writing to a fixed row) cannot be keyed by method
            # yet; add the column so the lookup below does not raise.
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, similarity_df = tables

    # Row selectors. `.loc` is used for every assignment because `.at` only
    # supports scalar labels and fails with a boolean mask once the target
    # column already exists.
    leaderboard_row = leaderboard_df['Method'] == method_name
    similarity_row = similarity_df['Method'] == method_name

    for dataset in ('sparse', '200', '500'):
        collected = {'correlation': [], 'pvalue': []}

        for aspect in ('MF', 'BP', 'CC'):
            for stat, values in collected.items():
                key = f"{dataset}_{aspect}_{stat}"
                if key not in output_dict:
                    continue
                values.append(output_dict[key])
                similarity_df.loc[similarity_row, key] = output_dict[key]
                leaderboard_df.loc[leaderboard_row, f"sim_{key}"] = output_dict[key]

        # An average is only meaningful when MF, BP and CC all contributed.
        for stat, values in collected.items():
            if len(values) == 3:
                average = sum(values) / 3
                similarity_df.loc[similarity_row, f"{dataset}_Ave_{stat}"] = average
                leaderboard_df.loc[leaderboard_row, f"sim_{dataset}_Ave_{stat}"] = average

    leaderboard_df.to_csv(leaderboard_path, index=False)
    similarity_df.to_csv(similarity_path, index=False)
    return 0
def save_function_output(model_output, method_name, func_results_path="./data/function_results.csv", leaderboard_path="./data/leaderboard_results.csv"):
    """Store the function-prediction metrics of one method in two CSV files.

    Each entry of ``model_output`` is an indexable record whose item 0 is a
    key of the form ``{aspect}_{dataset1}_{dataset2}`` and whose items 1, 4,
    7 and 10 are read as accuracy, F1, precision and recall respectively.
    Every metric is written to the row of ``method_name`` in the function
    results table under ``{key}_{metric}``.

    For the leaderboard table, the values of each metric are averaged per
    aspect ('BP', 'CC', 'MF') into ``func_{aspect}_{metric}``; when all three
    aspects have at least one value, the mean of the three aspect averages is
    stored as ``func_Ave_{metric}``.

    Both tables are read from their CSV path when the file exists, otherwise
    they start empty; a row for ``method_name`` is appended when missing.
    Both files are rewritten at the end.

    Args:
        model_output: Iterable of records as described above.
        method_name: Value of the 'Method' column identifying the row to update.
        func_results_path: CSV path of the detailed function results table.
        leaderboard_path: CSV path of the leaderboard table.

    Returns:
        Always 0.

    Raises:
        ValueError: If a key does not split into exactly three '_'-separated parts.
        KeyError: If the aspect part of a key is not 'BP', 'CC' or 'MF'.
    """
    tables = []
    for csv_path in (func_results_path, leaderboard_path):
        if os.path.exists(csv_path):
            table = pd.read_csv(csv_path)
        else:
            table = pd.DataFrame(columns=['Method'])
        if 'Method' not in table.columns:
            # A file written without a 'Method' column (e.g. a leaderboard
            # produced by writing to a fixed row) cannot be keyed by method
            # yet; add the column so the lookup below does not raise.
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    func_results_df, leaderboard_df = tables

    # Row selectors. `.loc` is used for every assignment because `.at` only
    # supports scalar labels and fails with a boolean mask once the target
    # column already exists.
    func_row = func_results_df['Method'] == method_name
    leaderboard_row = leaderboard_df['Method'] == method_name

    aspects = ('BP', 'CC', 'MF')
    metric_names = ('accuracy', 'F1', 'precision', 'recall')
    # metric -> aspect -> list of values collected for the leaderboard averages
    per_aspect = {metric: {aspect: [] for aspect in aspects} for metric in metric_names}

    for entry in model_output:
        key = entry[0]
        scores = dict(zip(metric_names, (entry[1], entry[4], entry[7], entry[10])))
        aspect, dataset1, dataset2 = key.split('_')

        for metric, score in scores.items():
            func_results_df.loc[func_row, f"{aspect}_{dataset1}_{dataset2}_{metric}"] = score
            per_aspect[metric][aspect].append(score)

    for metric in metric_names:
        aspect_means = {}
        for aspect in aspects:
            values = per_aspect[metric][aspect]
            if values:
                aspect_means[aspect] = sum(values) / len(values)
                leaderboard_df.loc[leaderboard_row, f"func_{aspect}_{metric}"] = aspect_means[aspect]

        # The overall figure is the unweighted mean of the three aspect means
        # and is only written when every aspect contributed.
        if len(aspect_means) == len(aspects):
            overall = sum(aspect_means[aspect] for aspect in aspects) / len(aspects)
            leaderboard_df.loc[leaderboard_row, f"func_Ave_{metric}"] = overall

    func_results_df.to_csv(func_results_path, index=False)
    leaderboard_df.to_csv(leaderboard_path, index=False)
    return 0
def save_family_output(model_output, method_name, leaderboard_path="./data/leaderboard_results.csv", family_results_path="./data/family_results.csv"):
    """Store the per-fold family results of one method in two CSV files.

    ``model_output`` maps a dataset name to a mapping from metric name to a
    sequence of values. For every (dataset, metric) pair the mean of the
    values (``None`` for an empty sequence) is written to the row of
    ``method_name`` in the leaderboard table as ``fam_{dataset}_{metric}_ave``
    and each individual value is written to the family results table as
    ``{dataset}_{metric}_{i}``, where ``i`` is the position in the sequence.

    Both tables are read from their CSV path when the file exists, otherwise
    they start empty; a row for ``method_name`` is appended when missing.
    Both files are rewritten at the end.

    Args:
        model_output: Nested mapping dataset -> metric -> sequence of numbers.
        method_name: Value of the 'Method' column identifying the row to update.
        leaderboard_path: CSV path of the leaderboard table.
        family_results_path: CSV path of the detailed family results table.

    Returns:
        Tuple ``(leaderboard_df, family_results_df)`` with the updated tables.
    """
    tables = []
    for csv_path in (leaderboard_path, family_results_path):
        if os.path.exists(csv_path):
            table = pd.read_csv(csv_path)
        else:
            table = pd.DataFrame(columns=['Method'])
        if 'Method' not in table.columns:
            # Tolerate an existing file that has no 'Method' column instead of
            # failing with a KeyError on the lookup below.
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, family_results_df = tables

    # Row selectors. `.loc` is used for every assignment because `.at` only
    # supports scalar labels and fails with a boolean mask once the target
    # column already exists.
    leaderboard_row = leaderboard_df['Method'] == method_name
    family_row = family_results_df['Method'] == method_name

    for dataset, metrics in model_output.items():
        for metric, values in metrics.items():
            mean_value = sum(values) / len(values) if values else None
            leaderboard_df.loc[leaderboard_row, f"fam_{dataset}_{metric}_ave"] = mean_value

            for fold, value in enumerate(values):
                family_results_df.loc[family_row, f"{dataset}_{metric}_{fold}"] = value

    leaderboard_df.to_csv(leaderboard_path, index=False)
    family_results_df.to_csv(family_results_path, index=False)
    return leaderboard_df, family_results_df
def save_affinity_output(model_output, method_name, leaderboard_path="./data/leaderboard_results.csv", affinity_results_path="./data/affinity_results.csv"):
    """Store the affinity results of one method in two CSV files.

    ``model_output`` may contain two optional sections:

    * ``'summary'``: its ``'val_mse_error'``, ``'val_mae_error'`` and
      ``'validation_corr'`` entries are written to the row of ``method_name``
      in the leaderboard table as ``aff_mse_ave``, ``aff_mae_ave`` and
      ``aff_corr_ave`` (a missing entry is written as ``None``).
    * ``'detail'``: its ``'val_mse_errors'``, ``'val_mae_errors'`` and
      ``'validation_corrs'`` sequences are written element by element to the
      affinity results table as ``mse_{i}``, ``mae_{i}`` and ``corr_{i}``.
      Every element that is present is stored, whatever the number of folds.

    Both tables are read from their CSV path when the file exists, otherwise
    they start empty; a row for ``method_name`` is appended when missing.
    Both files are rewritten at the end.

    Args:
        model_output: Mapping with optional 'summary' and 'detail' sections.
        method_name: Value of the 'Method' column identifying the row to update.
        leaderboard_path: CSV path of the leaderboard table.
        affinity_results_path: CSV path of the detailed affinity results table.

    Returns:
        Always 0.
    """
    tables = []
    for csv_path in (leaderboard_path, affinity_results_path):
        if os.path.exists(csv_path):
            table = pd.read_csv(csv_path)
        else:
            table = pd.DataFrame(columns=['Method'])
        if 'Method' not in table.columns:
            # Tolerate an existing file that has no 'Method' column instead of
            # failing with a KeyError on the lookup below.
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, affinity_results_df = tables

    # Row selectors. `.loc` is used for every assignment because `.at` only
    # supports scalar labels and fails with a boolean mask once the target
    # column already exists.
    leaderboard_row = leaderboard_df['Method'] == method_name
    affinity_row = affinity_results_df['Method'] == method_name

    summary = model_output.get('summary', {})
    if summary:
        leaderboard_df.loc[leaderboard_row, 'aff_mse_ave'] = summary.get('val_mse_error')
        leaderboard_df.loc[leaderboard_row, 'aff_mae_ave'] = summary.get('val_mae_error')
        leaderboard_df.loc[leaderboard_row, 'aff_corr_ave'] = summary.get('validation_corr')

    detail = model_output.get('detail', {})
    if detail:
        fold_sources = (
            ('val_mse_errors', 'mse'),
            ('val_mae_errors', 'mae'),
            ('validation_corrs', 'corr'),
        )
        available = [(prefix, detail[key]) for key, prefix in fold_sources if key in detail]
        # Use the actual number of folds instead of assuming exactly ten, so
        # shorter runs do not raise IndexError and longer runs are not cut off.
        fold_count = max((len(values) for _, values in available), default=0)
        for fold in range(fold_count):
            for prefix, values in available:
                if fold < len(values):
                    affinity_results_df.loc[affinity_row, f"{prefix}_{fold}"] = values[fold]

    leaderboard_df.to_csv(leaderboard_path, index=False)
    affinity_results_df.to_csv(affinity_results_path, index=False)
    return 0
|