Spaces:
Sleeping
Sleeping
File size: 3,477 Bytes
dd49f8a 4db2d24 dd49f8a 39ebd1e a2e6203 f491248 50ca4fc a2e6203 db965ce f491248 a2e6203 54b2fde f491248 a2e6203 f491248 a2e6203 54b2fde 2614717 a2e6203 75b4caf f491248 50ca4fc b86034a a2e6203 b86034a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import yaml
import pandas as pd
import tqdm
from . import semantic_similarity_infer as ssi
from . import target_family_classifier as tfc
from . import function_predictor as fp
from . import binding_affinity_estimator as bae
def load_representation(multi_col_representation_vector_file_path):
    """Load a multi-column representation CSV into an Entry/Vector frame.

    The CSV is read with ``pandas.read_csv``. It must contain a column named
    ``'Entry'``; every column from position 1 onwards is treated as one
    dimension of the representation vector and converted to ``float``.

    Args:
        multi_col_representation_vector_file_path: Path (or any other input
            accepted by ``pandas.read_csv``) of the representation file.

    Returns:
        A ``pandas.DataFrame`` with two columns: ``'Entry'`` (the values of
        the input ``'Entry'`` column) and ``'Vector'`` (one Python list of
        floats per row), indexed 0..n-1 like the frame that was read.

    Raises:
        KeyError: If the file has no ``'Entry'`` column.
        ValueError: If a vector column holds a value that cannot be
            converted to float.
    """
    multi_col_representation_vector = pd.read_csv(multi_col_representation_vector_file_path)

    # Everything after the first column is vector data.
    vals = multi_col_representation_vector.iloc[:, 1:]

    # Convert the whole block in one pass instead of enlarging the result
    # frame row by row with ``.loc`` (each enlargement copies the frame, which
    # makes loading large files quadratic). ``tolist()`` yields plain Python
    # floats, matching a per-item ``float(...)`` conversion. No progress bar
    # is shown because there is no longer a per-row loop.
    vectors = vals.to_numpy(dtype=float).tolist()
    entries = multi_col_representation_vector['Entry'].tolist()

    return pd.DataFrame({'Entry': entries, 'Vector': vectors})
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=None, function_prediction_aspect="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=None, detailed_output=False):
    """Run the selected PROBE benchmarks for one protein representation.

    Each benchmark lives in a sibling module and is configured by assigning
    module-level attributes on it before its entry point is called.

    Args:
        benchmarks: A benchmark name or an iterable of names. Recognized
            names are ``"similarity"``, ``"function"``, ``"family"``,
            ``"affinity"`` and ``"all"`` (selects every benchmark). Other
            names are ignored.
        representation_name: Name of the representation, forwarded to every
            benchmark module.
        representation_file_human: Representation file used by the
            similarity, function and family benchmarks.
        representation_file_affinity: Representation file used by the
            affinity benchmark.
        similarity_tasks: Tasks forwarded to the similarity benchmark.
            Defaults to ``["Sparse", "200", "500"]``.
        function_prediction_aspect: Aspect forwarded to the function
            prediction benchmark.
        function_prediction_dataset: Dataset forwarded to the function
            prediction benchmark.
        family_prediction_dataset: Datasets scored by the family benchmark.
            Defaults to ``["nc", "uc50", "uc30", "mm15"]``.
        detailed_output: Flag forwarded to the similarity, function and
            family benchmarks.

    Returns:
        A dict with one key per benchmark that ran: ``'similarity'``,
        ``'function'`` and ``'affinity'`` map to whatever the respective
        module returned; ``'family'`` maps to a dict keyed by dataset name.
    """
    # Fresh lists per call: the defaults are handed to other modules as
    # module-level state, so they must not be shared default-argument objects.
    if similarity_tasks is None:
        similarity_tasks = ["Sparse", "200", "500"]
    if family_prediction_dataset is None:
        family_prediction_dataset = ["nc", "uc50", "uc30", "mm15"]

    # A bare string would otherwise be scanned character by character and
    # matched by substring, so treat it as a single benchmark name.
    if isinstance(benchmarks, str):
        benchmarks = [benchmarks]
    requested = list(benchmarks)

    # "all" was accepted when deciding whether to load the vectors but never
    # triggered a benchmark; it now selects every benchmark.
    run_all = "all" in requested
    run_similarity = run_all or "similarity" in requested
    run_function = run_all or "function" in requested
    run_family = run_all or "family" in requested
    run_affinity = run_all or "affinity" in requested

    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
    results = {}

    # Only the similarity and function benchmarks consume the loaded frame;
    # the family benchmark receives the file path instead.
    if run_similarity or run_function:
        print("\nRepresentation vectors are loading...\n")
        human_representation_dataframe = load_representation(representation_file_human)

    if run_similarity:
        print("\nSemantic similarity Inference Benchmark is running...\n")
        ssi.representation_dataframe = human_representation_dataframe
        ssi.representation_name = representation_name
        ssi.protein_names = ssi.representation_dataframe['Entry'].tolist()
        ssi.similarity_tasks = similarity_tasks
        ssi.detailed_output = detailed_output
        results['similarity'] = ssi.calculate_all_correlations()

    if run_function:
        print("\n\nOntology-based protein function prediction benchmark is running...\n")
        fp.aspect_type = function_prediction_aspect
        fp.dataset_type = function_prediction_dataset
        fp.representation_dataframe = human_representation_dataframe
        fp.representation_name = representation_name
        fp.detailed_output = detailed_output
        results['function'] = fp.pred_output()

    if run_family:
        print("\n\nDrug target protein family classification benchmark is running...\n")
        tfc.representation_path = representation_file_human
        tfc.representation_name = representation_name
        tfc.detailed_output = detailed_output
        results['family'] = {}
        for dataset in family_prediction_dataset:
            results['family'][f'{dataset}'] = tfc.score_protein_rep(dataset)

    if run_affinity:
        print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
        bae.skempi_vectors_path = representation_file_affinity
        bae.representation_name = representation_name
        results['affinity'] = bae.predict_affinities_and_report_results()

    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
    return results
|