Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload app.py
Browse files
app.py
CHANGED
@@ -403,12 +403,18 @@ def validate_columns(df, mandatory_cols):
|
|
403 |
|
404 |
|
405 |
def process_target_fasta(sequence):
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
|
413 |
|
414 |
def send_email(receiver, msg):
|
@@ -804,7 +810,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
804 |
HelpTip(
|
805 |
"Enter (paste) a amino acid sequence below manually or upload a FASTA file."
|
806 |
"If multiple entities are in the FASTA, only the first will be used."
|
807 |
-
"Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for
|
|
|
808 |
)
|
809 |
with gr.Row():
|
810 |
target_input_type = gr.Dropdown(
|
@@ -838,9 +845,9 @@ To predict interactions/binding affinities of a single target against a library
|
|
838 |
# with gr.Row():
|
839 |
# with gr.Column():
|
840 |
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
841 |
-
|
842 |
-
|
843 |
-
|
844 |
|
845 |
with gr.Row():
|
846 |
with gr.Column():
|
@@ -862,7 +869,7 @@ To predict interactions/binding affinities of a single target against a library
|
|
862 |
with gr.Row():
|
863 |
with gr.Column():
|
864 |
HelpTip(
|
865 |
-
"Select a preset compound library (e.g., DrugBank)."
|
866 |
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
|
867 |
"or use an SDF file."
|
868 |
)
|
@@ -882,15 +889,18 @@ To predict interactions/binding affinities of a single target against a library
|
|
882 |
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
883 |
"while affinity prediction directly estimates their binding strength measured using IC50."
|
884 |
)
|
885 |
-
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
|
|
886 |
value='Compound-protein interaction')
|
887 |
|
888 |
with gr.Row():
|
889 |
with gr.Column():
|
890 |
-
HelpTip(
|
891 |
-
|
892 |
-
|
893 |
-
|
|
|
|
|
894 |
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
895 |
with gr.Row():
|
896 |
with gr.Column():
|
@@ -901,7 +911,7 @@ To predict interactions/binding affinities of a single target against a library
|
|
901 |
|
902 |
with gr.Row(visible=True):
|
903 |
with gr.Column():
|
904 |
-
|
905 |
drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
|
906 |
# TODO Modify the pd df directly with df['X2'] = target
|
907 |
|
@@ -943,7 +953,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
943 |
with gr.Row():
|
944 |
with gr.Column():
|
945 |
HelpTip(
|
946 |
-
"By default, models trained on all protein families (general) will be applied."
|
947 |
"If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
|
948 |
)
|
949 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
@@ -973,22 +983,26 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
973 |
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
974 |
"while affinity prediction directly estimates their binding strength measured using IC50."
|
975 |
)
|
976 |
-
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
|
|
977 |
value='Compound-protein interaction')
|
978 |
|
979 |
with gr.Row():
|
980 |
with gr.Column():
|
981 |
-
HelpTip(
|
982 |
-
|
983 |
-
|
984 |
-
|
|
|
|
|
985 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
986 |
|
987 |
with gr.Row():
|
988 |
with gr.Column():
|
989 |
target_identify_email = gr.Textbox(
|
990 |
label='Step 6. Email (Optional)',
|
991 |
-
info="If an email is provided, a notification email will be sent to you when your job
|
|
|
992 |
)
|
993 |
|
994 |
with gr.Row(visible=True):
|
@@ -1007,45 +1021,69 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1007 |
''')
|
1008 |
with gr.Blocks() as infer_block:
|
1009 |
with gr.Column() as infer_page:
|
1010 |
-
infer_type = gr.Dropdown(choices=['Upload a
|
1011 |
-
'Upload a
|
1012 |
-
|
|
|
1013 |
with gr.Column() as pair_upload:
|
1014 |
-
gr.
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
|
|
1018 |
infer_data_for_predict = gr.File(
|
1019 |
-
label='Upload a
|
1020 |
with gr.Column() as pair_generate:
|
1021 |
with gr.Row():
|
1022 |
-
gr.File(label='Example SDF
|
1023 |
value='data/examples/compound_library.sdf', interactive=False)
|
1024 |
-
gr.File(label='Example FASTA
|
1025 |
value='data/examples/target_library.fasta', interactive=False)
|
1026 |
with gr.Row():
|
1027 |
-
gr.File(label='Example CSV
|
1028 |
value='data/examples/compound_library.csv', interactive=False)
|
1029 |
-
gr.File(label='Example CSV
|
1030 |
value='data/examples/target_library.csv', interactive=False)
|
1031 |
with gr.Row():
|
1032 |
-
infer_drug = gr.File(label='SDF/CSV
|
1033 |
file_count="single", type='filepath')
|
1034 |
-
infer_target = gr.File(label='FASTA/CSV
|
1035 |
file_count="single", type='filepath')
|
1036 |
|
1037 |
-
with gr.Row(
|
1038 |
-
|
1039 |
-
|
1040 |
-
|
1041 |
-
|
1042 |
-
|
|
|
|
|
|
|
1043 |
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
|
1048 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1049 |
|
1050 |
with gr.Row(visible=True):
|
1051 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
@@ -1060,7 +1098,6 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1060 |
with gr.Blocks() as report:
|
1061 |
gr.Markdown('''
|
1062 |
# <center>DeepSEQreen Chemical Property Report</center>
|
1063 |
-
<center>
|
1064 |
To compute chemical properties for the predictions of drug hit screening,
|
1065 |
target protein identification, and interaction pair inference.
|
1066 |
|
@@ -1068,7 +1105,6 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1068 |
your own dataset. The page shows only a preview report displaying at most 30 records
|
1069 |
(with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
|
1070 |
generate and download a raw data CSV or interactive table HTML file below.
|
1071 |
-
</center>
|
1072 |
''')
|
1073 |
with gr.Row():
|
1074 |
file_for_report = gr.File(interactive=True, type='filepath')
|
@@ -1087,10 +1123,10 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1087 |
|
1088 |
with gr.Row():
|
1089 |
with gr.Column():
|
1090 |
-
csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True
|
1091 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
1092 |
with gr.Column():
|
1093 |
-
html_generate = gr.Button(value='Generate report (HTML)', interactive=True
|
1094 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
1095 |
|
1096 |
|
@@ -1188,7 +1224,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1188 |
def example_fill(input_type):
|
1189 |
return {target_id: 'Q16539',
|
1190 |
target_gene: 'MAPK14',
|
1191 |
-
target_organism: '
|
1192 |
target_fasta: """
|
1193 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
1194 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
@@ -1230,7 +1266,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1230 |
& (benchmark_df['Scenario'] == scenario)
|
1231 |
& (benchmark_df['all'] == False)]
|
1232 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
1233 |
-
|
1234 |
return gr.Dropdown(value=row['preset'],
|
1235 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
1236 |
f"with the best {score} ({float(row[score]):.3f}) "
|
@@ -1339,6 +1374,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1339 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1340 |
else:
|
1341 |
screen_df = process_drug_library_upload(library_upload)
|
|
|
1342 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1343 |
raise gr.Error(f'The uploaded compound library has more records '
|
1344 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
@@ -1576,3 +1612,5 @@ if __name__ == "__main__":
|
|
1576 |
demo.launch(
|
1577 |
show_api=False,
|
1578 |
)
|
|
|
|
|
|
403 |
|
404 |
|
405 |
def process_target_fasta(sequence):
    """Return the sequence of the first record in a FASTA-formatted string.

    Args:
        sequence: FASTA text (one or more records). Only the first record
            is used; any further records are ignored.

    Returns:
        The first record's sequence as a plain ``str``.

    Raises:
        gr.Error: If the input is empty/blank, contains no FASTA record,
            or cannot be parsed. The original exception is chained as the
            cause so the traceback is preserved in the server log.
    """
    try:
        if not sequence or not sequence.strip():
            raise ValueError('Empty FASTA sequence.')
        # Only the first record is needed, so pull it lazily instead of
        # materialising every record in a list. The ``None`` default lets us
        # report "no record" explicitly rather than surfacing an opaque
        # "list index out of range" message to the user.
        record = next(SeqIO.parse(io.StringIO(sequence), "fasta"), None)
        if record is None:
            raise ValueError(
                "No FASTA record found; the input should start with a '>' header line."
            )
        return str(record.seq)
    except Exception as e:
        # Single UI-facing boundary: convert any failure into a Gradio error.
        raise gr.Error(f'Failed to process FASTA due to error: {str(e)}') from e
|
418 |
|
419 |
|
420 |
def send_email(receiver, msg):
|
|
|
810 |
HelpTip(
|
811 |
"Enter (paste) a amino acid sequence below manually or upload a FASTA file."
|
812 |
"If multiple entities are in the FASTA, only the first will be used."
|
813 |
+
"Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
|
814 |
+
"the sequence."
|
815 |
)
|
816 |
with gr.Row():
|
817 |
target_input_type = gr.Dropdown(
|
|
|
845 |
# with gr.Row():
|
846 |
# with gr.Column():
|
847 |
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
848 |
+
# with gr.Column():
|
849 |
+
# gr.File(label='Example FASTA file',
|
850 |
+
# value='data/examples/MAPK14.fasta', interactive=False)
|
851 |
|
852 |
with gr.Row():
|
853 |
with gr.Column():
|
|
|
869 |
with gr.Row():
|
870 |
with gr.Column():
|
871 |
HelpTip(
|
872 |
+
"Select a preset compound library (e.g., DrugBank)."
|
873 |
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
|
874 |
"or use an SDF file."
|
875 |
)
|
|
|
889 |
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
890 |
"while affinity prediction directly estimates their binding strength measured using IC50."
|
891 |
)
|
892 |
+
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
893 |
+
label='Step 4. Select a Prediction Task',
|
894 |
value='Compound-protein interaction')
|
895 |
|
896 |
with gr.Row():
|
897 |
with gr.Column():
|
898 |
+
HelpTip(
|
899 |
+
"Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
|
900 |
+
"Please refer to documentation for detailed benchamrk results."
|
901 |
+
)
|
902 |
+
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
903 |
+
label='Step 5. Select a Preset Model')
|
904 |
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
905 |
with gr.Row():
|
906 |
with gr.Column():
|
|
|
911 |
|
912 |
with gr.Row(visible=True):
|
913 |
with gr.Column():
|
914 |
+
# drug_screen_clr_btn = gr.ClearButton(size='lg')
|
915 |
drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
|
916 |
# TODO Modify the pd df directly with df['X2'] = target
|
917 |
|
|
|
953 |
with gr.Row():
|
954 |
with gr.Column():
|
955 |
HelpTip(
|
956 |
+
"By default, models trained on all protein families (general) will be applied."
|
957 |
"If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
|
958 |
)
|
959 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
|
|
983 |
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
984 |
"while affinity prediction directly estimates their binding strength measured using IC50."
|
985 |
)
|
986 |
+
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
987 |
+
label='Step 4. Select a Prediction Task',
|
988 |
value='Compound-protein interaction')
|
989 |
|
990 |
with gr.Row():
|
991 |
with gr.Column():
|
992 |
+
HelpTip(
|
993 |
+
"Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
|
994 |
+
"Please refer to documentation for detailed benchamrk results."
|
995 |
+
)
|
996 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
997 |
+
label='Step 5. Select a Preset Model')
|
998 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
999 |
|
1000 |
with gr.Row():
|
1001 |
with gr.Column():
|
1002 |
target_identify_email = gr.Textbox(
|
1003 |
label='Step 6. Email (Optional)',
|
1004 |
+
info="If an email is provided, a notification email will be sent to you when your job "
|
1005 |
+
"is completed."
|
1006 |
)
|
1007 |
|
1008 |
with gr.Row(visible=True):
|
|
|
1021 |
''')
|
1022 |
with gr.Blocks() as infer_block:
|
1023 |
with gr.Column() as infer_page:
|
1024 |
+
infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
|
1025 |
+
'Upload a compound library and a target library'],
|
1026 |
+
label='Step 1. Select Pair Input Type and Input',
|
1027 |
+
value='Upload a CSV interaction pair dataset')
|
1028 |
with gr.Column() as pair_upload:
|
1029 |
+
with gr.Row():
|
1030 |
+
gr.File(label="Example custom dataset",
|
1031 |
+
value="data/examples/interaction_pair_inference.csv",
|
1032 |
+
interactive=False)
|
1033 |
+
with gr.Row():
|
1034 |
infer_data_for_predict = gr.File(
|
1035 |
+
label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
|
1036 |
with gr.Column() as pair_generate:
|
1037 |
with gr.Row():
|
1038 |
+
gr.File(label='Example SDF Compound Library',
|
1039 |
value='data/examples/compound_library.sdf', interactive=False)
|
1040 |
+
gr.File(label='Example FASTA Target Library',
|
1041 |
value='data/examples/target_library.fasta', interactive=False)
|
1042 |
with gr.Row():
|
1043 |
+
gr.File(label='Example CSV Compound Library',
|
1044 |
value='data/examples/compound_library.csv', interactive=False)
|
1045 |
+
gr.File(label='Example CSV Target Library',
|
1046 |
value='data/examples/target_library.csv', interactive=False)
|
1047 |
with gr.Row():
|
1048 |
+
infer_drug = gr.File(label='SDF/CSV File containing multiple compounds',
|
1049 |
file_count="single", type='filepath')
|
1050 |
+
infer_target = gr.File(label='FASTA/CSV File containing multiple targets',
|
1051 |
file_count="single", type='filepath')
|
1052 |
|
1053 |
+
with gr.Row():
|
1054 |
+
with gr.Column():
|
1055 |
+
HelpTip(
|
1056 |
+
"By default, models trained on all protein families (general) will be applied."
|
1057 |
+
"If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
|
1058 |
+
)
|
1059 |
+
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
1060 |
+
value='General',
|
1061 |
+
label='Step 2. Select Target Protein Family (Optional)')
|
1062 |
|
1063 |
+
with gr.Row():
|
1064 |
+
with gr.Column():
|
1065 |
+
HelpTip(
|
1066 |
+
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
1067 |
+
"while affinity prediction directly estimates their binding strength measured using IC50."
|
1068 |
+
)
|
1069 |
+
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
1070 |
+
label='Step 3. Select a Prediction Task',
|
1071 |
+
value='Compound-protein interaction')
|
1072 |
+
|
1073 |
+
with gr.Row():
|
1074 |
+
with gr.Column():
|
1075 |
+
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
|
1076 |
+
"Please refer to documentation for detailed benchamrk results."
|
1077 |
+
)
|
1078 |
+
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
|
1079 |
+
infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
1080 |
+
|
1081 |
+
|
1082 |
+
with gr.Row():
|
1083 |
+
pair_infer_email = gr.Textbox(
|
1084 |
+
label='Step 5. Email (Optional)',
|
1085 |
+
info="If an email is provided, a notification email will be sent to you when your job is completed."
|
1086 |
+
)
|
1087 |
|
1088 |
with gr.Row(visible=True):
|
1089 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
|
1098 |
with gr.Blocks() as report:
|
1099 |
gr.Markdown('''
|
1100 |
# <center>DeepSEQreen Chemical Property Report</center>
|
|
|
1101 |
To compute chemical properties for the predictions of drug hit screening,
|
1102 |
target protein identification, and interaction pair inference.
|
1103 |
|
|
|
1105 |
your own dataset. The page shows only a preview report displaying at most 30 records
|
1106 |
(with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
|
1107 |
generate and download a raw data CSV or interactive table HTML file below.
|
|
|
1108 |
''')
|
1109 |
with gr.Row():
|
1110 |
file_for_report = gr.File(interactive=True, type='filepath')
|
|
|
1123 |
|
1124 |
with gr.Row():
|
1125 |
with gr.Column():
|
1126 |
+
csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
|
1127 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
1128 |
with gr.Column():
|
1129 |
+
html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
|
1130 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
1131 |
|
1132 |
|
|
|
1224 |
def example_fill(input_type):
|
1225 |
return {target_id: 'Q16539',
|
1226 |
target_gene: 'MAPK14',
|
1227 |
+
target_organism: 'Homo sapiens',
|
1228 |
target_fasta: """
|
1229 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
1230 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
|
|
1266 |
& (benchmark_df['Scenario'] == scenario)
|
1267 |
& (benchmark_df['all'] == False)]
|
1268 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
|
|
1269 |
return gr.Dropdown(value=row['preset'],
|
1270 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
1271 |
f"with the best {score} ({float(row[score]):.3f}) "
|
|
|
1374 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1375 |
else:
|
1376 |
screen_df = process_drug_library_upload(library_upload)
|
1377 |
+
print(screen_df.shape)
|
1378 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1379 |
raise gr.Error(f'The uploaded compound library has more records '
|
1380 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
|
|
1612 |
demo.launch(
|
1613 |
show_api=False,
|
1614 |
)
|
1615 |
+
|
1616 |
+
#%%
|