Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,7 +60,7 @@ SESSION.mount('https://', ADAPTER)
|
|
| 60 |
|
| 61 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 62 |
|
| 63 |
-
CUSTOM_DATASET_MAX_LEN =
|
| 64 |
|
| 65 |
CSS = """
|
| 66 |
.help-tip {
|
|
@@ -403,12 +403,12 @@ def validate_columns(df, mandatory_cols):
|
|
| 403 |
|
| 404 |
|
| 405 |
def process_target_fasta(sequence):
|
| 406 |
-
lines = sequence.strip().split("\n")
|
| 407 |
-
if lines[0].startswith(">"):
|
| 408 |
-
|
| 409 |
-
return ''.join(lines).split(">")[0]
|
| 410 |
-
|
| 411 |
-
|
| 412 |
|
| 413 |
|
| 414 |
def send_email(receiver, msg):
|
|
@@ -732,24 +732,24 @@ def drug_library_from_sdf(sdf_path):
|
|
| 732 |
|
| 733 |
def process_target_library_upload(library_upload):
|
| 734 |
if library_upload.endswith('.csv'):
|
| 735 |
-
|
| 736 |
elif library_upload.endswith('.fasta'):
|
| 737 |
-
|
| 738 |
else:
|
| 739 |
raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
|
| 740 |
-
validate_columns(
|
| 741 |
-
return
|
| 742 |
|
| 743 |
|
| 744 |
def process_drug_library_upload(library_upload):
|
| 745 |
if library_upload.endswith('.csv'):
|
| 746 |
-
|
| 747 |
elif library_upload.endswith('.sdf'):
|
| 748 |
-
|
| 749 |
else:
|
| 750 |
-
raise gr.Error('Currently only CSV and SDF files are supported as
|
| 751 |
-
validate_columns(
|
| 752 |
-
return
|
| 753 |
|
| 754 |
|
| 755 |
def target_library_from_fasta(fasta_path):
|
|
@@ -863,14 +863,15 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 863 |
with gr.Column():
|
| 864 |
HelpTip(
|
| 865 |
"Select a preset compound library (e.g., DrugBank)."
|
| 866 |
-
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES,
|
|
|
|
| 867 |
)
|
| 868 |
drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
|
| 869 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
| 870 |
with gr.Row():
|
| 871 |
-
gr.File(label='Example SDF
|
| 872 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 873 |
-
gr.File(label='Example CSV
|
| 874 |
value='data/examples/compound_library.csv', interactive=False)
|
| 875 |
drug_library_upload_btn = gr.UploadButton(
|
| 876 |
label='Upload a custom library', variant='primary')
|
|
@@ -958,9 +959,9 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 958 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
| 959 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
| 960 |
with gr.Row():
|
| 961 |
-
gr.File(label='Example FASTA
|
| 962 |
value='data/examples/target_library.fasta', interactive=False)
|
| 963 |
-
gr.File(label='Example CSV
|
| 964 |
value='data/examples/target_library.csv', interactive=False)
|
| 965 |
target_library_upload_btn = gr.UploadButton(
|
| 966 |
label='Upload a custom library', variant='primary')
|
|
@@ -1006,69 +1007,45 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1006 |
''')
|
| 1007 |
with gr.Blocks() as infer_block:
|
| 1008 |
with gr.Column() as infer_page:
|
| 1009 |
-
infer_type = gr.Dropdown(choices=['Upload a
|
| 1010 |
-
'Upload a
|
| 1011 |
-
|
| 1012 |
-
value='Upload a CSV interaction pair dataset')
|
| 1013 |
with gr.Column() as pair_upload:
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
with gr.Row():
|
| 1019 |
infer_data_for_predict = gr.File(
|
| 1020 |
-
label='Upload a
|
| 1021 |
with gr.Column() as pair_generate:
|
| 1022 |
with gr.Row():
|
| 1023 |
-
gr.File(label='Example SDF
|
| 1024 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 1025 |
-
gr.File(label='Example FASTA
|
| 1026 |
value='data/examples/target_library.fasta', interactive=False)
|
| 1027 |
with gr.Row():
|
| 1028 |
-
gr.File(label='Example CSV
|
| 1029 |
value='data/examples/compound_library.csv', interactive=False)
|
| 1030 |
-
gr.File(label='Example CSV
|
| 1031 |
value='data/examples/target_library.csv', interactive=False)
|
| 1032 |
with gr.Row():
|
| 1033 |
-
infer_drug = gr.File(label='SDF/CSV
|
| 1034 |
file_count="single", type='filepath')
|
| 1035 |
-
infer_target = gr.File(label='FASTA/CSV
|
| 1036 |
file_count="single", type='filepath')
|
| 1037 |
|
| 1038 |
-
with gr.Row():
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1045 |
-
value='General',
|
| 1046 |
-
label='Step 2. Select Target Protein Family (Optional)')
|
| 1047 |
-
|
| 1048 |
-
with gr.Row():
|
| 1049 |
-
with gr.Column():
|
| 1050 |
-
HelpTip(
|
| 1051 |
-
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
| 1052 |
-
"while affinity prediction directly estimates their binding strength measured using IC50."
|
| 1053 |
-
)
|
| 1054 |
-
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1055 |
-
label='Step 3. Select a Prediction Task',
|
| 1056 |
-
value='Compound-protein interaction')
|
| 1057 |
-
|
| 1058 |
-
with gr.Row():
|
| 1059 |
-
with gr.Column():
|
| 1060 |
-
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
|
| 1061 |
-
"Please refer to documentation for detailed benchamrk results."
|
| 1062 |
-
)
|
| 1063 |
-
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
|
| 1064 |
-
infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 1065 |
-
|
| 1066 |
|
| 1067 |
-
with gr.Row():
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
|
| 1073 |
with gr.Row(visible=True):
|
| 1074 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
@@ -1110,10 +1087,10 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1110 |
|
| 1111 |
with gr.Row():
|
| 1112 |
with gr.Column():
|
| 1113 |
-
csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
|
| 1114 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
| 1115 |
with gr.Column():
|
| 1116 |
-
html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
|
| 1117 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
| 1118 |
|
| 1119 |
|
|
@@ -1362,7 +1339,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1362 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
| 1363 |
else:
|
| 1364 |
screen_df = process_drug_library_upload(library_upload)
|
| 1365 |
-
print(screen_df.shape)
|
| 1366 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
| 1367 |
raise gr.Error(f'The uploaded compound library has more records '
|
| 1368 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
|
@@ -1600,5 +1576,3 @@ if __name__ == "__main__":
|
|
| 1600 |
demo.launch(
|
| 1601 |
show_api=False,
|
| 1602 |
)
|
| 1603 |
-
|
| 1604 |
-
#%%
|
|
|
|
| 60 |
|
| 61 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 62 |
|
| 63 |
+
CUSTOM_DATASET_MAX_LEN = 10_000
|
| 64 |
|
| 65 |
CSS = """
|
| 66 |
.help-tip {
|
|
|
|
| 403 |
|
| 404 |
|
| 405 |
def process_target_fasta(sequence):
    """Return the residue string of the first sequence found in ``sequence``.

    Args:
        sequence: Text holding one or more FASTA records (a ``>`` header line
            followed by sequence lines), or a bare sequence with no header.

    Returns:
        The first record's sequence as a plain ``str``. For header-less input
        the text itself is returned with all whitespace removed. Blank (or
        ``None``) input yields an empty string.
    """
    text = (sequence or "").strip()
    if not text.startswith(">"):
        # No FASTA header, so there is no record to take: indexing the parsed
        # list with [0] used to fail here. Treat the text as a raw sequence,
        # as the earlier hand-written parser did.
        # NOTE(review): Biopython's handling of text before the first '>' has
        # differed between releases - confirm against the pinned version.
        return "".join(text.split())
    # Only the first record is needed; do not materialise the whole input.
    record = next(SeqIO.parse(io.StringIO(text), "fasta"), None)
    return str(record.seq) if record is not None else ""
|
| 412 |
|
| 413 |
|
| 414 |
def send_email(receiver, msg):
|
|
|
|
| 732 |
|
| 733 |
def process_target_library_upload(library_upload):
    """Load an uploaded target library file and check its columns.

    Args:
        library_upload: Path of the uploaded file. Its extension selects the
            loader: ``.csv`` is read with ``pd.read_csv`` and ``.fasta`` with
            ``target_library_from_fasta``.

    Returns:
        The loaded table, after ``validate_columns(df, ['X2'])`` has been
        called on it.

    Raises:
        gr.Error: If the file extension is neither ``.csv`` nor ``.fasta``.
    """
    # Match the extension case-insensitively so that e.g. ``LIBRARY.CSV`` is
    # not rejected as an unsupported format.
    filename = library_upload.lower()
    if filename.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif filename.endswith('.fasta'):
        df = target_library_from_fasta(library_upload)
    else:
        raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
    validate_columns(df, ['X2'])
    return df
|
| 742 |
|
| 743 |
|
| 744 |
def process_drug_library_upload(library_upload):
    """Load an uploaded compound library file and check its columns.

    Args:
        library_upload: Path of the uploaded file. Its extension selects the
            loader: ``.csv`` is read with ``pd.read_csv`` and ``.sdf`` with
            ``drug_library_from_sdf``.

    Returns:
        The loaded table, after ``validate_columns(df, ['X1'])`` has been
        called on it.

    Raises:
        gr.Error: If the file extension is neither ``.csv`` nor ``.sdf``.
    """
    # Match the extension case-insensitively so that e.g. ``LIBRARY.SDF`` is
    # not rejected as an unsupported format.
    filename = library_upload.lower()
    if filename.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif filename.endswith('.sdf'):
        df = drug_library_from_sdf(library_upload)
    else:
        raise gr.Error('Currently only CSV and SDF files are supported as drug libraries.')
    validate_columns(df, ['X1'])
    return df
|
| 753 |
|
| 754 |
|
| 755 |
def target_library_from_fasta(fasta_path):
|
|
|
|
| 863 |
with gr.Column():
|
| 864 |
HelpTip(
|
| 865 |
"Select a preset compound library (e.g., DrugBank)."
|
| 866 |
+
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
|
| 867 |
+
"or use an SDF file."
|
| 868 |
)
|
| 869 |
drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
|
| 870 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
| 871 |
with gr.Row():
|
| 872 |
+
gr.File(label='Example SDF compound library',
|
| 873 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 874 |
+
gr.File(label='Example CSV compound library',
|
| 875 |
value='data/examples/compound_library.csv', interactive=False)
|
| 876 |
drug_library_upload_btn = gr.UploadButton(
|
| 877 |
label='Upload a custom library', variant='primary')
|
|
|
|
| 959 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
| 960 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
| 961 |
with gr.Row():
|
| 962 |
+
gr.File(label='Example FASTA target library',
|
| 963 |
value='data/examples/target_library.fasta', interactive=False)
|
| 964 |
+
gr.File(label='Example CSV target library',
|
| 965 |
value='data/examples/target_library.csv', interactive=False)
|
| 966 |
target_library_upload_btn = gr.UploadButton(
|
| 967 |
label='Upload a custom library', variant='primary')
|
|
|
|
| 1007 |
''')
|
| 1008 |
with gr.Blocks() as infer_block:
|
| 1009 |
with gr.Column() as infer_page:
|
| 1010 |
+
infer_type = gr.Dropdown(choices=['Upload a compound library and a target library',
|
| 1011 |
+
'Upload a CSV interaction pair dataset'],
|
| 1012 |
+
value='Upload a compound library and a target library')
|
|
|
|
| 1013 |
with gr.Column() as pair_upload:
|
| 1014 |
+
gr.File(label="Example custom dataset",
|
| 1015 |
+
value="data/examples/interaction_pair_inference.csv",
|
| 1016 |
+
interactive=False)
|
| 1017 |
+
with gr.Column():
|
|
|
|
| 1018 |
infer_data_for_predict = gr.File(
|
| 1019 |
+
label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
|
| 1020 |
with gr.Column() as pair_generate:
|
| 1021 |
with gr.Row():
|
| 1022 |
+
gr.File(label='Example SDF compound library',
|
| 1023 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 1024 |
+
gr.File(label='Example FASTA target library',
|
| 1025 |
value='data/examples/target_library.fasta', interactive=False)
|
| 1026 |
with gr.Row():
|
| 1027 |
+
gr.File(label='Example CSV compound library',
|
| 1028 |
value='data/examples/compound_library.csv', interactive=False)
|
| 1029 |
+
gr.File(label='Example CSV target library',
|
| 1030 |
value='data/examples/target_library.csv', interactive=False)
|
| 1031 |
with gr.Row():
|
| 1032 |
+
infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
|
| 1033 |
file_count="single", type='filepath')
|
| 1034 |
+
infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
|
| 1035 |
file_count="single", type='filepath')
|
| 1036 |
|
| 1037 |
+
with gr.Row(visible=True):
|
| 1038 |
+
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
|
| 1039 |
+
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
| 1040 |
+
pair_infer_target_family = gr.Dropdown(choices=['General'],
|
| 1041 |
+
label='Target family',
|
| 1042 |
+
value='General')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1043 |
|
| 1044 |
+
# with gr.Row():
|
| 1045 |
+
# pair_infer_email = gr.Textbox(
|
| 1046 |
+
# label='Email (optional)',
|
| 1047 |
+
# info="Your email will be used to send you notifications when your job finishes."
|
| 1048 |
+
# )
|
| 1049 |
|
| 1050 |
with gr.Row(visible=True):
|
| 1051 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
|
|
| 1087 |
|
| 1088 |
with gr.Row():
|
| 1089 |
with gr.Column():
|
| 1090 |
+
csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True, variant='primary')
|
| 1091 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
| 1092 |
with gr.Column():
|
| 1093 |
+
html_generate = gr.Button(value='Generate report (HTML)', interactive=True, variant='primary')
|
| 1094 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
| 1095 |
|
| 1096 |
|
|
|
|
| 1339 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
| 1340 |
else:
|
| 1341 |
screen_df = process_drug_library_upload(library_upload)
|
|
|
|
| 1342 |
# Refuse libraries that reach the size cap. The limit is interpolated into the
# message; without the braces the user saw the literal constant name.
# NOTE(review): '>=' also rejects a library with exactly the maximum number of
# records, while the message says "more records than" - confirm which is intended.
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
    raise gr.Error(f'The uploaded compound library has more records '
                   f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')
|
|
|
|
| 1576 |
demo.launch(
|
| 1577 |
show_api=False,
|
| 1578 |
)
|
|
|
|
|
|