DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on Dec 25, 2023

Commit

73e18be

1 Parent(s): c2d0602

Upload app.py

Browse files

Files changed (1) hide show

app.py +93 -55

app.py CHANGED Viewed

@@ -403,12 +403,18 @@ def validate_columns(df, mandatory_cols):
 def process_target_fasta(sequence):
-    # lines = sequence.strip().split("\n")
-    # if lines[0].startswith(">"):
-    #     lines = lines[1:]
-    # return ''.join(lines).split(">")[0]
-    record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
-    return str(record.seq)
 def send_email(receiver, msg):
@@ -804,7 +810,8 @@ To predict interactions/binding affinities of a single target against a library
                             HelpTip(
                                 "Enter (paste) a amino acid sequence below manually or upload a FASTA file."
                                 "If multiple entities are in the FASTA, only the first will be used."
-                                "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for the sequence."
                             )
                             with gr.Row():
                                 target_input_type = gr.Dropdown(
@@ -838,9 +845,9 @@ To predict interactions/binding affinities of a single target against a library
                     # with gr.Row():
                     #     with gr.Column():
                     example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
-                        # with gr.Column():
-                        #     gr.File(label='Example FASTA file',
-                        #             value='data/examples/MAPK14.fasta', interactive=False)
                     with gr.Row():
                         with gr.Column():
@@ -862,7 +869,7 @@ To predict interactions/binding affinities of a single target against a library
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "Select a preset compound library (e.g., DrugBank)."
                                 "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
                                 "or use an SDF file."
                             )
@@ -882,15 +889,18 @@ To predict interactions/binding affinities of a single target against a library
                                 "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
                                 "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
-                            drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
                                                            value='Compound-protein interaction')
                     with gr.Row():
                         with gr.Column():
-                            HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
-                                    "Please refer to documentation for detailed benchamrk results."
-                            )
-                            drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
                             screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
                         with gr.Column():
@@ -901,7 +911,7 @@ To predict interactions/binding affinities of a single target against a library
                     with gr.Row(visible=True):
                         with gr.Column():
-                        # drug_screen_clr_btn = gr.ClearButton(size='lg')
                             drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
                     # TODO Modify the pd df directly with df['X2'] = target
@@ -943,7 +953,7 @@ To predict interactions/binding affinities of a single compound against a librar
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "By default, models trained on all protein families (general) will be applied."
                                 "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
                             )
                             target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
@@ -973,22 +983,26 @@ To predict interactions/binding affinities of a single compound against a librar
                                 "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
                                 "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
-                            target_identify_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
                                                                value='Compound-protein interaction')
                     with gr.Row():
                         with gr.Column():
-                            HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
-                                    "Please refer to documentation for detailed benchamrk results."
-                                    )
-                            target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
                             identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
                         with gr.Column():
                             target_identify_email = gr.Textbox(
                                 label='Step 6. Email (Optional)',
-                                info="If an email is provided, a notification email will be sent to you when your job is completed."
                             )
                     with gr.Row(visible=True):
@@ -1007,45 +1021,69 @@ To predict interactions/binding affinities of a single compound against a librar
 ''')
             with gr.Blocks() as infer_block:
                 with gr.Column() as infer_page:
-                    infer_type = gr.Dropdown(choices=['Upload a compound library and a target library',
-                                                      'Upload a CSV interaction pair dataset'],
-                                             value='Upload a compound library and a target library')
                     with gr.Column() as pair_upload:
-                        gr.File(label="Example custom dataset",
-                                value="data/examples/interaction_pair_inference.csv",
-                                interactive=False)
-                        with gr.Column():
                             infer_data_for_predict = gr.File(
-                                label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
                     with gr.Column() as pair_generate:
                         with gr.Row():
-                            gr.File(label='Example SDF compound library',
                                     value='data/examples/compound_library.sdf', interactive=False)
-                            gr.File(label='Example FASTA target library',
                                     value='data/examples/target_library.fasta', interactive=False)
                         with gr.Row():
-                            gr.File(label='Example CSV compound library',
                                     value='data/examples/compound_library.csv', interactive=False)
-                            gr.File(label='Example CSV target library',
                                     value='data/examples/target_library.csv', interactive=False)
                         with gr.Row():
-                            infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
                                                  file_count="single", type='filepath')
-                            infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
                                                    file_count="single", type='filepath')
-                    with gr.Row(visible=True):
-                        pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
-                        pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
-                        pair_infer_target_family = gr.Dropdown(choices=['General'],
-                                                               label='Target family',
-                                                               value='General')
-                    # with gr.Row():
-                    #     pair_infer_email = gr.Textbox(
-                    #         label='Email (optional)',
-                    #         info="Your email will be used to send you notifications when your job finishes."
-                    #     )
                     with gr.Row(visible=True):
                         # pair_infer_clr_btn = gr.ClearButton(size='lg')
@@ -1060,7 +1098,6 @@ To predict interactions/binding affinities of a single compound against a librar
             with gr.Blocks() as report:
                 gr.Markdown('''
                 # <center>DeepSEQreen Chemical Property Report</center>
-                <center>
                 To compute chemical properties for the predictions of drug hit screening,
                 target protein identification, and interaction pair inference.
@@ -1068,7 +1105,6 @@ To predict interactions/binding affinities of a single compound against a librar
                 your own dataset. The page shows only a preview report displaying at most 30 records
                 (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
                 generate and download a raw data CSV or interactive table HTML file below.
-                </center>
                 ''')
                 with gr.Row():
                     file_for_report = gr.File(interactive=True, type='filepath')
@@ -1087,10 +1123,10 @@ To predict interactions/binding affinities of a single compound against a librar
                 with gr.Row():
                     with gr.Column():
-                        csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True, variant='primary')
                         csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
                     with gr.Column():
-                        html_generate = gr.Button(value='Generate report (HTML)', interactive=True, variant='primary')
                         html_download_file = gr.File(label='Download report (HTML)', visible=False)
@@ -1188,7 +1224,7 @@ To predict interactions/binding affinities of a single compound against a librar
     def example_fill(input_type):
         return {target_id: 'Q16539',
                 target_gene: 'MAPK14',
-                target_organism: 'Human',
                 target_fasta: """
 >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
 MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
@@ -1230,7 +1266,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                                        & (benchmark_df['Scenario'] == scenario)
                                        & (benchmark_df['all'] == False)]
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {scenario} in the training dataset; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
@@ -1339,6 +1374,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                     screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
                 else:
                     screen_df = process_drug_library_upload(library_upload)
                     if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
                         raise gr.Error(f'The uploaded compound library has more records '
                                        f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
@@ -1576,3 +1612,5 @@ if __name__ == "__main__":
     demo.launch(
         show_api=False,
     )

 def process_target_fasta(sequence):
     """Return the first sequence found in FASTA-formatted text.

     Only the first record is used; any further records in the text are
     ignored.

     Args:
         sequence: FASTA-formatted text (a ``>`` header line followed by
             sequence lines).

     Returns:
         The residues of the first FASTA record as a plain string.

     Raises:
         gr.Error: If ``sequence`` is empty, cannot be parsed, or contains
             no FASTA record.
     """
     if not sequence:
         # Same user-facing message the wrapped ValueError used to produce.
         raise gr.Error('Failed to process FASTA due to error: Empty FASTA sequence.')

     try:
         # Pull only the first record instead of materializing every record
         # in the input just to index element 0.
         record = next(SeqIO.parse(io.StringIO(sequence), "fasta"), None)
     except Exception as e:
         # Surface parser failures in the UI while keeping the original
         # traceback attached for debugging.
         raise gr.Error(f'Failed to process FASTA due to error: {str(e)}') from e

     if record is None:
         # Previously this case surfaced as the opaque "list index out of range".
         raise gr.Error('Failed to process FASTA due to error: '
                        'No FASTA record found in the input.')
     return str(record.seq)
 def send_email(receiver, msg):
                             HelpTip(
                                 "Enter (paste) a amino acid sequence below manually or upload a FASTA file."
                                 "If multiple entities are in the FASTA, only the first will be used."
+                                "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
+                                "the sequence."
                             )
                             with gr.Row():
                                 target_input_type = gr.Dropdown(
                     # with gr.Row():
                     #     with gr.Column():
                     example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
+                    # with gr.Column():
+                    #     gr.File(label='Example FASTA file',
+                    #             value='data/examples/MAPK14.fasta', interactive=False)
                     with gr.Row():
                         with gr.Column():
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
+                                "Select a preset compound library (e.g., DrugBank)."
                                 "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
                                 "or use an SDF file."
                             )
                                 "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
                                 "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
+                            drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
+                                                           label='Step 4. Select a Prediction Task',
                                                            value='Compound-protein interaction')
                     with gr.Row():
                         with gr.Column():
+                            HelpTip(
+                                "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
+                                "Please refer to documentation for detailed benchamrk results."
+                                )
+                            drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
+                                                             label='Step 5. Select a Preset Model')
                             screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
                         with gr.Column():
                     with gr.Row(visible=True):
                         with gr.Column():
+                            # drug_screen_clr_btn = gr.ClearButton(size='lg')
                             drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
                     # TODO Modify the pd df directly with df['X2'] = target
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
+                                "By default, models trained on all protein families (general) will be applied."
                                 "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
                             )
                             target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
                                 "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
                                 "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
+                            target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
+                                                               label='Step 4. Select a Prediction Task',
                                                                value='Compound-protein interaction')
                     with gr.Row():
                         with gr.Column():
+                            HelpTip(
+                                "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
+                                "Please refer to documentation for detailed benchamrk results."
+                                )
+                            target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
+                                                                 label='Step 5. Select a Preset Model')
                             identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
                         with gr.Column():
                             target_identify_email = gr.Textbox(
                                 label='Step 6. Email (Optional)',
+                                info="If an email is provided, a notification email will be sent to you when your job "
+                                     "is completed."
                             )
                     with gr.Row(visible=True):
 ''')
             with gr.Blocks() as infer_block:
                 with gr.Column() as infer_page:
+                    infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
+                                                      'Upload a compound library and a target library'],
+                                             label='Step 1. Select Pair Input Type and Input',
+                                             value='Upload a CSV interaction pair dataset')
                     with gr.Column() as pair_upload:
+                        with gr.Row():
+                            gr.File(label="Example custom dataset",
+                                    value="data/examples/interaction_pair_inference.csv",
+                                    interactive=False)
+                        with gr.Row():
                             infer_data_for_predict = gr.File(
+                                label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
                     with gr.Column() as pair_generate:
                         with gr.Row():
+                            gr.File(label='Example SDF Compound Library',
                                     value='data/examples/compound_library.sdf', interactive=False)
+                            gr.File(label='Example FASTA Target Library',
                                     value='data/examples/target_library.fasta', interactive=False)
                         with gr.Row():
+                            gr.File(label='Example CSV Compound Library',
                                     value='data/examples/compound_library.csv', interactive=False)
+                            gr.File(label='Example CSV Target Library',
                                     value='data/examples/target_library.csv', interactive=False)
                         with gr.Row():
+                            infer_drug = gr.File(label='SDF/CSV File containing multiple compounds',
                                                  file_count="single", type='filepath')
+                            infer_target = gr.File(label='FASTA/CSV File containing multiple targets',
                                                    file_count="single", type='filepath')
+                    with gr.Row():
+                        with gr.Column():
+                            HelpTip(
+                                "By default, models trained on all protein families (general) will be applied."
+                                "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
+                            )
+                            pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
+                                                                   value='General',
+                                                                   label='Step 2. Select Target Protein Family (Optional)')
+                    with gr.Row():
+                        with gr.Column():
+                            HelpTip(
+                                "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
+                                "while affinity prediction directly estimates their binding strength measured using IC50."
+                            )
+                            pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
+                                                          label='Step 3. Select a Prediction Task',
+                                                          value='Compound-protein interaction')
+                    with gr.Row():
+                        with gr.Column():
+                            HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
+                                    "Please refer to documentation for detailed benchamrk results."
+                                    )
+                            pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
+                            infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
+                    with gr.Row():
+                        pair_infer_email = gr.Textbox(
+                            label='Step 5. Email (Optional)',
+                            info="If an email is provided, a notification email will be sent to you when your job is completed."
+                        )
                     with gr.Row(visible=True):
                         # pair_infer_clr_btn = gr.ClearButton(size='lg')
             with gr.Blocks() as report:
                 gr.Markdown('''
                 # <center>DeepSEQreen Chemical Property Report</center>
                 To compute chemical properties for the predictions of drug hit screening,
                 target protein identification, and interaction pair inference.
                 your own dataset. The page shows only a preview report displaying at most 30 records
                 (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
                 generate and download a raw data CSV or interactive table HTML file below.
                 ''')
                 with gr.Row():
                     file_for_report = gr.File(interactive=True, type='filepath')
                 with gr.Row():
                     with gr.Column():
+                        csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
                         csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
                     with gr.Column():
+                        html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
                         html_download_file = gr.File(label='Download report (HTML)', visible=False)
     def example_fill(input_type):
         return {target_id: 'Q16539',
                 target_gene: 'MAPK14',
+                target_organism: 'Homo sapiens',
                 target_fasta: """
 >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
 MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
                                        & (benchmark_df['Scenario'] == scenario)
                                        & (benchmark_df['all'] == False)]
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {scenario} in the training dataset; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
                     screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
                 else:
                     screen_df = process_drug_library_upload(library_upload)
+                    print(screen_df.shape)
                     if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
                         raise gr.Error(f'The uploaded compound library has more records '
                                        f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
     demo.launch(
         show_api=False,
     )
+#%%