DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on Dec 26, 2023

Commit

7148a25

1 Parent(s): c7bb63d

Upload app.py

Browse files

Files changed (1) hide show

app.py +285 -228

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import hashlib
 import itertools
 import json
@@ -21,6 +22,7 @@ import hydra
 import pandas as pd
 import plotly.express as px
 import requests
 from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
 from requests.adapters import HTTPAdapter, Retry
 from rdkit import Chem
@@ -39,7 +41,7 @@ import sascorer
 ROOT = Path.cwd()
-DF_FOR_REPORT = pd.DataFrame()
 pd.set_option('display.float_format', '{:.3f}'.format)
 PandasTools.molRepresentation = 'svg'
@@ -146,7 +148,7 @@ CSS = """
   position: absolute;
 }
-#example {
 padding: 0;
 background: none;
 border: none;
@@ -171,47 +173,47 @@ class HelpTip:
         )
-def sa_score(row):
-    return sascorer.calculateScore(row['Compound'])
-def mw(row):
-    return Chem.Descriptors.MolWt(row['Compound'])
-def mr(row):
-    return Crippen.MolMR(row['Compound'])
-def hbd(row):
-    return Lipinski.NumHDonors(row['Compound'])
-def hba(row):
-    return Lipinski.NumHAcceptors(row['Compound'])
-def logp(row):
-    return Crippen.MolLogP(row['Compound'])
-def atom(row):
-    return CalcNumAtoms(row['Compound'])
-def heavy_atom(row):
-    return CalcNumHeavyAtoms(row['Compound'])
-def rotatable_bond(row):
-    return CalcNumRotatableBonds((row['Compound']))
-def tpsa(row):
-    return CalcTPSA((row['Compound']))
-def lipinski(row):
     """
     Lipinski's rules:
     Hydrogen bond donors <= 5
@@ -219,19 +221,19 @@ def lipinski(row):
     Molecular weight <= 500 daltons
     logP <= 5
     """
-    if hbd(row) > 5:
         return False
-    elif hba(row) > 10:
         return False
-    elif mw(row) > 500:
         return False
-    elif logp(row) > 5:
         return False
     else:
         return True
-def reos(row):
     """
     Rapid Elimination Of Swill filter:
     Molecular weight between 200 and 500
@@ -242,23 +244,23 @@ def reos(row):
     Rotatable bond count between 0 and 8
     Heavy atom count between 15 and 50
     """
-    if not 200 < mw(row) < 500:
         return False
-    elif not -5.0 < logp(row) < 5.0:
         return False
-    elif not 0 < hbd(row) < 5:
         return False
-    elif not 0 < hba(row) < 10:
         return False
-    elif not 0 < rotatable_bond(row) < 8:
         return False
-    elif not 15 < heavy_atom(row) < 50:
         return False
     else:
         return True
-def ghose(row):
     """
     Ghose drug like filter:
     Molecular weight between 160 and 480
@@ -266,34 +268,34 @@ def ghose(row):
     Atom count between 20 and 70
     Molar refractivity between 40 and 130
     """
-    if not 160 < mw(row) < 480:
         return False
-    elif not -0.4 < logp(row) < 5.6:
         return False
-    elif not 20 < atom(row) < 70:
         return False
-    elif not 40 < mr(row) < 130:
         return False
     else:
         return True
-def veber(row):
     """
     The Veber filter is a rule of thumb filter for orally active drugs described in
     Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
     Rotatable bonds <= 10
     Topological polar surface area <= 140
     """
-    if not rotatable_bond(row) <= 10:
         return False
-    elif not tpsa(row) <= 140:
         return False
     else:
         return True
-def rule_of_three(row):
     """
     Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
     Molecular weight <= 300
@@ -302,15 +304,15 @@ def rule_of_three(row):
     H-bond acceptor count <= 3
     Rotatable bond count <= 3
     """
-    if not mw(row) <= 300:
         return False
-    elif not logp(row) <= 3:
         return False
-    elif not hbd(row) <= 3:
         return False
-    elif not hba(row) <= 3:
         return False
-    elif not rotatable_bond(row) <= 3:
         return False
     else:
         return True
@@ -389,6 +391,9 @@ COLUMN_ALIASES = {
     'X2': 'Target FASTA',
     'ID1': 'Compound ID',
     'ID2': 'Target ID',
 }
@@ -421,7 +426,7 @@ def send_email(receiver, msg):
     pass
-def submit_predict(predict_filepath, task, preset, target_family, flag, progress=gr.Progress(track_tqdm=True)):
     if flag:
         try:
             job_id = flag
@@ -430,10 +435,10 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
             preset = PRESET_MAP[preset]
             target_family = TARGET_FAMILY_MAP[target_family]
             # email_hash = hashlib.sha256(email.encode()).hexdigest()
-            COLUMN_ALIASES = COLUMN_ALIASES | {
-                'Y': 'Actual interaction probability' if task == 'binary' else 'Actual binding affinity',
-                'Y^': 'Predicted interaction probability' if task == 'binary' else 'Predicted binding affinity'
-            }
             # target_family_list = [target_family]
             # for family in target_family_list:
@@ -451,20 +456,18 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
                 predictions, _ = predict(cfg)
                 predictions = [pd.DataFrame(prediction) for prediction in predictions]
                 prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
                 predictions_file = f'temp/{job_id}_predictions.csv'
-                prediction_df.to_csv(predictions_file, index=False)
                 return [predictions_file,
                         False]
         except Exception as e:
             gr.Warning(f"Prediction job failed due to error: {str(e)}")
-            return [None,
-                    False]
     else:
-        return [None,
-                False]
         #
         # except Exception as e:
         #     raise gr.Error(str(e))
@@ -536,19 +539,19 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
 def update_df(file, progress=gr.Progress(track_tqdm=True)):
-    global DF_FOR_REPORT
-    if file is not None:
         df = pd.read_csv(file)
-        if df['X1'].nunique() > 1:
-            df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
-                desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
-            # Add a new column with RDKit molecule objects
-            if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
-                PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
-                                                     includeFingerprints=True)
-            PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
                                                  includeFingerprints=True)
-        DF_FOR_REPORT = df.copy()
         # pie_chart = None
         # value = None
@@ -563,30 +566,64 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
         #     elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
         #         pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
-        return create_html_report(DF_FOR_REPORT), df  # pie_chart
     else:
-        return gr.HTML(), gr.Dataframe()
 def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
-    df_html = df.copy()
-    cols_left = ['ID1', 'ID2', 'Y', 'Y^', 'Compound', 'Scaffold', 'Scaffold SMILES', ]
     cols_right = ['X1', 'X2']
     cols_left = [col for col in cols_left if col in df_html.columns]
     cols_right = [col for col in cols_right if col in df_html.columns]
     df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
-    df_html['X2'] = df_html['X2'].swifter.apply(wrap_text)
     df_html = df_html.sort_values(
-        [col for col in ['Y', 'Y^', 'ID1', 'ID2', 'X1', 'X2'] if col in df.columns], ascending=False
-    ).rename(columns=COLUMN_ALIASES)
     # PandasTools.RenderImagesInAllDataFrames(images=True)
-    PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
-    # Return the DataFrame as HTML
-    PandasTools.RenderImagesInAllDataFrames(images=True)
     if not file:
-        styled_df = df_html.iloc[:51].style
-        # styled_df = df.style.format("{:.2f}")
         colors = sns.color_palette('husl', len(df_html.columns))
         for i, col in enumerate(df_html.columns):
             if pd.api.types.is_numeric_dtype(df_html[col]):
@@ -597,13 +634,21 @@ def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
         import panel as pn
         from bokeh.resources import INLINE
         from bokeh.models import NumberFormatter, BooleanFormatter
-        bokeh_formatters = {
-            'float': {'type': 'progress', 'legend': True},
-            'bool': BooleanFormatter(),
         }
         # html = df.to_html(file)
         # return html
-        pn.widgets.Tabulator(df_html, formatters=bokeh_formatters).save(file, resources=INLINE)
 # def create_pie_chart(df, category, value, top_k):
@@ -657,16 +702,18 @@ def create_pie_chart(df, category, value, top_k):
     return fig
-def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
-    df = DF_FOR_REPORT.copy()
     try:
         for filter_name in filter_list:
-            df[filter_name] = df.swifter.progress_bar(desc=f"Calculating {filter_name}").apply(
-                FILTER_MAP[filter_name], axis=1)
         for score_name in score_list:
-            df[score_name] = df.swifter.progress_bar(desc=f"Calculating {score_name}").apply(
-                SCORE_MAP[score_name], axis=1)
         # pie_chart = None
         # value = None
@@ -681,11 +728,11 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
         #     elif df['X2'].nunique() > 1 >= df['X1'].nunique():
         #         pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
-        return create_html_report(df), df  # pie_chart
     except Exception as e:
-        raise gr.Error(str(e))
 # def check_job_status(job_id):
 #     job_lock = DATA_PATH / f"{job_id}.lock"
@@ -704,20 +751,23 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
 def wrap_text(text, line_length=60):
-    wrapper = textwrap.TextWrapper(width=line_length)
-    if text.startswith('>'):
-        sections = text.split('>')
-        wrapped_sections = []
-        for section in sections:
-            if not section:
-                continue
-            lines = section.split('\n')
-            seq_header = lines[0]
-            wrapped_seq = wrapper.fill(''.join(lines[1:]))
-            wrapped_sections.append(f">{seq_header}\n{wrapped_seq}")
-        return '\n'.join(wrapped_sections)
     else:
-        return wrapper.fill(text)
 def unwrap_text(text):
@@ -834,17 +884,18 @@ To predict interactions/binding affinities of a single target against a library
                                     visible=False, interactive=True, scale=4, )
                     with gr.Row():
-                        with gr.Column():
-                            target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
-                                                                visible=True, variant='primary',
-                                                                size='lg')
-                            target_query_btn = gr.Button(value='Query the sequence', variant='primary',
-                                                         visible=False)
                     target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
                     # with gr.Row():
                     #     with gr.Column():
-                    example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
                     # with gr.Column():
                     #     gr.File(label='Example FASTA file',
                     #             value='data/examples/MAPK14.fasta', interactive=False)
@@ -853,7 +904,8 @@ To predict interactions/binding affinities of a single target against a library
                         with gr.Column():
                             HelpTip(
                                 "Click Auto-detect to identify the protein family using sequence alignment. "
-                                "This optional step allows applying a family-specific model instead of a all-family model (general)."
                                 "Manually select general if the alignment results are unsatisfactory."
                             )
                             drug_screen_target_family = gr.Dropdown(
@@ -886,8 +938,10 @@ To predict interactions/binding affinities of a single target against a library
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
-                                "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
                             drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
                                                            label='Step 4. Select a Prediction Task',
@@ -896,7 +950,8 @@ To predict interactions/binding affinities of a single target against a library
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
                                 "Please refer to documentation for detailed benchamrk results."
                                 )
                             drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
@@ -906,7 +961,8 @@ To predict interactions/binding affinities of a single target against a library
                         with gr.Column():
                             drug_screen_email = gr.Textbox(
                                 label='Step 6. Email (Optional)',
-                                info="If an email is provided, a notification email will be sent to you when your job is completed."
                             )
                     with gr.Row(visible=True):
@@ -937,34 +993,39 @@ To predict interactions/binding affinities of a single compound against a librar
                             HelpTip(
                                 "Enter (paste) a compound SMILES below manually or upload a SDF file."
                                 "If multiple entities are in the SDF, only the first will be used."
-                                "SMILES can be obtained by searching for the compound of interest in databases such as NCBI, PubChem and and ChEMBL."
                             )
                             compound_type = gr.Dropdown(
                                 label='Step 1. Select Compound Input Type and Input',
                                 choices=['SMILES', 'SDF'],
-                                info='Enter (paste) an SMILES string or upload an SDF file.',
                                 value='SMILES',
                                 interactive=True)
-                            compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
                     compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
-                    example_drug = gr.Button(value='Example: Aspirin', elem_id='example')
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
                                 "By default, models trained on all protein families (general) will be applied."
-                                "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
                             )
                             target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
                                                                         value='General',
-                                                                        label='Step 2. Select Target Protein Family (Optional)')
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
                                 "Select a preset target library (e.g., ChEMBL33_human_proteins)."
-                                "Alternatively, upload a CSV file with a column named X2 containing tareget protein sequences, or use an FASTA file."
                             )
                             target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
                                                          choices=list(TARGET_LIBRARY_MAP.keys()))
@@ -980,8 +1041,10 @@ To predict interactions/binding affinities of a single compound against a librar
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
-                                "while affinity prediction directly estimates their binding strength measured using IC50."
                             )
                             target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
                                                                label='Step 4. Select a Prediction Task',
@@ -990,11 +1053,12 @@ To predict interactions/binding affinities of a single compound against a librar
                     with gr.Row():
                         with gr.Column():
                             HelpTip(
-                                "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
                                 "Please refer to documentation for detailed benchamrk results."
                                 )
-                            target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
-                                                                 label='Step 5. Select a Preset Model')
                             identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
@@ -1021,69 +1085,46 @@ To predict interactions/binding affinities of a single compound against a librar
 ''')
             with gr.Blocks() as infer_block:
                 with gr.Column() as infer_page:
-                    infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
-                                                      'Upload a compound library and a target library'],
-                                             label='Step 1. Select Pair Input Type and Input',
-                                             value='Upload a CSV interaction pair dataset')
                     with gr.Column() as pair_upload:
-                        with gr.Row():
-                            gr.File(label="Example custom dataset",
-                                    value="data/examples/interaction_pair_inference.csv",
-                                    interactive=False)
-                        with gr.Row():
                             infer_data_for_predict = gr.File(
-                                label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
                     with gr.Column() as pair_generate:
                         with gr.Row():
-                            gr.File(label='Example SDF Compound Library',
                                     value='data/examples/compound_library.sdf', interactive=False)
-                            gr.File(label='Example FASTA Target Library',
                                     value='data/examples/target_library.fasta', interactive=False)
                         with gr.Row():
-                            gr.File(label='Example CSV Compound Library',
                                     value='data/examples/compound_library.csv', interactive=False)
-                            gr.File(label='Example CSV Target Library',
                                     value='data/examples/target_library.csv', interactive=False)
                         with gr.Row():
-                            infer_drug = gr.File(label='SDF/CSV File containing multiple compounds',
                                                  file_count="single", type='filepath')
-                            infer_target = gr.File(label='FASTA/CSV File containing multiple targets',
                                                    file_count="single", type='filepath')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "By default, models trained on all protein families (general) will be applied."
-                                "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
-                            )
-                            pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
-                                                                   value='General',
-                                                                   label='Step 2. Select Target Protein Family (Optional)')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
-                                "while affinity prediction directly estimates their binding strength measured using IC50."
-                            )
-                            pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
-                                                          label='Step 3. Select a Prediction Task',
-                                                          value='Compound-protein interaction')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
-                                    "Please refer to documentation for detailed benchamrk results."
-                                    )
-                            pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
-                            infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
-                    with gr.Row():
-                        pair_infer_email = gr.Textbox(
-                            label='Step 5. Email (Optional)',
-                            info="If an email is provided, a notification email will be sent to you when your job is completed."
-                        )
                     with gr.Row(visible=True):
                         # pair_infer_clr_btn = gr.ClearButton(size='lg')
@@ -1098,23 +1139,28 @@ To predict interactions/binding affinities of a single compound against a librar
             with gr.Blocks() as report:
                 gr.Markdown('''
                 # <center>DeepSEQreen Chemical Property Report</center>
                 To compute chemical properties for the predictions of drug hit screening,
-                target protein identification, and interaction pair inference.
-                You may also upload
-                your own dataset. The page shows only a preview report displaying at most 30 records
-                (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
-                generate and download a raw data CSV or interactive table HTML file below.
                 ''')
                 with gr.Row():
                     file_for_report = gr.File(interactive=True, type='filepath')
-                    df_raw = gr.Dataframe(type="pandas", interactive=False, visible=False)
                     scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
                     filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
                 with gr.Row():
                     # clear_btn = gr.ClearButton(size='lg')
-                    analyze_btn = gr.Button('REPORT', variant='primary', size='lg')
                 with gr.Row():
                     with gr.Column(scale=3):
@@ -1123,11 +1169,13 @@ To predict interactions/binding affinities of a single compound against a librar
                 with gr.Row():
                     with gr.Column():
-                        csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
-                        csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
                     with gr.Column():
-                        html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
-                        html_download_file = gr.File(label='Download report (HTML)', visible=False)
     def target_input_type_select(input_type):
@@ -1224,7 +1272,7 @@ To predict interactions/binding affinities of a single compound against a librar
     def example_fill(input_type):
         return {target_id: 'Q16539',
                 target_gene: 'MAPK14',
-                target_organism: 'Homo sapiens',
                 target_fasta: """
 >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
 MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
@@ -1236,9 +1284,10 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
 """}
-    example_fasta.click(fn=example_fill, inputs=target_input_type,
-                        outputs=[target_id, target_gene, target_organism, target_fasta], show_progress=False)
     def screen_recommend_model(fasta, family, task):
         task = TASK_MAP[task]
@@ -1249,7 +1298,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
             train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
             score = 'CI'
-        if fasta not in train['X2']:
             scenario = "Unseen target"
         else:
             scenario = "Seen target"
@@ -1266,6 +1315,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                                        & (benchmark_df['Scenario'] == scenario)
                                        & (benchmark_df['all'] == False)]
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {scenario} in the training dataset; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
@@ -1280,13 +1330,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     def compound_input_type_select(input_type):
         match input_type:
             case 'SMILES':
-                return gr.Dropdown(info='Input an SMILES string or upload an SMI file')
             case 'SDF':
-                return gr.Dropdown(info='Convert the first molecule in an SDF file to SMILES')
     compound_type.select(fn=compound_input_type_select,
-                         inputs=compound_type, outputs=compound_type, show_progress=False)
     def compound_upload_process(input_type, input_upload):
@@ -1374,7 +1424,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                     screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
                 else:
                     screen_df = process_drug_library_upload(library_upload)
-                    print(screen_df.shape)
                     if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
                         raise gr.Error(f'The uploaded compound library has more records '
                                        f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
@@ -1517,7 +1566,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     ).then(
         fn=submit_predict,
         inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
-                drug_screen_target_family, screen_flag],  # , drug_screen_email],
         outputs=[file_for_report, run_state]
     ).then(
         fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
@@ -1529,12 +1578,12 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         inputs=[compound_smiles, target_library, target_library_upload, run_state],  # , drug_screen_email],
         outputs=[identify_data_for_predict, identify_flag, run_state]
     ).then(
-        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True), gr.Tabs(selected=3)],
-        outputs=[identify_page, identify_waiting, tabs]
     ).then(
         fn=submit_predict,
         inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
-                target_identify_target_family, identify_flag],  # , target_identify_email],
         outputs=[file_for_report, run_state]
     ).then(
         fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
@@ -1551,45 +1600,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     ).then(
         fn=submit_predict,
         inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
-                pair_infer_target_family, infer_flag],  # , pair_infer_email],
         outputs=[file_for_report, run_state]
     ).then(
-        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False)],
-        outputs=[infer_page, infer_waiting]
     )
     # TODO background job from these 3 pipelines to update file_for_report
     file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
         html_report,
-        df_raw,
         # ranking_pie_chart
     ])
-    analyze_btn.click(fn=submit_report, inputs=[scores, filters], outputs=[
         html_report,
-        df_raw,
         # ranking_pie_chart
     ])
-    def create_csv_raw_file(df, file_report):
-        from datetime import datetime
-        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
-        df.drop(['Compound', 'Scaffold']).to_csv(filename, index=False)
-        return gr.File(filename, visible=True)
     def create_html_report_file(df, file_report):
-        from datetime import datetime
-        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
-        create_html_report(df, filename)
-        return gr.File(filename, visible=True)
-    csv_generate.click(fn=create_csv_raw_file, inputs=[df_raw, file_for_report], outputs=csv_download_file)
-    html_generate.click(fn=create_html_report_file, inputs=[df_raw, file_for_report], outputs=html_download_file)
     # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
     #                       every=5)
@@ -1612,5 +1671,3 @@ if __name__ == "__main__":
     demo.launch(
         show_api=False,
     )
-#%%

+from datetime import datetime
 import hashlib
 import itertools
 import json
 import pandas as pd
 import plotly.express as px
 import requests
+from bokeh.models import HTMLTemplateFormatter, StringFormatter
 from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
 from requests.adapters import HTTPAdapter, Retry
 from rdkit import Chem
 ROOT = Path.cwd()
+# DF_FOR_REPORT = pd.DataFrame()
 pd.set_option('display.float_format', '{:.3f}'.format)
 PandasTools.molRepresentation = 'svg'
   position: absolute;
 }
+.example {
 padding: 0;
 background: none;
 border: none;
         )
def sa_score(mol):
    """Return the synthetic accessibility score that sascorer computes for the molecule."""
    score = sascorer.calculateScore(mol)
    return score
def mw(mol):
    """Return the molecular weight of the molecule (RDKit Descriptors.MolWt)."""
    weight = Chem.Descriptors.MolWt(mol)
    return weight
def mr(mol):
    """Return the molar refractivity of the molecule (RDKit Crippen.MolMR)."""
    refractivity = Crippen.MolMR(mol)
    return refractivity
def hbd(mol):
    """Return the number of hydrogen bond donors (RDKit Lipinski.NumHDonors)."""
    donors = Lipinski.NumHDonors(mol)
    return donors
def hba(mol):
    """Return the number of hydrogen bond acceptors (RDKit Lipinski.NumHAcceptors)."""
    acceptors = Lipinski.NumHAcceptors(mol)
    return acceptors
def logp(mol):
    """Return the Crippen logP estimate of the molecule (RDKit Crippen.MolLogP)."""
    log_p = Crippen.MolLogP(mol)
    return log_p
def atom(mol):
    """Return the atom count of the molecule as reported by RDKit CalcNumAtoms."""
    atom_count = CalcNumAtoms(mol)
    return atom_count
def heavy_atom(mol):
    """Return the heavy atom count of the molecule as reported by RDKit CalcNumHeavyAtoms."""
    heavy_atom_count = CalcNumHeavyAtoms(mol)
    return heavy_atom_count
def rotatable_bond(mol):
    """Return the rotatable bond count of the molecule (RDKit CalcNumRotatableBonds)."""
    bond_count = CalcNumRotatableBonds(mol)
    return bond_count
def tpsa(mol):
    """Return the topological polar surface area of the molecule (RDKit CalcTPSA)."""
    surface_area = CalcTPSA(mol)
    return surface_area
def lipinski(mol):
    """
    Lipinski's rules:
    Hydrogen bond donors <= 5
    Hydrogen bond acceptors <= 10
    Molecular weight <= 500 daltons
    logP <= 5

    Returns True when none of the limits is exceeded, False otherwise.
    """
    # Checks are evaluated lazily and in the same order as the rule list above.
    violated = (
        hbd(mol) > 5
        or hba(mol) > 10
        or mw(mol) > 500
        or logp(mol) > 5
    )
    return not violated
def reos(mol):
    """
    Rapid Elimination Of Swill filter:
    Molecular weight between 200 and 500
    LogP between -5.0 and +5.0
    H-bond donor count between 0 and 5
    H-bond acceptor count between 0 and 10
    Rotatable bond count between 0 and 8
    Heavy atom count between 15 and 50

    All ranges are inclusive. Returns True when every property lies within its range.
    """
    # The bounds must be inclusive: with strict inequalities a molecule with zero
    # H-bond donors, zero H-bond acceptors or zero rotatable bonds could never pass,
    # even though 0 is the stated lower limit of those ranges.
    # NOTE(review): no formal-charge criterion is evaluated here; confirm whether
    # one is expected for this filter.
    return (
        200 <= mw(mol) <= 500
        and -5.0 <= logp(mol) <= 5.0
        and 0 <= hbd(mol) <= 5
        and 0 <= hba(mol) <= 10
        and 0 <= rotatable_bond(mol) <= 8
        and 15 <= heavy_atom(mol) <= 50
    )
def ghose(mol):
    """
    Ghose drug like filter:
    Molecular weight between 160 and 480
    LogP between -0.4 and +5.6
    Atom count between 20 and 70
    Molar refractivity between 40 and 130

    All ranges are inclusive. Returns True when every property lies within its range.
    """
    # Inclusive bounds: a value sitting exactly on a limit (e.g. an atom count of
    # 20 or 70) is inside the qualifying range and must not be rejected.
    return (
        160 <= mw(mol) <= 480
        and -0.4 <= logp(mol) <= 5.6
        and 20 <= atom(mol) <= 70
        and 40 <= mr(mol) <= 130
    )
def veber(mol):
    """
    The Veber filter is a rule of thumb filter for orally active drugs described in
    Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
    Rotatable bonds <= 10
    Topological polar surface area <= 140

    Returns True when both criteria hold, False otherwise.
    """
    return rotatable_bond(mol) <= 10 and tpsa(mol) <= 140
def rule_of_three(mol):
    """
    Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
    Molecular weight <= 300
    LogP <= 3
    H-bond donor count <= 3
    H-bond acceptor count <= 3
    Rotatable bond count <= 3

    Returns True when every criterion holds, False otherwise.
    """
    # Evaluated lazily, in the same order as the criteria are listed.
    return (
        mw(mol) <= 300
        and logp(mol) <= 3
        and hbd(mol) <= 3
        and hba(mol) <= 3
        and rotatable_bond(mol) <= 3
    )
     'X2': 'Target FASTA',
     'ID1': 'Compound ID',
     'ID2': 'Target ID',
+    'Y': 'Actual CPI/CPA',
+    'Y^': 'Predicted CPI/CPA',
+    'N': 'Original Index'
 }
     pass
+def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
     if flag:
         try:
             job_id = flag
             preset = PRESET_MAP[preset]
             target_family = TARGET_FAMILY_MAP[target_family]
             # email_hash = hashlib.sha256(email.encode()).hexdigest()
+            COLUMN_ALIASES.update({
+                'Y': 'Actual interaction probability' if task == 'DTI' else 'Actual binding affinity',
+                'Y^': 'Predicted interaction probability' if task == 'DTI' else 'Predicted binding affinity'
+            })
             # target_family_list = [target_family]
             # for family in target_family_list:
                 predictions, _ = predict(cfg)
                 predictions = [pd.DataFrame(prediction) for prediction in predictions]
                 prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
+                prediction_df.set_index('N', inplace=True)
                 predictions_file = f'temp/{job_id}_predictions.csv'
+                prediction_df.to_csv(predictions_file)
                 return [predictions_file,
                         False]
         except Exception as e:
             gr.Warning(f"Prediction job failed due to error: {str(e)}")
+            return {run_state: False}
     else:
+        return {run_state: state}
         #
         # except Exception as e:
         #     raise gr.Error(str(e))
 def update_df(file, progress=gr.Progress(track_tqdm=True)):
     """
     Load a report CSV and prepare the data shown on the report tab.

     Reads the CSV at `file`, derives a Murcko scaffold SMILES from the 'X1' column, and adds
     RDKit molecule columns ('Compound' from 'X1' unless an object-typed 'Compound' column
     already exists, 'Scaffold' from 'Scaffold SMILES').

     Returns a dict of component updates: the HTML preview, the raw dataframe, a copy used for
     reporting, and an enabled REPORT button. If `file` is empty or is not an existing file,
     only the REPORT button is updated (disabled).
     """
     # `file` may be None (e.g. when the interactive file input is cleared); Path(None) would
     # raise a TypeError, so treat a missing value like a missing file.
     if not file or not Path(file).is_file():
         return {analyze_btn: gr.Button(interactive=False)}

     df = pd.read_csv(file)
     df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
         desc="Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)

     # Add columns with RDKit molecule objects for the compound and its scaffold
     if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
         PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
                                              includeFingerprints=True)
     PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
                                          includeFingerprints=True)

     # NOTE: pie chart generation is currently disabled, so no chart output is returned.
     return {html_report: create_html_report(df),
             raw_df: df,
             report_df: df.copy(),
             analyze_btn: gr.Button(interactive=True)}
 def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
+    df_html = df.copy(deep=True)
+    cols_left = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^',]
     cols_right = ['X1', 'X2']
     cols_left = [col for col in cols_left if col in df_html.columns]
     cols_right = [col for col in cols_right if col in df_html.columns]
     df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
+    ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted binding affinity' else False
     df_html = df_html.sort_values(
+        [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
+    )
+    # # Remove repeated info for one-against-N tasks to save visual and physical space
+    # if df_html['X1'].nunique() <= 1:
+    #     columns_to_clean = ['X1', 'ID1', 'Scaffold', 'Compound'] + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
+    #     for column in columns_to_clean:
+    #         if column in df_html.columns:
+    #             df_html.loc[1:, column] = pd.NA
+    #
+    # if df_html['X2'].nunique() <= 1:
+    #     columns_to_clean = ['X2', 'ID2']
+    #     for column in columns_to_clean:
+    #         if column in df_html.columns:
+    #             df_html.loc[1:, column] = pd.NA
+    if not file:
+        df_html = df_html.iloc[:31]
+    # PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
     # PandasTools.RenderImagesInAllDataFrames(images=True)
+    df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
+        'Generating compound graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
+    df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
+        'Generating scaffold graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
+    df_html = df_html.rename(columns=COLUMN_ALIASES)
+    df_html.index.name = 'Index'
     if not file:
+        if 'Compound ID' in df_html.columns:
+            df_html.drop(['Compound SMILES'], axis=1, inplace=True)
+        if 'Target ID' in df_html.columns:
+            df_html.drop(['Target FASTA'], axis=1, inplace=True)
+        if 'Target FASTA' in df_html.columns:
+            df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
+                'Processing FASTA...').apply(lambda x: wrap_text(x) if not pd.isna(x) else x)
+        df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
+        # num_formatters = {col: "{:.3f}" for col in df.select_dtypes('number').columns}
+        styled_df = df_html.style.format(precision=3)
         colors = sns.color_palette('husl', len(df_html.columns))
         for i, col in enumerate(df_html.columns):
             if pd.api.types.is_numeric_dtype(df_html[col]):
         import panel as pn
         from bokeh.resources import INLINE
         from bokeh.models import NumberFormatter, BooleanFormatter
+        bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
+        num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('number').columns}
+        other_formatters = {
+            'Predicted interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
+            'Actual interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
+            'Compound': HTMLTemplateFormatter(),
+            'Scaffold': HTMLTemplateFormatter(),
+            'Target FASTA': {'type': 'textarea', 'width': 60},
         }
+        formatters = {**bool_formatters, **num_formatters, **other_formatters}
         # html = df.to_html(file)
         # return html
+        pn.widgets.Tabulator(df_html, formatters=formatters).save(file, resources=INLINE)
 # def create_pie_chart(df, category, value, top_k):
     return fig
def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
    """
    Compute the selected filters and scores for every compound and rebuild the report.

    `df` is the raw dataframe holding RDKit molecules in its 'Compound' column; `filter_list`
    and `score_list` hold keys of FILTER_MAP and SCORE_MAP. Each selected entry becomes a new
    column on a copy of `df`; missing compounds are passed through unchanged.

    Returns (HTML preview, report dataframe). On any error a warning is shown and
    (None, None) is returned.
    """
    df_report = df.copy()
    try:
        # Series.apply forwards extra keyword arguments to the applied function, so passing
        # `axis=1` here would make every call fail with a TypeError (unexpected keyword
        # argument) -- the element-wise apply on a Series takes no axis.
        for filter_name in filter_list:
            filter_func = FILTER_MAP[filter_name]
            df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
                desc=f"Calculating {filter_name}").apply(
                lambda x: filter_func(x) if not pd.isna(x) else x)

        for score_name in score_list:
            score_func = SCORE_MAP[score_name]
            df_report[score_name] = df_report['Compound'].swifter.progress_bar(
                desc=f"Calculating {score_name}").apply(
                lambda x: score_func(x) if not pd.isna(x) else x)

        # NOTE: pie chart generation is currently disabled, so no chart output is returned.
        return create_html_report(df_report), df_report
    except Exception as e:
        gr.Warning(f'Failed to report results due to error: {str(e)}')
        return None, None
 # def check_job_status(job_id):
 #     job_lock = DATA_PATH / f"{job_id}.lock"
 def wrap_text(text, line_length=60):
     """
     Wrap a string to lines of at most `line_length` characters.

     Text starting with '>' is treated as FASTA: it is split into records on '>', each header
     line is kept as is, and the remaining lines of the record are joined and re-wrapped.
     Any other string is wrapped as a whole. Non-string values are returned unchanged.
     """
     if not isinstance(text, str):
         return text

     wrapper = textwrap.TextWrapper(width=line_length)
     if not text.startswith('>'):
         return wrapper.fill(text)

     records = []
     # Splitting on '>' yields an empty first piece (and possibly others); drop those.
     for record in filter(None, text.split('>')):
         header, *sequence_lines = record.split('\n')
         sequence = wrapper.fill(''.join(sequence_lines))
         records.append(f">{header}\n{sequence}")
     return '\n'.join(records)
 def unwrap_text(text):
                                     visible=False, interactive=True, scale=4, )
                     with gr.Row():
+                        target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
+                                                            visible=True, variant='primary',
+                                                            size='lg')
+                        target_query_btn = gr.Button(value='Query the sequence', variant='primary',
+                                                     visible=False)
+                    # with gr.Row():
+                    #     example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
+                    #     example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
+                    example_fasta = gr.Button(value='Example: Human MAPK14', elem_classes='example')
                     target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
                     # with gr.Row():
                     #     with gr.Column():
                     # with gr.Column():
                     #     gr.File(label='Example FASTA file',
                     #             value='data/examples/MAPK14.fasta', interactive=False)
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "Click Auto-detect to identify the protein family using sequence alignment. "
                                 "This optional step allows applying a family-specific model instead of an all-family "
                                 "model (general). "
                                 "Manually select general if the alignment results are unsatisfactory."
                             )
                             drug_screen_target_family = gr.Dropdown(
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so a separating
                             # space is needed after the comma that ends the second literal.
                             HelpTip(
                                 "Interaction prediction provides you with a binding probability score between the "
                                 "target of interest and each compound in the library, "
                                 "while affinity prediction directly estimates their binding strength measured using "
                                 "IC50."
                             )
                             drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
                                                            label='Step 4. Select a Prediction Task',
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "Select your preferred model, or click Recommend for the best-performing model based "
                                 "on the selected task, family, and whether the target was trained. "
                                 "Please refer to documentation for detailed benchmark results."
                                 )
                             drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
                         with gr.Column():
                             drug_screen_email = gr.Textbox(
                                 label='Step 6. Email (Optional)',
+                                info="If an email is provided, a notification email will be sent to you when your job "
+                                     "is completed."
                             )
                     with gr.Row(visible=True):
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "Enter (paste) a compound SMILES below manually or upload an SDF file. "
                                 "If multiple entities are in the SDF, only the first will be used. "
                                 "SMILES can be obtained by searching for the compound of interest in databases such "
                                 "as NCBI, PubChem and ChEMBL."
                             )
                             compound_type = gr.Dropdown(
                                 label='Step 1. Select Compound Input Type and Input',
                                 choices=['SMILES', 'SDF'],
+                                info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
                                 value='SMILES',
                                 interactive=True)
+                            compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
+                                                                  type='binary', visible=False)
                     compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
+                    example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "By default, models trained on all protein families (general) will be applied. "
                                 "If the proteins in the target library of interest all belong to the same protein "
                                 "family, manually selecting the family is supported."
                             )
                             target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
                                                                         value='General',
+                                                                        label='Step 2. Select Target Protein Family ('
+                                                                              'Optional)')
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "Select a preset target library (e.g., ChEMBL33_human_proteins). "
                                 "Alternatively, upload a CSV file with a column named X2 containing target protein "
                                 "sequences, or use a FASTA file."
                             )
                             target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
                                                          choices=list(TARGET_LIBRARY_MAP.keys()))
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so a separating
                             # space is needed after the comma that ends the second literal.
                             HelpTip(
                                 "Interaction prediction provides you with a binding probability score between the "
                                 "target of interest and each compound in the library, "
                                 "while affinity prediction directly estimates their binding strength measured using "
                                 "IC50."
                             )
                             target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
                                                                label='Step 4. Select a Prediction Task',
                     with gr.Row():
                         with gr.Column():
                             # Adjacent string literals are concatenated verbatim, so each sentence
                             # must end with a space to avoid running into the next one.
                             HelpTip(
                                 "Select your preferred model, or click Recommend for the best-performing model based "
                                 "on the selected task, family, and whether the compound was trained. "
                                 "Please refer to documentation for detailed benchmark results."
                                 )
+                            target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a '
+                                                                                                'Preset Model')
                             identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
                     with gr.Row():
 ''')
             with gr.Blocks() as infer_block:
                 with gr.Column() as infer_page:
+                    infer_type = gr.Dropdown(
+                        choices=['Upload a compound library and a target library',
+                                 'Upload a CSV interaction pair dataset'],
+                        value='Upload a compound library and a target library')
                     with gr.Column() as pair_upload:
+                        gr.File(label="Example custom dataset",
+                                value="data/examples/interaction_pair_inference.csv",
+                                interactive=False)
+                        with gr.Column():
                             infer_data_for_predict = gr.File(
+                                label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
                     with gr.Column() as pair_generate:
                         with gr.Row():
+                            gr.File(label='Example SDF compound library',
                                     value='data/examples/compound_library.sdf', interactive=False)
+                            gr.File(label='Example FASTA target library',
                                     value='data/examples/target_library.fasta', interactive=False)
                         with gr.Row():
+                            gr.File(label='Example CSV compound library',
                                     value='data/examples/compound_library.csv', interactive=False)
+                            gr.File(label='Example CSV target library',
                                     value='data/examples/target_library.csv', interactive=False)
                         with gr.Row():
+                            infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
                                                  file_count="single", type='filepath')
+                            infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
                                                    file_count="single", type='filepath')
+                    with gr.Row(visible=True):
+                        pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
+                        pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
+                        pair_infer_target_family = gr.Dropdown(choices=['General'],
+                                                               label='Target family',
+                                                               value='General')
+                    # with gr.Row():
+                    #     pair_infer_email = gr.Textbox(
+                    #         label='Email (optional)',
+                    #         info="Your email will be used to send you notifications when your job finishes."
+                    #     )
                     with gr.Row(visible=True):
                         # pair_infer_clr_btn = gr.ClearButton(size='lg')
             with gr.Blocks() as report:
                 gr.Markdown('''
                 # <center>DeepSEQreen Chemical Property Report</center>
+                <center>
                 To compute chemical properties for the predictions of drug hit screening,
+                target protein identification, and interaction pair inference. You may also upload
+                your own dataset.
+                The page shows only a preview report displaying at most 30 records
+                (with top predicted CPI/CPA if reporting results from a prediction job).
+                For a full report, please
+                generate and download a CSV or interactive HTML report below.
+                </center>
                 ''')
                 with gr.Row():
                     file_for_report = gr.File(interactive=True, type='filepath')
+                    raw_df = gr.State(value=pd.DataFrame())
+                    report_df = gr.State(value=pd.DataFrame())
                     scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
                     filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
                 with gr.Row():
                     # clear_btn = gr.ClearButton(size='lg')
+                    analyze_btn = gr.Button('REPORT', variant='primary', size='lg', interactive=False)
                 with gr.Row():
                     with gr.Column(scale=3):
                 with gr.Row():
                     with gr.Column():
+                        csv_generate = gr.Button(value='Generate CSV Report',
+                                                 interactive=True, variant='primary', visible=False)
+                        csv_download_file = gr.File(label='Download CSV Report', visible=False)
                     with gr.Column():
+                        html_generate = gr.Button(value='Generate HTML Report',
+                                                  interactive=True, variant='primary', visible=False)
+                        html_download_file = gr.File(label='Download HTML Report', visible=False)
     def target_input_type_select(input_type):
     def example_fill(input_type):
         return {target_id: 'Q16539',
                 target_gene: 'MAPK14',
+                target_organism: 'Human',
                 target_fasta: """
 >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
 MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
 """}
+    example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
+        target_id, target_gene, target_organism, target_fasta], show_progress=False)
+    # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
+    # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
     def screen_recommend_model(fasta, family, task):
         task = TASK_MAP[task]
             train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
             score = 'CI'
+        if not np.isin(process_target_fasta(fasta), train['X2']):
             scenario = "Unseen target"
         else:
             scenario = "Seen target"
                                        & (benchmark_df['Scenario'] == scenario)
                                        & (benchmark_df['all'] == False)]
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {scenario} in the training dataset; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
     def compound_input_type_select(input_type):
         """
         Show the compound upload button only for the 'SDF' input type.

         Returns a hidden button update for 'SMILES', a visible one for 'SDF', and None for
         any other value.
         """
         if input_type == 'SMILES':
             return gr.Button(visible=False)
         if input_type == 'SDF':
             return gr.Button(visible=True)
     compound_type.select(fn=compound_input_type_select,
+                         inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
     def compound_upload_process(input_type, input_upload):
                     screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
                 else:
                     screen_df = process_drug_library_upload(library_upload)
                     if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
                         # Interpolate the limit itself; without braces the message would show the
                         # literal text "CUSTOM_DATASET_MAX_LEN" instead of the number.
                         raise gr.Error(f'The uploaded compound library has more records '
                                        f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')
     ).then(
         fn=submit_predict,
         inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
+                drug_screen_target_family, screen_flag, run_state],  # , drug_screen_email],
         outputs=[file_for_report, run_state]
     ).then(
         fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
         inputs=[compound_smiles, target_library, target_library_upload, run_state],  # , drug_screen_email],
         outputs=[identify_data_for_predict, identify_flag, run_state]
     ).then(
+        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
+        outputs=[identify_page, identify_waiting]
     ).then(
         fn=submit_predict,
         inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
+                target_identify_target_family, identify_flag, run_state],  # , target_identify_email],
         outputs=[file_for_report, run_state]
     ).then(
         fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
     ).then(
         fn=submit_predict,
         inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
+                pair_infer_target_family, infer_flag, run_state],  # , pair_infer_email],
         outputs=[file_for_report, run_state]
     ).then(
+        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
+        outputs=[infer_page, infer_waiting, tabs]
     )
     # TODO background job from these 3 pipelines to update file_for_report
     file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
         html_report,
+        raw_df,
+        report_df,
+        analyze_btn
         # ranking_pie_chart
     ])
+    analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters], outputs=[
         html_report,
+        report_df,
         # ranking_pie_chart
     ])
+    def create_csv_report_file(df, file_report):
+        try:
+            now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+            filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
+            df.drop(labels=['Compound', 'Scaffold'], axis=1).to_csv(filename, index=False)
+            return gr.File(filename, visible=True), gr.Button(visible=False)
+        except Exception as e:
+            gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
+            return None, None
     def create_html_report_file(df, file_report):
         """
         Render the report dataframe to a timestamped HTML file under reports/.

         Delegates the rendering to create_html_report. Returns the download file component
         (made visible) together with a hidden generate button; on any error a warning is shown
         and (None, None) is returned.
         """
         try:
             timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             source_stem = Path(file_report.name).stem
             filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.html"
             create_html_report(df, filename)
             return gr.File(filename, visible=True), gr.Button(visible=False)
         except Exception as e:
             gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
             return None, None
+    html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
+    csv_generate.click(fn=create_csv_report_file, inputs=[report_df, file_for_report],
+                       outputs=[csv_download_file, csv_generate])
+    html_generate.click(fn=create_html_report_file, inputs=[report_df, file_for_report],
+                        outputs=[html_download_file, html_generate])
     # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
     #                       every=5)
     demo.launch(
         show_api=False,
     )