DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on Mar 11, 2024

Commit

c5b7c86

verified ·

1 Parent(s): b36ed96

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -40

app.py CHANGED Viewed

@@ -375,10 +375,10 @@ PRESET_MAP = {
 TARGET_FAMILY_MAP = {
     'General': 'general',
     'Kinase': 'kinase',
-    'Non kinase enzyme': 'enzyme',
-    'Membrane receptor': 'membrane',
-    'Nuclear receptor': 'nuclear',
-    'Ion channel': 'ion',
     'Others': 'others',
 }
@@ -1584,37 +1584,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
     # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
     def screen_recommend_model(fasta, family, task):
         task = TASK_MAP[task]
-        if task == 'DTI':
-            train = pd.read_csv('data/benchmarks/all_families_reduced_dti_train.csv')
-            score = 'AUROC'
-        elif task == 'DTA':
-            train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
-            score = 'CI'
-        if not np.isin(process_target_fasta(fasta), train['X2']):
-            scenario = "Unseen target"
-        else:
-            scenario = "Seen target"
-        benchmark_df = pd.read_csv('data/benchmarks/compound_screen.csv')
         if family == 'General':
             filtered_df = benchmark_df[(benchmark_df[f'Task'] == task)
-                                       & (benchmark_df['Target.family'] == 'All families reduced')
-                                       & (benchmark_df['Scenario'] == 'Random split')
-                                       & (benchmark_df['all'] == True)]
-        else:
-            filtered_df = benchmark_df[(benchmark_df['Task'] == task)
-                                       & (benchmark_df['Target.family'] == family)
                                        & (benchmark_df['Scenario'] == scenario)
-                                       & (benchmark_df['all'] == False)]
-        row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
-                           info=f"Reason: {scenario} in the training dataset; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
-                                f"in the {scenario.lower()} scenario on {family.lower()} family.")
     screen_preset_recommend_btn.click(fn=screen_recommend_model,
@@ -1661,26 +1679,27 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     def identify_recommend_model(smiles, task):
         task = TASK_MAP[task]
-        if task == 'DTI':
-            train = pd.read_csv('data/benchmarks/all_families_reduced_dti_train.csv')
-            score = 'AUROC'
-        elif task == 'DTA':
-            train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
-            score = 'CI'
-        if not np.isin(smiles, train['X1']):
-            scenario = "Unseen drug"
         else:
-            scenario = "Seen drug"
-        benchmark_df = pd.read_csv('data/benchmarks/target_identification.csv')
-        filtered_df = benchmark_df[(benchmark_df['Task'] == task)
-                                   & (benchmark_df['Scenario'] == scenario)]
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
-                           info=f"Reason: {scenario} in the training dataset; choosing the model "
                                 f"with the best {score} ({float(row[score]):3f}) "
-                                f"in the {scenario.lower()} scenario.")
     identify_preset_recommend_btn.click(fn=identify_recommend_model,

 # Target-family display label -> snake_case identifier for that family.
 TARGET_FAMILY_MAP = {
     'General': 'general',
     'Kinase': 'kinase',
     'Non kinase enzyme': 'non_kinase_enzyme',
     'Membrane receptor': 'membrane_receptor',
     'Nuclear receptor': 'nuclear_receptor',
     'Ion channel': 'ion_channel',
     'Others': 'others',
 }
     # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
     # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
     def screen_recommend_model(fasta, family, task):
         """Pick the best-scoring preset for screening compounds against a target.

         Args:
             fasta: Target sequence input; it is passed through
                 ``process_target_fasta`` before being compared with the
                 ``X2`` column of the seen-target tables.
             family: Target family label. ``'General'`` restricts the choice to
                 general models benchmarked on ``'All Families'``; any other
                 label considers both general and family-specific models
                 benchmarked on that family.
             task: Task label, translated through ``TASK_MAP``.

         Returns:
             A ``gr.Dropdown`` update whose value is the recommended preset and
             whose info text explains the choice.
         """
         task = TASK_MAP[task]
         score = TASK_METRIC_MAP[task]
         # The path must be an f-string; without the prefix pandas is asked for
         # a file literally named '{task}_test_metrics.csv'.
         benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')

         # Normalise the input once so every membership test below compares the
         # same representation of the target.
         target = process_target_fasta(fasta)

         # Both branches need to know whether the general models saw the target.
         seen_targets_general = pd.read_csv(
             f'data/benchmarks/seen_targets/all_families_full_{task.lower()}_random_split.csv')
         if target in seen_targets_general['X2'].values:
             scenario_general = "Seen Target"
         else:
             scenario_general = "Unseen Target"

         if family == 'General':
             filtered_df = benchmark_df[(benchmark_df['Task'] == task)
                                        & (benchmark_df['Family'] == 'All Families')
                                        & (benchmark_df['Scenario'] == scenario_general)
                                        & (benchmark_df['Type'] == 'General')]
         else:
             # NOTE(review): the family label is interpolated verbatim into the
             # file name; confirm the files are named by label and not by the
             # corresponding TARGET_FAMILY_MAP value.
             seen_targets_family = pd.read_csv(
                 f'data/benchmarks/seen_targets/{family}_{task.lower()}_random_split.csv')
             if target in seen_targets_family['X2'].values:
                 scenario_family = "Seen Target"
             else:
                 scenario_family = "Unseen Target"
             # General and family-specific models may each have seen (or not
             # seen) the target, so each candidate set uses its own scenario.
             filtered_df_general = benchmark_df[(benchmark_df['Task'] == task)
                                                & (benchmark_df['Family'] == family)
                                                & (benchmark_df['Scenario'] == scenario_general)
                                                & (benchmark_df['Type'] == 'General')]
             filtered_df_family = benchmark_df[(benchmark_df['Task'] == task)
                                               & (benchmark_df['Family'] == family)
                                               & (benchmark_df['Scenario'] == scenario_family)
                                               & (benchmark_df['Type'] == 'Family')]
             filtered_df = pd.concat([filtered_df_general, filtered_df_family])

         # NOTE(review): idxmax() on an empty selection raises; no fallback is
         # defined here for a task/family/scenario with no benchmark rows.
         row = filtered_df.loc[filtered_df[score].idxmax()]
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {row['Scenario']} in training; we recommend the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
                                 f"in the {row['Scenario']} scenario on {row['Family']}.")
     screen_preset_recommend_btn.click(fn=screen_recommend_model,
     def identify_recommend_model(smiles, task):
         """Pick the best-scoring general preset for identifying a compound's targets.

         Args:
             smiles: Compound input; it is passed through ``rdkit_canonicalize``
                 before being compared with the ``X1`` column of the seen-drug
                 table.
             task: Task label, translated through ``TASK_MAP``.

         Returns:
             A ``gr.Dropdown`` update whose value is the recommended preset and
             whose info text explains the choice.
         """
         task = TASK_MAP[task]
         score = TASK_METRIC_MAP[task]
         # The path must be an f-string; without the prefix pandas is asked for
         # a file literally named '{task}_test_metrics.csv'.
         benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
         seen_drugs = pd.read_csv(
             f'data/benchmarks/seen_drugs/all_families_full_{task.lower()}_random_split.csv')
         if rdkit_canonicalize(smiles) in seen_drugs['X1'].values:
             scenario = "Seen Compound"
         else:
             scenario = "Unseen Compound"
         filtered_df = benchmark_df[(benchmark_df['Task'] == task)
                                    & (benchmark_df['Family'] == 'All Families')
                                    & (benchmark_df['Scenario'] == scenario)
                                    & (benchmark_df['Type'] == 'General')]
         # NOTE(review): idxmax() on an empty selection raises; no fallback is
         # defined here for a task/scenario with no benchmark rows.
         row = filtered_df.loc[filtered_df[score].idxmax()]
         # '.3f' (three decimals) matches the screening recommendation text;
         # the previous '3f' only set a minimum width and printed six decimals.
         return gr.Dropdown(value=row['preset'],
                            info=f"Reason: {scenario} in training; choosing the model "
                                 f"with the best {score} ({float(row[score]):.3f}) "
                                 f"in the {scenario} scenario.")
     identify_preset_recommend_btn.click(fn=identify_recommend_model,