libokj committed on
Commit
c5b7c86
·
verified ·
1 Parent(s): b36ed96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -40
app.py CHANGED
@@ -375,10 +375,10 @@ PRESET_MAP = {
375
  TARGET_FAMILY_MAP = {
376
  'General': 'general',
377
  'Kinase': 'kinase',
378
- 'Non kinase enzyme': 'enzyme',
379
- 'Membrane receptor': 'membrane',
380
- 'Nuclear receptor': 'nuclear',
381
- 'Ion channel': 'ion',
382
  'Others': 'others',
383
  }
384
 
@@ -1584,37 +1584,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1584
  # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1585
  # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1586
 
 
1587
  def screen_recommend_model(fasta, family, task):
1588
  task = TASK_MAP[task]
1589
- if task == 'DTI':
1590
- train = pd.read_csv('data/benchmarks/all_families_reduced_dti_train.csv')
1591
- score = 'AUROC'
1592
- elif task == 'DTA':
1593
- train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1594
- score = 'CI'
1595
-
1596
- if not np.isin(process_target_fasta(fasta), train['X2']):
1597
- scenario = "Unseen target"
1598
- else:
1599
- scenario = "Seen target"
1600
- benchmark_df = pd.read_csv('data/benchmarks/compound_screen.csv')
1601
 
1602
  if family == 'General':
 
 
 
 
 
 
1603
  filtered_df = benchmark_df[(benchmark_df[f'Task'] == task)
1604
- & (benchmark_df['Target.family'] == 'All families reduced')
1605
- & (benchmark_df['Scenario'] == 'Random split')
1606
- & (benchmark_df['all'] == True)]
1607
- else:
1608
- filtered_df = benchmark_df[(benchmark_df['Task'] == task)
1609
- & (benchmark_df['Target.family'] == family)
1610
  & (benchmark_df['Scenario'] == scenario)
1611
- & (benchmark_df['all'] == False)]
1612
- row = filtered_df.loc[filtered_df[score].idxmax()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1613
 
 
 
1614
  return gr.Dropdown(value=row['preset'],
1615
- info=f"Reason: {scenario} in the training dataset; we recommend the model "
1616
  f"with the best {score} ({float(row[score]):.3f}) "
1617
- f"in the {scenario.lower()} scenario on {family.lower()} family.")
1618
 
1619
 
1620
  screen_preset_recommend_btn.click(fn=screen_recommend_model,
@@ -1661,26 +1679,27 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1661
 
1662
  def identify_recommend_model(smiles, task):
1663
  task = TASK_MAP[task]
1664
- if task == 'DTI':
1665
- train = pd.read_csv('data/benchmarks/all_families_reduced_dti_train.csv')
1666
- score = 'AUROC'
1667
- elif task == 'DTA':
1668
- train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1669
- score = 'CI'
1670
- if not np.isin(smiles, train['X1']):
1671
- scenario = "Unseen drug"
1672
  else:
1673
- scenario = "Seen drug"
1674
- benchmark_df = pd.read_csv('data/benchmarks/target_identification.csv')
 
 
 
 
1675
 
1676
- filtered_df = benchmark_df[(benchmark_df['Task'] == task)
1677
- & (benchmark_df['Scenario'] == scenario)]
1678
  row = filtered_df.loc[filtered_df[score].idxmax()]
1679
 
1680
  return gr.Dropdown(value=row['preset'],
1681
- info=f"Reason: {scenario} in the training dataset; choosing the model "
1682
  f"with the best {score} ({float(row[score]):3f}) "
1683
- f"in the {scenario.lower()} scenario.")
1684
 
1685
 
1686
  identify_preset_recommend_btn.click(fn=identify_recommend_model,
 
# Maps each target-family label to its snake_case identifier. Every identifier
# is the label lower-cased with spaces turned into underscores, so the mapping
# is derived from the labels (insertion order is the order listed here).
TARGET_FAMILY_MAP = {
    label: label.lower().replace(' ', '_')
    for label in (
        'General',
        'Kinase',
        'Non kinase enzyme',
        'Membrane receptor',
        'Nuclear receptor',
        'Ion channel',
        'Others',
    )
}
384
 
 
1584
  # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1585
  # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1586
 
1587
+
def screen_recommend_model(fasta, family, task):
    """Pick the best-scoring preset for screening compounds against a target.

    The task label is translated through ``TASK_MAP`` and its ranking metric
    is looked up in ``TASK_METRIC_MAP``. The target is classified as
    "Seen Target" / "Unseen Target" by checking the processed sequence
    against the ``X2`` column of the seen-target tables, and the benchmark
    row with the highest metric among the matching rows is recommended.

    Args:
        fasta: Target input; normalised with ``process_target_fasta`` before
            any membership check.
        family: Target family label. ``'General'`` ranks only general models
            on the 'All Families' rows; any other value ranks general and
            family-specific models together on that family's rows.
        task: Task label, a key of ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` update whose value is the recommended preset and
        whose info text explains the choice.

    Raises:
        ValueError: If no benchmark row matches the task/family/scenario.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]
    # The path must be an f-string; without the prefix the literal text
    # "{task}_test_metrics.csv" is looked up and the read fails.
    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')

    # Normalise once so every membership test compares the same
    # representation (previously only the 'General' branch did this).
    sequence = process_target_fasta(fasta)

    def _target_scenario(split_name):
        # Classify the target against one seen-target table.
        seen_targets = pd.read_csv(
            f'data/benchmarks/seen_targets/{split_name}_{task.lower()}_random_split.csv')
        if sequence in seen_targets['X2'].values:
            return "Seen Target"
        return "Unseen Target"

    task_rows = benchmark_df['Task'] == task

    if family == 'General':
        scenario = _target_scenario('all_families_full')
        filtered_df = benchmark_df[task_rows
                                   & (benchmark_df['Family'] == 'All Families')
                                   & (benchmark_df['Scenario'] == scenario)
                                   & (benchmark_df['Type'] == 'General')]
    else:
        # NOTE(review): `family` is interpolated into the file name and
        # compared with the 'Family' column verbatim - confirm the caller
        # passes it in the form those files use.
        scenario_general = _target_scenario('all_families_full')
        scenario_family = _target_scenario(family)

        family_rows = task_rows & (benchmark_df['Family'] == family)
        # General and family-specific models may each have seen the target
        # or not, so each model type is filtered with its own scenario.
        filtered_df_general = benchmark_df[family_rows
                                           & (benchmark_df['Scenario'] == scenario_general)
                                           & (benchmark_df['Type'] == 'General')]
        filtered_df_family = benchmark_df[family_rows
                                          & (benchmark_df['Scenario'] == scenario_family)
                                          & (benchmark_df['Type'] == 'Family')]
        filtered_df = pd.concat([filtered_df_general, filtered_df_family])

    if filtered_df.empty:
        # idxmax() on an empty selection raises an opaque ValueError; keep
        # the exception type but say what was being looked up.
        raise ValueError(
            f"No benchmark results for task {task!r} and family {family!r}.")

    row = filtered_df.loc[filtered_df[score].idxmax()]

    return gr.Dropdown(value=row['preset'],
                       info=f"Reason: {row['Scenario']} in training; we recommend the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {row['Scenario']} scenario on {row['Family']}.")
1636
 
1637
 
1638
  screen_preset_recommend_btn.click(fn=screen_recommend_model,
 
1679
 
def identify_recommend_model(smiles, task):
    """Pick the best-scoring preset for identifying targets of a compound.

    The task label is translated through ``TASK_MAP`` and its ranking metric
    is looked up in ``TASK_METRIC_MAP``. The compound is classified as
    "Seen Compound" / "Unseen Compound" by checking its canonicalised SMILES
    against the ``X1`` column of the seen-drug table, and the general-model
    benchmark row with the highest metric for that scenario is recommended.

    Args:
        smiles: Compound SMILES; canonicalised with ``rdkit_canonicalize``
            before the membership check.
        task: Task label, a key of ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` update whose value is the recommended preset and
        whose info text explains the choice.

    Raises:
        ValueError: If no benchmark row matches the task/scenario.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]
    # The path must be an f-string; without the prefix the literal text
    # "{task}_test_metrics.csv" is looked up and the read fails.
    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')

    seen_drugs = pd.read_csv(
        f'data/benchmarks/seen_drugs/all_families_full_{task.lower()}_random_split.csv')
    if rdkit_canonicalize(smiles) in seen_drugs['X1'].values:
        scenario = "Seen Compound"
    else:
        scenario = "Unseen Compound"

    filtered_df = benchmark_df[(benchmark_df['Task'] == task)
                               & (benchmark_df['Family'] == 'All Families')
                               & (benchmark_df['Scenario'] == scenario)
                               & (benchmark_df['Type'] == 'General')]

    if filtered_df.empty:
        # idxmax() on an empty selection raises an opaque ValueError; keep
        # the exception type but say what was being looked up.
        raise ValueError(
            f"No benchmark results for task {task!r} in the {scenario} scenario.")

    row = filtered_df.loc[filtered_df[score].idxmax()]

    # ".3f" (three decimals), not "3f" (minimum width 3 with the default six
    # decimals); this matches the message produced for compound screening.
    return gr.Dropdown(value=row['preset'],
                       info=f"Reason: {scenario} in training; choosing the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {scenario} scenario.")
1703
 
1704
 
1705
  identify_preset_recommend_btn.click(fn=identify_recommend_model,