Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -375,10 +375,10 @@ PRESET_MAP = {
|
|
375 |
TARGET_FAMILY_MAP = {
|
376 |
'General': 'general',
|
377 |
'Kinase': 'kinase',
|
378 |
-
'Non kinase enzyme': '
|
379 |
-
'Membrane receptor': '
|
380 |
-
'Nuclear receptor': '
|
381 |
-
'Ion channel': '
|
382 |
'Others': 'others',
|
383 |
}
|
384 |
|
@@ -1584,37 +1584,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1584 |
# example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1585 |
# example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1586 |
|
|
|
1587 |
def screen_recommend_model(fasta, family, task):
|
1588 |
task = TASK_MAP[task]
|
1589 |
-
|
1590 |
-
|
1591 |
-
score = 'AUROC'
|
1592 |
-
elif task == 'DTA':
|
1593 |
-
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
1594 |
-
score = 'CI'
|
1595 |
-
|
1596 |
-
if not np.isin(process_target_fasta(fasta), train['X2']):
|
1597 |
-
scenario = "Unseen target"
|
1598 |
-
else:
|
1599 |
-
scenario = "Seen target"
|
1600 |
-
benchmark_df = pd.read_csv('data/benchmarks/compound_screen.csv')
|
1601 |
|
1602 |
if family == 'General':
|
|
|
|
|
|
|
|
|
|
|
|
|
1603 |
filtered_df = benchmark_df[(benchmark_df[f'Task'] == task)
|
1604 |
-
& (benchmark_df['
|
1605 |
-
& (benchmark_df['Scenario'] == 'Random split')
|
1606 |
-
& (benchmark_df['all'] == True)]
|
1607 |
-
else:
|
1608 |
-
filtered_df = benchmark_df[(benchmark_df['Task'] == task)
|
1609 |
-
& (benchmark_df['Target.family'] == family)
|
1610 |
& (benchmark_df['Scenario'] == scenario)
|
1611 |
-
& (benchmark_df['
|
1612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1613 |
|
|
|
|
|
1614 |
return gr.Dropdown(value=row['preset'],
|
1615 |
-
info=f"Reason: {
|
1616 |
f"with the best {score} ({float(row[score]):.3f}) "
|
1617 |
-
f"in the {
|
1618 |
|
1619 |
|
1620 |
screen_preset_recommend_btn.click(fn=screen_recommend_model,
|
@@ -1661,26 +1679,27 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1661 |
|
1662 |
def identify_recommend_model(smiles, task):
|
1663 |
task = TASK_MAP[task]
|
1664 |
-
|
1665 |
-
|
1666 |
-
|
1667 |
-
|
1668 |
-
|
1669 |
-
|
1670 |
-
|
1671 |
-
scenario = "Unseen drug"
|
1672 |
else:
|
1673 |
-
scenario = "
|
1674 |
-
|
|
|
|
|
|
|
|
|
1675 |
|
1676 |
-
filtered_df = benchmark_df[(benchmark_df['Task'] == task)
|
1677 |
-
& (benchmark_df['Scenario'] == scenario)]
|
1678 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
1679 |
|
1680 |
return gr.Dropdown(value=row['preset'],
|
1681 |
-
info=f"Reason: {scenario} in
|
1682 |
f"with the best {score} ({float(row[score]):3f}) "
|
1683 |
-
f"in the {scenario
|
1684 |
|
1685 |
|
1686 |
identify_preset_recommend_btn.click(fn=identify_recommend_model,
|
|
|
375 |
# Maps each target-family label shown to the user onto its snake_case
# identifier. Insertion order follows the order of the pairs below.
TARGET_FAMILY_MAP = dict((
    ('General', 'general'),
    ('Kinase', 'kinase'),
    ('Non kinase enzyme', 'non_kinase_enzyme'),
    ('Membrane receptor', 'membrane_receptor'),
    ('Nuclear receptor', 'nuclear_receptor'),
    ('Ion channel', 'ion_channel'),
    ('Others', 'others'),
))
|
384 |
|
|
|
1584 |
# example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1585 |
# example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1586 |
|
1587 |
+
|
1588 |
def screen_recommend_model(fasta, family, task):
    """Recommend the screening preset with the best benchmark score.

    Loads the benchmark metrics table for the task, determines whether the
    query target occurs in the relevant "seen targets" list(s), keeps the
    benchmark rows matching that scenario, and selects the row with the
    highest value of the task's metric.

    Args:
        fasta: Target input; it is passed through ``process_target_fasta``
            before being compared with the ``X2`` column of the
            seen-targets tables.
        family: Target family label. ``'General'`` restricts the search to
            the ``'All Families'`` rows of general models; any other value
            compares general and family-specific models on that family.
        task: Task label; must be a key of ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` whose value is the ``preset`` of the best row and
        whose ``info`` text explains the choice.

    Raises:
        KeyError: If ``task`` is not a key of ``TASK_MAP`` (or its mapped
            value is not a key of ``TASK_METRIC_MAP``).
        ValueError: Raised by ``idxmax`` when no benchmark row matches.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]
    # The f-prefix is required: without it pandas is asked to open a file
    # literally named "{task}_test_metrics.csv".
    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')

    # Process the input once and use the same value for every membership
    # test, so the family branch compares the same thing the general branch
    # does instead of the raw user input.
    target = process_target_fasta(fasta)

    def seen_scenario(csv_path):
        # Classify the query target against one seen-targets table.
        seen_targets = pd.read_csv(csv_path)
        if target in seen_targets['X2'].values:
            return "Seen Target"
        return "Unseen Target"

    # Both branches need the all-families lookup, so do it once up front.
    scenario_general = seen_scenario(
        f'data/benchmarks/seen_targets/all_families_full_{task.lower()}_random_split.csv')
    task_rows = benchmark_df['Task'] == task

    if family == 'General':
        filtered_df = benchmark_df[task_rows
                                   & (benchmark_df['Family'] == 'All Families')
                                   & (benchmark_df['Scenario'] == scenario_general)
                                   & (benchmark_df['Type'] == 'General')]
    else:
        # NOTE(review): the family label is interpolated into the file name
        # as-is (spaces and capitals included); confirm the files on disk are
        # named by label rather than by the TARGET_FAMILY_MAP identifier.
        scenario_family = seen_scenario(
            f'data/benchmarks/seen_targets/{family}_{task.lower()}_random_split.csv')

        family_rows = task_rows & (benchmark_df['Family'] == family)
        # General models evaluated on this family ...
        filtered_df_general = benchmark_df[family_rows
                                           & (benchmark_df['Scenario'] == scenario_general)
                                           & (benchmark_df['Type'] == 'General')]
        # ... compete with the family-specific models.
        filtered_df_family = benchmark_df[family_rows
                                          & (benchmark_df['Scenario'] == scenario_family)
                                          & (benchmark_df['Type'] == 'Family')]
        filtered_df = pd.concat([filtered_df_general, filtered_df_family])

    row = filtered_df.loc[filtered_df[score].idxmax()]

    return gr.Dropdown(value=row['preset'],
                       info=f"Reason: {row['Scenario']} in training; we recommend the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {row['Scenario']} scenario on {row['Family']}.")
|
1636 |
|
1637 |
|
1638 |
screen_preset_recommend_btn.click(fn=screen_recommend_model,
|
|
|
1679 |
|
1680 |
def identify_recommend_model(smiles, task):
    """Recommend the target-identification preset with the best benchmark score.

    Loads the benchmark metrics table for the task, checks whether the
    query compound occurs in the all-families seen-drugs table, keeps the
    general-model rows for that scenario, and selects the row with the
    highest value of the task's metric.

    Args:
        smiles: Compound input; it is passed through ``rdkit_canonicalize``
            before being compared with the ``X1`` column of the seen-drugs
            table.
        task: Task label; must be a key of ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` whose value is the ``preset`` of the best row and
        whose ``info`` text explains the choice.

    Raises:
        KeyError: If ``task`` is not a key of ``TASK_MAP`` (or its mapped
            value is not a key of ``TASK_METRIC_MAP``).
        ValueError: Raised by ``idxmax`` when no benchmark row matches.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]
    # The f-prefix is required: without it pandas is asked to open a file
    # literally named "{task}_test_metrics.csv".
    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')

    seen_drugs = pd.read_csv(
        f'data/benchmarks/seen_drugs/all_families_full_{task.lower()}_random_split.csv')
    if rdkit_canonicalize(smiles) in seen_drugs['X1'].values:
        scenario = "Seen Compound"
    else:
        scenario = "Unseen Compound"

    filtered_df = benchmark_df[(benchmark_df['Task'] == task)
                               & (benchmark_df['Family'] == 'All Families')
                               & (benchmark_df['Scenario'] == scenario)
                               & (benchmark_df['Type'] == 'General')]

    row = filtered_df.loc[filtered_df[score].idxmax()]

    # ".3f" (three decimals) matches the screening recommendation message;
    # the previous "3f" only set a minimum width and printed six decimals.
    return gr.Dropdown(value=row['preset'],
                       info=f"Reason: {scenario} in training; choosing the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {scenario} scenario.")
|
1703 |
|
1704 |
|
1705 |
identify_preset_recommend_btn.click(fn=identify_recommend_model,
|