File size: 5,454 Bytes
f979fd7
 
 
 
13490f0
f979fd7
 
 
 
 
13490f0
f979fd7
 
 
 
 
 
 
 
 
13490f0
 
 
 
 
 
 
 
 
 
 
 
f979fd7
196baf1
f979fd7
 
 
13490f0
 
f979fd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13490f0
 
f979fd7
13490f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f979fd7
 
 
 
 
 
 
 
 
 
13490f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f979fd7
 
 
 
 
 
13490f0
 
 
f979fd7
 
13490f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import protac_degradation_predictor as pdp

from typing import Dict, List, Literal
import difflib
import json

import torch
import numpy as np
from rdkit import Chem
import gradio as gr
import pandas as pd


def gradio_app(
        protac_smiles: str | List[str],
        e3_ligase: str | List[str],
        target_uniprot: str | List[str],
        cell_line: str | List[str],
        use_models_from_cv: bool = False,
) -> Dict[str, np.ndarray]:
    """ Wrapper for the Gradio interface.
    
    Args:
        protac_smiles (str | List[str]): PROTAC SMILES string or list of strings
        e3_ligase (str | List[str]): E3 ligase string or list of strings
        target_uniprot (str | List[str]): Uniprot ID string or list of strings
        cell_line (str | List[str]): Cell line string or list of strings
        use_models_from_cv (bool): Whether to use models trained during cross-validation
        
    Returns:
        Dict[str, np.ndarray]: Dictionary of mean and majority vote predictions
    """

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    avail_uniprots = pdp.avail_uniprots()
    avail_cells = pdp.avail_cell_lines()

    # Check if input Uniprot ID and cell line are among the available ones. If
    # not, suggest the 3 closest matches.
    if target_uniprot not in avail_uniprots:
        suggestions = difflib.get_close_matches(target_uniprot, avail_uniprots, n=3, cutoff=0.5)
        suggestion_text = "Did you mean:" + ", ".join(suggestions) + "?" if suggestions else "No close matches found."
        raise gr.Error(f"Invalid Uniprot ID. {suggestion_text}", duration=None)

    if cell_line not in avail_cells:
        suggestions = difflib.get_close_matches(cell_line, avail_cells, n=3, cutoff=0.5)
        suggestion_text = "Did you mean:" + ", ".join(suggestions) + "?" if suggestions else "No close matches found."
        raise gr.Error(f"Invalid Cell Line. {suggestion_text}", duration=None)

    prediction = pdp.get_protac_active_proba(
        protac_smiles,
        e3_ligase,
        target_uniprot,
        cell_line,
        device=device,
        use_models_from_cv=use_models_from_cv,
    )

    mean_pred = {"Active": float(prediction['mean']), "Inactive": 1. - float(prediction['mean'])}
    majvote_pred = "Active" if prediction['majority_vote'] else "Inactive"

    # Create a DataFrame for the raw predictions suitable for a bar plot
    raw_preds_df = pd.DataFrame({
        'Model': [f"Model n.{i}" for i in range(len(prediction["preds"]))],
        'Active Probability': [float(p[0]) for p in prediction["preds"]],
    })

    # Write JSON to a file in tmp directory
    json_data = {
        "protac_smiles": protac_smiles,
        "e3_ligase": e3_ligase,
        "target_uniprot": target_uniprot,
        "cell_line": cell_line,
        "mean_prediction": mean_pred,
        "majority_vote_prediction": majvote_pred,
        "model_predictions": raw_preds_df.to_dict(orient="records")
    }
    json_content = json.dumps(json_data, indent=4)
    json_filename = "/tmp/predictions.json"
    with open(json_filename, "w") as f:
        f.write(json_content)

    return mean_pred, majvote_pred, raw_preds_df, json_filename

description = r"""A machine learning-based tool for predicting PROTAC protein degradation activity. This is a GUI app of the [PROTAC-Degradation-Predictor Github repository](https://github.com/ribesstefano/PROTAC-Degradation-Predictor/).
After having input the PROTAC SMILES string and its biological context, the app will predict its activity. A PROTAC is defined active when:
$$D_{max} \ge 60\\% \ \ \mathrm{and} \ \ pDC_{50} \ge 6$$
If you find this tool useful, please cite the following paper:
```
@article{Ribes_2024,
   title={Modeling PROTAC degradation activity with machine learning},
   volume={6},
   ISSN={2667-3185},
   url={http://dx.doi.org/10.1016/j.ailsci.2024.100104},
   DOI={10.1016/j.ailsci.2024.100104},
   journal={Artificial Intelligence in the Life Sciences},
   publisher={Elsevier BV},
   author={Ribes, Stefano and Nittinger, Eva and Tyrchan, Christian and Mercado, Rocío},
   year={2024},
   month=dec, pages={100104}
}
```
"""


demo = gr.Interface(
    fn=gradio_app,
    inputs=[
        gr.Textbox(placeholder="PROTAC SMILES", label="PROTAC SMILES"),
        gr.Dropdown(pdp.avail_e3_ligases(), label="E3 ligase"),
        gr.Textbox(placeholder="E.g., Q92769", label="Target Uniprot"),
        gr.Textbox(placeholder="E.g., HeLa", label="Cell line"),
        gr.Checkbox(label="Use models trained during cross-validation"),
    ],
    outputs=[
        gr.Label(label="Average probability (confidence)"),
        gr.Label(label="Majority vote prediction"),
        gr.BarPlot(
            x="Model",
            y="Active Probability",
            vertical=False,
            y_lim=[0, 1],
            tooltip="Active Probability",
            title="Models' activity probability prediction",
            label="Models' activity probability prediction",
            show_label=False,
        ),
        gr.DownloadButton(label="Download as JSON", size="sm"),
    ],
    examples=[
        [
            "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)[C@@H](NC(=O)COCCCCCCCCCOCC(=O)Nc2ccc(C(=O)Nc3ccc(F)cc3N)cc2)C(C)(C)C)cc1",
            "VHL",
            "Q92769",
            "HeLa",
        ],
    ],
    title="PROTAC Degradation Predictor",
    submit_btn="Predict Activity",
    description=description,
)

demo.launch()