{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Compare XGBoost and Deep Learning (DNN) Predictions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "\n", "# Add the parent of the current working directory to the import path\n", "os.sys.path.append(os.path.dirname(os.path.abspath('.')))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", " from pandas import MultiIndex, Int64Index\n" ] } ], "source": [ "import protac_degradation_predictor as pdp" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import protac_degradation_predictor as pdp\n", "import torch\n", "from rdkit import Chem" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Compound ID | \n", "Uniprot | \n", "Smiles | \n", "E3 Ligase | \n", "InChI | \n", "InChI Key | \n", "Molecular Weight | \n", "Heavy Atom Count | \n", "Ring Count | \n", "Rotatable Bond Count | \n", "... | \n", "Name | \n", "Assay (DC50/Dmax) | \n", "Exact Mass | \n", "XLogP3 | \n", "Target (Parsed) | \n", "POI Sequence | \n", "E3 Ligase Uniprot | \n", "E3 Ligase Sequence | \n", "Cell Line Identifier | \n", "Active - OR | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C73H88ClF3N10O10S4/c1-47(49-13-15-51(... | \n", "SXPDUCVNMGMWBJ-FMZBIETASA-N | \n", "1486.282 | \n", "101 | \n", "10 | \n", "24 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "
1 | \n", "2 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C74H90ClF3N10O10S4/c1-48(50-13-15-52(... | \n", "HQKUMELJMUNTTF-NMKDNUEVSA-N | \n", "1500.309 | \n", "102 | \n", "10 | \n", "25 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "
2 | \n", "3 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C75H92ClF3N10O10S4/c1-49(51-16-18-53(... | \n", "ATQCEJKUPSBDMA-QARNUTPLSA-N | \n", "1514.336 | \n", "103 | \n", "10 | \n", "26 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "
3 | \n", "4 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C76H94ClF3N10O10S4/c1-50(52-17-19-54(... | \n", "FNKQAGMHNFFSEI-DTTPTBRMSA-N | \n", "1528.363 | \n", "104 | \n", "10 | \n", "27 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "
4 | \n", "5 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C77H96ClF3N10O10S4/c1-51(53-18-20-55(... | \n", "PXVFFBGSTYQHRO-REQIQPEASA-N | \n", "1542.390 | \n", "105 | \n", "10 | \n", "28 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "True | \n", "
5 rows × 35 columns
\n", "\n", " | Uniprot | \n", "Cell Line Identifier | \n", "Smiles | \n", "E3 Ligase | \n", "DC50 (nM) | \n", "Dmax (%) | \n", "Active | \n", "
---|---|---|---|---|---|---|---|
4 | \n", "Q07817 | \n", "MOLT-4 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "53.0 | \n", "100.0 | \n", "True | \n", "
7 | \n", "Q07817 | \n", "MOLT-4 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "93.0 | \n", "90.0 | \n", "True | \n", "
60 | \n", "P00533 | \n", "H1975/WR | \n", "C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... | \n", "VHL | \n", "25.3 | \n", "90.0 | \n", "True | \n", "
69 | \n", "P00533 | \n", "H1975/WR | \n", "C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... | \n", "VHL | \n", "5.9 | \n", "100.0 | \n", "True | \n", "
72 | \n", "Q9NWZ3 | \n", "PH1-PBMCs-hiPSC4F1 | \n", "COc1cc2c(OC[C@@H]3CCC(=O)N3)ncc(C#CCCCCCCCCCCC... | \n", "VHL | \n", "3000.0 | \n", "50.0 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2121 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "820.0 | \n", "81.0 | \n", "True | \n", "
2122 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCC... | \n", "FEM1B | \n", "250.0 | \n", "94.0 | \n", "True | \n", "
2125 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "1100.0 | \n", "85.0 | \n", "False | \n", "
2126 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "3600.0 | \n", "60.0 | \n", "False | \n", "
2127 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "1600.0 | \n", "80.0 | \n", "False | \n", "
857 rows × 7 columns
\n", "\n", " | Compound ID | \n", "Uniprot | \n", "Smiles | \n", "E3 Ligase | \n", "InChI | \n", "InChI Key | \n", "Molecular Weight | \n", "Heavy Atom Count | \n", "Ring Count | \n", "Rotatable Bond Count | \n", "... | \n", "dnn_mean | \n", "dnn_majority_vote | \n", "dnn_model_n0 | \n", "dnn_model_n1 | \n", "dnn_model_n2 | \n", "xgb_mean | \n", "xgb_majority_vote | \n", "xgb_model_n0 | \n", "xgb_model_n1 | \n", "xgb_model_n2 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "5 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C77H96ClF3N10O10S4/c1-51(53-18-20-55(... | \n", "PXVFFBGSTYQHRO-REQIQPEASA-N | \n", "1542.390 | \n", "105 | \n", "10 | \n", "28 | \n", "... | \n", "0.926091 | \n", "True | \n", "0.922488 | \n", "0.971058 | \n", "0.884728 | \n", "0.977803 | \n", "True | \n", "0.979807 | \n", "0.972958 | \n", "0.980646 | \n", "
1 | \n", "8 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C76H96ClF3N10O9S4/c1-51(53-17-19-55(2... | \n", "FQLHSKINBMZPFV-WTFVIAODSA-N | \n", "1514.380 | \n", "103 | \n", "10 | \n", "28 | \n", "... | \n", "0.934987 | \n", "True | \n", "0.938649 | \n", "0.967957 | \n", "0.898355 | \n", "0.975625 | \n", "True | \n", "0.975480 | \n", "0.972628 | \n", "0.978767 | \n", "
2 | \n", "61 | \n", "P00533 | \n", "C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... | \n", "VHL | \n", "InChI=1S/C62H79N11O7S/c1-8-53(75)66-45-19-18-2... | \n", "HQGHIKHHXKBFOJ-OIQPADLRSA-N | \n", "1122.451 | \n", "81 | \n", "8 | \n", "25 | \n", "... | \n", "0.532295 | \n", "True | \n", "0.651652 | \n", "0.704779 | \n", "0.240453 | \n", "0.917975 | \n", "True | \n", "0.946647 | \n", "0.901747 | \n", "0.905530 | \n", "
3 | \n", "70 | \n", "P00533 | \n", "C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... | \n", "VHL | \n", "InChI=1S/C56H67N11O7S/c1-8-47(69)60-39-13-12-1... | \n", "CBWIPIZLLNOTMZ-PDMHGPQCSA-N | \n", "1038.289 | \n", "75 | \n", "8 | \n", "19 | \n", "... | \n", "0.586461 | \n", "True | \n", "0.713173 | \n", "0.802957 | \n", "0.243252 | \n", "0.922217 | \n", "True | \n", "0.919568 | \n", "0.927843 | \n", "0.919240 | \n", "
4 | \n", "73 | \n", "Q9NWZ3 | \n", "COc1cc2c(OC[C@@H]3CCC(=O)N3)ncc(C#CCCCCCCCCCCC... | \n", "VHL | \n", "InChI=1S/C51H65N7O8S/c1-32-45(67-31-55-32)34-2... | \n", "DFCKZXBDTTURLE-ABFWCBLXSA-N | \n", "936.189 | \n", "67 | \n", "6 | \n", "21 | \n", "... | \n", "0.378277 | \n", "False | \n", "0.456931 | \n", "0.610394 | \n", "0.067505 | \n", "0.103941 | \n", "False | \n", "0.107219 | \n", "0.095667 | \n", "0.108936 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
852 | \n", "3262 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "InChI=1S/C38H41Cl2N9O5S/c1-23-24(2)55-38-35(23... | \n", "QVNLOGQWJMAEJT-LJAQVGFWSA-N | \n", "806.777 | \n", "55 | \n", "6 | \n", "15 | \n", "... | \n", "0.461836 | \n", "False | \n", "0.631028 | \n", "0.531522 | \n", "0.222957 | \n", "0.937156 | \n", "True | \n", "0.934651 | \n", "0.940516 | \n", "0.936302 | \n", "
853 | \n", "3263 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCC... | \n", "FEM1B | \n", "InChI=1S/C39H43Cl2N9O4S/c1-24-25(2)55-39-36(24... | \n", "IEXUFNUFUSSKQI-PMERELPUSA-N | \n", "804.805 | \n", "55 | \n", "6 | \n", "15 | \n", "... | \n", "0.478040 | \n", "False | \n", "0.602999 | \n", "0.603999 | \n", "0.227121 | \n", "0.512707 | \n", "True | \n", "0.478524 | \n", "0.512765 | \n", "0.546831 | \n", "
854 | \n", "3264 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "InChI=1S/C40H45Cl2N9O6S/c1-25-26(2)58-40-37(25... | \n", "CPDVGNBJFIONLX-HKBQPEDESA-N | \n", "850.830 | \n", "58 | \n", "6 | \n", "18 | \n", "... | \n", "0.410410 | \n", "False | \n", "0.426177 | \n", "0.582066 | \n", "0.222986 | \n", "0.118575 | \n", "False | \n", "0.107306 | \n", "0.111763 | \n", "0.136655 | \n", "
855 | \n", "3265 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "InChI=1S/C42H49Cl2N9O7S/c1-27-28(2)61-42-39(27... | \n", "QMBOIOPJFSHXPV-XIFFEERXSA-N | \n", "894.883 | \n", "61 | \n", "6 | \n", "21 | \n", "... | \n", "0.418736 | \n", "False | \n", "0.438183 | \n", "0.595072 | \n", "0.222955 | \n", "0.184872 | \n", "False | \n", "0.159074 | \n", "0.173036 | \n", "0.222507 | \n", "
856 | \n", "3266 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "InChI=1S/C44H53Cl2N9O8S/c1-29-30(2)64-44-41(29... | \n", "UUCUKSPUFPMKNK-DHUJRADRSA-N | \n", "938.936 | \n", "64 | \n", "6 | \n", "24 | \n", "... | \n", "0.472904 | \n", "False | \n", "0.470890 | \n", "0.724864 | \n", "0.222957 | \n", "0.122594 | \n", "False | \n", "0.107752 | \n", "0.154437 | \n", "0.105594 | \n", "
857 rows × 45 columns
\n", "\n", " | xgb_mean | \n", "xgb_model_n0 | \n", "xgb_model_n1 | \n", "xgb_model_n2 | \n", "
---|---|---|---|---|
count | \n", "857.000000 | \n", "857.000000 | \n", "857.000000 | \n", "857.000000 | \n", "
mean | \n", "0.519258 | \n", "0.519648 | \n", "0.518779 | \n", "0.519347 | \n", "
std | \n", "0.444579 | \n", "0.443365 | \n", "0.443698 | \n", "0.447897 | \n", "
min | \n", "0.010143 | \n", "0.010036 | \n", "0.011076 | \n", "0.008641 | \n", "
25% | \n", "0.030862 | \n", "0.030901 | \n", "0.033069 | \n", "0.029464 | \n", "
50% | \n", "0.767127 | \n", "0.766466 | \n", "0.729913 | \n", "0.788420 | \n", "
75% | \n", "0.961856 | \n", "0.962569 | \n", "0.961087 | \n", "0.965674 | \n", "
max | \n", "0.988086 | \n", "0.988136 | \n", "0.986411 | \n", "0.990691 | \n", "
\n", " | Uniprot | \n", "Cell Line Identifier | \n", "Smiles | \n", "E3 Ligase | \n", "DC50 (nM) | \n", "Dmax (%) | \n", "Active | \n", "dnn_majority_vote | \n", "xgb_majority_vote | \n", "
---|---|---|---|---|---|---|---|---|---|
5 | \n", "Q9NWZ3 | \n", "PH1-PBMCs-hiPSC4F1 | \n", "CC[C@@H]1[C@H](F)C(=O)N[C@@H]1COc1ncc(C#CCCCCC... | \n", "VHL | \n", "259.0 | \n", "90.0 | \n", "True | \n", "False | \n", "True | \n", "
6 | \n", "Q9NWZ3 | \n", "PH1-PBMCs-hiPSC4F1 | \n", "CC[C@@H]1[C@H](F)C(=O)N[C@@H]1COc1ncc(C#CCN2CC... | \n", "VHL | \n", "151.0 | \n", "90.0 | \n", "True | \n", "False | \n", "True | \n", "
60 | \n", "P10275 | \n", "LNCaP | \n", "CC(=O)N[C@H](C(=O)N1C[C@@H](O)C[C@@H]1C(=O)N[C... | \n", "VHL | \n", "NaN | \n", "5.0 | \n", "False | \n", "True | \n", "False | \n", "
61 | \n", "O60885 | \n", "22Rv1 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "CRBN | \n", "NaN | \n", "36.0 | \n", "False | \n", "True | \n", "False | \n", "
68 | \n", "P10275 | \n", "VCaP | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "15.0 | \n", "76.0 | \n", "True | \n", "False | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
831 | \n", "P10415 | \n", "293T FOXP3 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "NaN | \n", "47.0 | \n", "False | \n", "True | \n", "False | \n", "
844 | \n", "Q07817 | \n", "293T FOXP3 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "NaN | \n", "38.0 | \n", "False | \n", "True | \n", "False | \n", "
851 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "810.0 | \n", "71.0 | \n", "True | \n", "False | \n", "True | \n", "
852 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCO... | \n", "FEM1B | \n", "820.0 | \n", "81.0 | \n", "True | \n", "False | \n", "True | \n", "
853 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCC... | \n", "FEM1B | \n", "250.0 | \n", "94.0 | \n", "True | \n", "False | \n", "True | \n", "
134 rows × 9 columns
\n", "\n", " | Compound ID | \n", "Uniprot | \n", "Smiles | \n", "E3 Ligase | \n", "InChI | \n", "InChI Key | \n", "Molecular Weight | \n", "Heavy Atom Count | \n", "Ring Count | \n", "Rotatable Bond Count | \n", "... | \n", "POI Sequence | \n", "E3 Ligase Uniprot | \n", "E3 Ligase Sequence | \n", "Cell Line Identifier | \n", "Active - OR | \n", "Active | \n", "Avg Tanimoto | \n", "Uniprot Group | \n", "E3 Group | \n", "Tanimoto Group | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2434 | \n", "Q92769 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C48H62FN7O8S/c1-31-43(65-30-52-31)33-... | \n", "AGMBKWPLZPKEGR-OUZJXKGJSA-N | \n", "916.130 | \n", "65 | \n", "5 | \n", "23 | \n", "... | \n", "MAYSQGGGKKKVCYYYDGDIGNYYYGQGHPMKPHRIRMTHNLLLNY... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "HCT116-53BPI(+/-) | \n", "False | \n", "False | \n", "0.378412 | \n", "57 | \n", "6 | \n", "64 | \n", "
1 | \n", "2004 | \n", "Q9NWZ3 | \n", "CC(C)Nc1cc(-n2ccc3cc(C#N)cnc32)ncc1C(=O)N[C@H]... | \n", "CRBN | \n", "InChI=1S/C41H42N10O8/c1-22(2)47-29-17-32(50-15... | \n", "YMXWKCAXGFLWAH-MONLTZSXSA-N | \n", "802.849 | \n", "59 | \n", "7 | \n", "13 | \n", "... | \n", "MNKPITPSTYVRCLNVGLIRKLSDFIDPQEGWKKLAVAIKKPSGDD... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "HEK293T | \n", "False | \n", "False | \n", "0.412830 | \n", "63 | \n", "0 | \n", "95 | \n", "
2 | \n", "549 | \n", "P03372 | \n", "CCN(CCCCCCCC(=O)N[C@H](C(=O)N1C[C@H](O)C[C@H]1... | \n", "VHL | \n", "InChI=1S/C55H65N5O8S2/c1-6-59(28-29-68-43-24-1... | \n", "IDYPSHVHIRZTPO-VADRYRRNSA-N | \n", "988.286 | \n", "70 | \n", "7 | \n", "22 | \n", "... | \n", "MTMTLHTKASGMALLHQIQGNELEPLNRPQLKIPLERPLGEVYLDS... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MCF-7 | \n", "False | \n", "False | \n", "0.374558 | \n", "13 | \n", "6 | \n", "61 | \n", "
3 | \n", "1098 | \n", "P04035 | \n", "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2cccc(OCCCc3cn(CCO... | \n", "CRBN | \n", "InChI=1S/C59H67FN8O13/c1-37(2)54-53(57(75)62-4... | \n", "FLZOWPIEHOMVRX-OEUTXLNUSA-N | \n", "1115.226 | \n", "81 | \n", "8 | \n", "31 | \n", "... | \n", "MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGW... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "NaN | \n", "False | \n", "False | \n", "0.359937 | \n", "14 | \n", "0 | \n", "47 | \n", "
4 | \n", "937 | \n", "O75530 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C43H48N10O6S/c1-26-36(60-25-48-26)29-... | \n", "PPNNFXIBKLCMTI-WXEAQWFJSA-N | \n", "832.988 | \n", "60 | \n", "7 | \n", "15 | \n", "... | \n", "MSEREVSTAPAGTDMPAAKKQKLSSDENSNPDLSGDENDDAVSIES... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "DB | \n", "False | \n", "True | \n", "0.406916 | \n", "8 | \n", "6 | \n", "90 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
81 | \n", "1103 | \n", "P04035 | \n", "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2cccc(OCCCCCc3cn(C... | \n", "CRBN | \n", "InChI=1S/C63H75FN8O14/c1-41(2)58-57(61(79)66-4... | \n", "METKOBDLEKDPEQ-MOHJDXDLSA-N | \n", "1187.333 | \n", "86 | \n", "8 | \n", "36 | \n", "... | \n", "MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGW... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "NaN | \n", "False | \n", "False | \n", "0.329734 | \n", "14 | \n", "0 | \n", "21 | \n", "
82 | \n", "2424 | \n", "O15379 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C46H59N7O7S/c1-30-41(61-29-49-30)32-1... | \n", "QANLZMZKZIPVPD-DRTKLYQKSA-N | \n", "854.087 | \n", "61 | \n", "5 | \n", "20 | \n", "... | \n", "MAKTVAYFYDPDVGNFHYGAGHPMKPHRLALTHSLVLHYGLYKKMI... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "HCT116-53BPI(+/-) | \n", "False | \n", "False | \n", "0.390991 | \n", "3 | \n", "6 | \n", "75 | \n", "
83 | \n", "335 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCn... | \n", "CRBN | \n", "InChI=1S/C41H41ClN10O8S/c1-22-23(2)61-41-34(22... | \n", "QDKBKEFTDKCTPJ-UFXYQILXSA-N | \n", "869.361 | \n", "61 | \n", "8 | \n", "16 | \n", "... | \n", "MSAESGPGTRLRNLPVMGDGLETSQMSTTQAQAQPQPANAASTNPP... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "NCI-H661 | \n", "False | \n", "False | \n", "0.376327 | \n", "7 | \n", "0 | \n", "62 | \n", "
84 | \n", "2520 | \n", "P10275 | \n", "CCN(c1ccc(C#N)c(Cl)c1)[C@H]1CC[C@H](NC(=O)c2cc... | \n", "CRBN | \n", "InChI=1S/C44H49ClN8O5/c1-2-52(35-10-5-29(27-46... | \n", "VZVAWYNZTBXESG-BULKPCKOSA-N | \n", "805.380 | \n", "58 | \n", "8 | \n", "9 | \n", "... | \n", "MEVQLGLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAAS... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "VCaP | \n", "True | \n", "True | \n", "0.390809 | \n", "17 | \n", "0 | \n", "75 | \n", "
85 | \n", "2528 | \n", "P10275 | \n", "CN(c1ccc(C#N)c(Cl)c1)[C@H]1CC[C@H](NC(=O)c2ccc... | \n", "CRBN | \n", "InChI=1S/C41H43ClN8O5/c1-46(30-9-4-26(22-43)35... | \n", "RNZSZCNUPQYOIX-RVWRWSNSSA-N | \n", "763.299 | \n", "55 | \n", "8 | \n", "8 | \n", "... | \n", "MEVQLGLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAAS... | \n", "Q96SW2 | \n", "MAGEGDQQDAAHNMGNHLPLLPAESEEEDEMEVEDQDSKEAKKPNI... | \n", "VCaP | \n", "True | \n", "True | \n", "0.439834 | \n", "17 | \n", "0 | \n", "117 | \n", "
86 rows × 39 columns
\n", "\n", " | Compound ID | \n", "Uniprot | \n", "Smiles | \n", "E3 Ligase | \n", "InChI | \n", "InChI Key | \n", "Molecular Weight | \n", "Heavy Atom Count | \n", "Ring Count | \n", "Rotatable Bond Count | \n", "... | \n", "dnn_mean | \n", "dnn_majority_vote | \n", "dnn_model_n0 | \n", "dnn_model_n1 | \n", "dnn_model_n2 | \n", "xgb_mean | \n", "xgb_majority_vote | \n", "xgb_model_n0 | \n", "xgb_model_n1 | \n", "xgb_model_n2 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2434 | \n", "Q92769 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C48H62FN7O8S/c1-31-43(65-30-52-31)33-... | \n", "AGMBKWPLZPKEGR-OUZJXKGJSA-N | \n", "916.130 | \n", "65 | \n", "5 | \n", "23 | \n", "... | \n", "0.079379 | \n", "False | \n", "0.170209 | \n", "0.063416 | \n", "0.004514 | \n", "0.026209 | \n", "False | \n", "0.024188 | \n", "0.029409 | \n", "0.025029 | \n", "
1 | \n", "2004 | \n", "Q9NWZ3 | \n", "CC(C)Nc1cc(-n2ccc3cc(C#N)cnc32)ncc1C(=O)N[C@H]... | \n", "CRBN | \n", "InChI=1S/C41H42N10O8/c1-22(2)47-29-17-32(50-15... | \n", "YMXWKCAXGFLWAH-MONLTZSXSA-N | \n", "802.849 | \n", "59 | \n", "7 | \n", "13 | \n", "... | \n", "0.435443 | \n", "False | \n", "0.269258 | \n", "0.663059 | \n", "0.374012 | \n", "0.721998 | \n", "True | \n", "0.766466 | \n", "0.608752 | \n", "0.790776 | \n", "
2 | \n", "549 | \n", "P03372 | \n", "CCN(CCCCCCCC(=O)N[C@H](C(=O)N1C[C@H](O)C[C@H]1... | \n", "VHL | \n", "InChI=1S/C55H65N5O8S2/c1-6-59(28-29-68-43-24-1... | \n", "IDYPSHVHIRZTPO-VADRYRRNSA-N | \n", "988.286 | \n", "70 | \n", "7 | \n", "22 | \n", "... | \n", "0.257002 | \n", "False | \n", "0.435150 | \n", "0.277998 | \n", "0.057857 | \n", "0.199748 | \n", "False | \n", "0.306645 | \n", "0.227117 | \n", "0.065481 | \n", "
3 | \n", "1098 | \n", "P04035 | \n", "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2cccc(OCCCc3cn(CCO... | \n", "CRBN | \n", "InChI=1S/C59H67FN8O13/c1-37(2)54-53(57(75)62-4... | \n", "FLZOWPIEHOMVRX-OEUTXLNUSA-N | \n", "1115.226 | \n", "81 | \n", "8 | \n", "31 | \n", "... | \n", "0.048816 | \n", "False | \n", "0.046254 | \n", "0.098125 | \n", "0.002068 | \n", "0.010581 | \n", "False | \n", "0.010565 | \n", "0.012536 | \n", "0.008641 | \n", "
4 | \n", "937 | \n", "O75530 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C43H48N10O6S/c1-26-36(60-25-48-26)29-... | \n", "PPNNFXIBKLCMTI-WXEAQWFJSA-N | \n", "832.988 | \n", "60 | \n", "7 | \n", "15 | \n", "... | \n", "0.538979 | \n", "True | \n", "0.644068 | \n", "0.732764 | \n", "0.240103 | \n", "0.902249 | \n", "True | \n", "0.873345 | \n", "0.916330 | \n", "0.917072 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
81 | \n", "1103 | \n", "P04035 | \n", "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2cccc(OCCCCCc3cn(C... | \n", "CRBN | \n", "InChI=1S/C63H75FN8O14/c1-41(2)58-57(61(79)66-4... | \n", "METKOBDLEKDPEQ-MOHJDXDLSA-N | \n", "1187.333 | \n", "86 | \n", "8 | \n", "36 | \n", "... | \n", "0.042708 | \n", "False | \n", "0.052732 | \n", "0.073325 | \n", "0.002068 | \n", "0.010631 | \n", "False | \n", "0.010794 | \n", "0.012440 | \n", "0.008659 | \n", "
82 | \n", "2424 | \n", "O15379 | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "InChI=1S/C46H59N7O7S/c1-30-41(61-29-49-30)32-1... | \n", "QANLZMZKZIPVPD-DRTKLYQKSA-N | \n", "854.087 | \n", "61 | \n", "5 | \n", "20 | \n", "... | \n", "0.146975 | \n", "False | \n", "0.251751 | \n", "0.182186 | \n", "0.006988 | \n", "0.616503 | \n", "True | \n", "0.532181 | \n", "0.683957 | \n", "0.633372 | \n", "
83 | \n", "335 | \n", "O60885 | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCn... | \n", "CRBN | \n", "InChI=1S/C41H41ClN10O8S/c1-22-23(2)61-41-34(22... | \n", "QDKBKEFTDKCTPJ-UFXYQILXSA-N | \n", "869.361 | \n", "61 | \n", "8 | \n", "16 | \n", "... | \n", "0.190578 | \n", "False | \n", "0.189161 | \n", "0.351043 | \n", "0.031529 | \n", "0.029169 | \n", "False | \n", "0.033773 | \n", "0.033058 | \n", "0.020677 | \n", "
84 | \n", "2520 | \n", "P10275 | \n", "CCN(c1ccc(C#N)c(Cl)c1)[C@H]1CC[C@H](NC(=O)c2cc... | \n", "CRBN | \n", "InChI=1S/C44H49ClN8O5/c1-2-52(35-10-5-29(27-46... | \n", "VZVAWYNZTBXESG-BULKPCKOSA-N | \n", "805.380 | \n", "58 | \n", "8 | \n", "9 | \n", "... | \n", "0.558503 | \n", "True | \n", "0.788242 | \n", "0.655330 | \n", "0.231937 | \n", "0.910048 | \n", "True | \n", "0.901576 | \n", "0.915484 | \n", "0.913085 | \n", "
85 | \n", "2528 | \n", "P10275 | \n", "CN(c1ccc(C#N)c(Cl)c1)[C@H]1CC[C@H](NC(=O)c2ccc... | \n", "CRBN | \n", "InChI=1S/C41H43ClN8O5/c1-46(30-9-4-26(22-43)35... | \n", "RNZSZCNUPQYOIX-RVWRWSNSSA-N | \n", "763.299 | \n", "55 | \n", "8 | \n", "8 | \n", "... | \n", "0.519538 | \n", "True | \n", "0.898071 | \n", "0.482898 | \n", "0.177645 | \n", "0.960892 | \n", "True | \n", "0.956234 | \n", "0.954487 | \n", "0.971954 | \n", "
86 rows × 49 columns
\n", "\n", " | xgb_mean | \n", "xgb_model_n0 | \n", "xgb_model_n1 | \n", "xgb_model_n2 | \n", "
---|---|---|---|---|
count | \n", "86.000000 | \n", "86.000000 | \n", "86.000000 | \n", "86.000000 | \n", "
mean | \n", "0.556621 | \n", "0.560646 | \n", "0.551290 | \n", "0.557927 | \n", "
std | \n", "0.376574 | \n", "0.377049 | \n", "0.378438 | \n", "0.384407 | \n", "
min | \n", "0.010581 | \n", "0.010565 | \n", "0.012440 | \n", "0.008641 | \n", "
25% | \n", "0.139769 | \n", "0.149495 | \n", "0.137282 | \n", "0.128790 | \n", "
50% | \n", "0.655310 | \n", "0.669479 | \n", "0.657044 | \n", "0.661293 | \n", "
75% | \n", "0.934663 | \n", "0.938701 | \n", "0.934932 | \n", "0.932825 | \n", "
max | \n", "0.983716 | \n", "0.984821 | \n", "0.984761 | \n", "0.984929 | \n", "
\n", " | Uniprot | \n", "Cell Line Identifier | \n", "Smiles | \n", "E3 Ligase | \n", "DC50 (nM) | \n", "Dmax (%) | \n", "Active | \n", "dnn_majority_vote | \n", "xgb_majority_vote | \n", "
---|---|---|---|---|---|---|---|---|---|
1 | \n", "Q9NWZ3 | \n", "HEK293T | \n", "CC(C)Nc1cc(-n2ccc3cc(C#N)cnc32)ncc1C(=O)N[C@H]... | \n", "CRBN | \n", "NaN | \n", "46.0 | \n", "False | \n", "False | \n", "True | \n", "
7 | \n", "Q06187 | \n", "THP-1 | \n", "NC(=O)c1c(-c2ccc(Oc3ccc(F)cc3F)cc2)nn([C@@H]2C... | \n", "CRBN | \n", "398.5 | \n", "68.0 | \n", "True | \n", "False | \n", "True | \n", "
13 | \n", "P11802 | \n", "Jurkat | \n", "CC(=O)c1c(C)c2cnc(Nc3ccc(N4CCN(CC(=O)NCCOCCOCC... | \n", "VHL | \n", "2512.0 | \n", "NaN | \n", "False | \n", "True | \n", "False | \n", "
14 | \n", "P10275 | \n", "VCaP | \n", "N#Cc1ccc(O[C@H]2CC[C@H](NC(=O)c3ccc(N4CCN(CCNc... | \n", "CRBN | \n", "0.8 | \n", "77.0 | \n", "True | \n", "False | \n", "True | \n", "
30 | \n", "P00533 | \n", "NCI-H3255 | \n", "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCCCOCCOCC... | \n", "VHL | \n", "22.3 | \n", "96.6 | \n", "True | \n", "False | \n", "True | \n", "
36 | \n", "P33981 | \n", "HCT 116 | \n", "CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... | \n", "CRBN | \n", "25.4 | \n", "57.0 | \n", "False | \n", "False | \n", "True | \n", "
44 | \n", "Q06187 | \n", "Ramos | \n", "NC(=O)c1c(-c2ccc(Oc3ccc(F)cc3F)cc2)nn([C@@H]2C... | \n", "CRBN | \n", "36.9 | \n", "85.0 | \n", "True | \n", "False | \n", "True | \n", "
48 | \n", "Q9NWZ3 | \n", "HEK293T | \n", "CC(C)Nc1cc(-n2ccc3cc(C#N)cnc32)ncc1C(=O)N[C@H]... | \n", "CRBN | \n", "190.0 | \n", "93.0 | \n", "True | \n", "False | \n", "True | \n", "
50 | \n", "Q9UM73 | \n", "SR | \n", "CCc1cc2c(cc1N1CCC(N3CCN(C(=O)CNc4cccc5c4CN(C4C... | \n", "CRBN | \n", "6.1 | \n", "87.7 | \n", "True | \n", "False | \n", "True | \n", "
53 | \n", "P10275 | \n", "VCaP | \n", "N#Cc1ccc(O[C@H]2CC[C@H](NC(=O)c3ccc(NCCCCCCCCC... | \n", "CRBN | \n", "1000.0 | \n", "42.0 | \n", "False | \n", "False | \n", "True | \n", "
55 | \n", "Q9UBN7 | \n", "NaN | \n", "CCCCN(Cc1ccc(C(=O)NO)cc1)C(=O)Nc1ccc(OCCCCCn2c... | \n", "CRBN | \n", "18.0 | \n", "45.0 | \n", "False | \n", "False | \n", "True | \n", "
58 | \n", "Q16539 | \n", "MDA-MB-231 | \n", "COc1cc2c(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4... | \n", "VHL | \n", "230.0 | \n", "90.1 | \n", "True | \n", "False | \n", "True | \n", "
61 | \n", "P33981 | \n", "HCT 116 | \n", "CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... | \n", "CRBN | \n", "58.1 | \n", "53.0 | \n", "False | \n", "False | \n", "True | \n", "
62 | \n", "Q96SW2 | \n", "HEK293T | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "20.4 | \n", "99.6 | \n", "True | \n", "False | \n", "True | \n", "
66 | \n", "P01116 | \n", "NCI-H358 | \n", "C=C(F)C(=O)N1CCN(c2nc(OC[C@@H]3CCCN3CCCOCCC(=O... | \n", "VHL | \n", "520.0 | \n", "40.0 | \n", "False | \n", "False | \n", "True | \n", "
69 | \n", "O60885 | \n", "HEK293T | \n", "Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=N[C@@H](CC(=O)NCCC... | \n", "FEM1B | \n", "250.0 | \n", "94.0 | \n", "True | \n", "False | \n", "True | \n", "
76 | \n", "P10275 | \n", "LNCaP | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "50.0 | \n", "71.0 | \n", "True | \n", "False | \n", "True | \n", "
82 | \n", "O15379 | \n", "HCT116-53BPI(+/-) | \n", "Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... | \n", "VHL | \n", "640.0 | \n", "59.0 | \n", "False | \n", "False | \n", "True | \n", "