{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Best Fingerprint Parameters\n",
    "\n",
    "This notebook is used to identify the best parameters for encoding PROTAC SMILES as Morgan fingerprints.\n",
    "\n",
    "It uses Optuna to optimize the parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "from collections import defaultdict\n",
    "import warnings\n",
    "import logging\n",
    "from typing import Literal\n",
    "\n",
    "# sys.path entries are used verbatim by the import system: '~' is NOT expanded,\n",
    "# so the home directory must be resolved explicitly for this entry to have any effect.\n",
    "sys.path.append(os.path.expanduser('~/PROTAC-Degradation-Predictor/protac_degradation_predictor'))\n",
    "import protac_degradation_predictor as pdp\n",
    "\n",
    "import pytorch_lightning as pl\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import AllChem\n",
    "from rdkit import DataStructs\n",
    "from jsonargparse import CLI\n",
    "import pandas as pd\n",
    "# Import tqdm for notebook\n",
    "from tqdm.notebook import tqdm\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import OrdinalEncoder\n",
    "from sklearn.model_selection import (\n",
    "    StratifiedKFold,\n",
    "    StratifiedGroupKFold,\n",
    ")\n",
    "\n",
    "\n",
    "# Name of the activity-label column and the thresholds forwarded to pdp.is_active.\n",
    "active_col = 'Active (Dmax 0.6, pDC50 6.0)'\n",
    "pDC50_threshold = 6.0\n",
    "Dmax_threshold = 0.6\n",
    "\n",
    "# pandas expands '~' in file paths itself, so no expanduser is needed here.\n",
    "protac_df = pd.read_csv('~/PROTAC-Degradation-Predictor/data/PROTAC-Degradation-DB.csv')\n",
    "# Normalize the spelling of the IAP ligase name.\n",
    "protac_df['E3 Ligase'] = protac_df['E3 Ligase'].str.replace('Iap', 'IAP')\n",
    "# Label every entry via pdp.is_active from its DC50 and Dmax values and the thresholds above.\n",
    "protac_df[active_col] = protac_df.apply(\n",
    "    lambda x: pdp.is_active(\n",
    "        x['DC50 (nM)'],\n",
    "        x['Dmax (%)'],\n",
    "        pDC50_threshold=pDC50_threshold,\n",
    "        Dmax_threshold=Dmax_threshold,\n",
    "    ),\n",
    "    axis=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "771"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_random_split_indices(\n",
    "    active_df: pd.DataFrame,\n",
    "    test_split: float,\n",
    "    random_state: int = 42,\n",
    ") -> pd.Index:\n",
    "    \"\"\" Get the indices of the test set using a random split.\n",
    "\n",
    "    Args:\n",
    "        active_df (pd.DataFrame): The DataFrame containing the active PROTACs.\n",
    "        test_split (float): The fraction (e.g. 0.1 for 10%) of the rows of `active_df` to use as the test set.\n",
    "        random_state (int): Seed forwarded to `DataFrame.sample` so that the split is reproducible. Defaults to 42.\n",
    "\n",
    "    Returns:\n",
    "        pd.Index: The indices of the test set.\n",
    "    \"\"\"\n",
    "    test_df = active_df.sample(frac=test_split, random_state=random_state)\n",
    "    return test_df.index\n",
    "\n",
    "# DC50 is stored in nM (see the column name): convert it to molar before taking -log10.\n",
    "protac_df['pDC50'] = -np.log10(protac_df['DC50 (nM)'] * 1e-9)\n",
    "# Keep only the entries that have an activity label.\n",
    "active_df = protac_df[protac_df[active_col].notna()].copy()\n",
    "# Hold out a random fraction of the labelled entries as the test set;\n",
    "# everything else forms the train/validation pool.\n",
    "test_split = 0.1\n",
    "test_indices = get_random_split_indices(active_df, test_split)\n",
    "train_val_df = active_df[~active_df.index.isin(test_indices)].copy()\n",
    "len(train_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.935675466781487"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get the mean of pDC50 for the active PROTACs\n",
    "mean_pDC50 = active_df['pDC50'].mean()\n",
    "mean_pDC50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Compound ID', 'Uniprot', 'Smiles', 'E3 Ligase', 'InChI', 'InChI Key',\n",
       " 'Molecular Weight', 'Heavy Atom Count', 'Ring Count',\n",
       " 'Rotatable Bond Count', 'Topological Polar Surface Area',\n",
       " 'Hydrogen Bond Acceptor Count', 'Hydrogen Bond Donor Count',\n",
       " 'Cell Type', 'Treatment Time (h)', 'DC50 (nM)', 'Dmax (%)', 'Active',\n",
       " 'Article DOI', 'Comments', 'Database', 'Molecular Formula', 'cLogP',\n",
       " 'Target', 'PDB', 'Name', 'Assay (DC50/Dmax)', 'Exact Mass', 'XLogP3',\n",
       " 'Target (Parsed)', 'POI Sequence', 'E3 Ligase Uniprot',\n",
       " 'E3 Ligase Sequence', 'Cell Line Identifier', 'Active - OR',\n",
       " 'Active (Dmax 0.6, pDC50 6.0)', 'pDC50'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_val_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
69C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC...H1975/WRTrue5.9100.0P00533VHLNaN10.1016/j.ejmech.2020.112199
1229C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC...A431 siYAPFalse2000.0NaNP00533VHLDegradation of EGFR in A431 cells after 16 h t...10.1016/j.ejmech.2020.112199
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "69 C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... H1975/WR \n", "1229 C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CC... A431 siYAP \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "69 True 5.9 100.0 P00533 VHL \n", "1229 False 2000.0 NaN P00533 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "69 NaN \n", "1229 Degradation of EGFR in A431 cells after 16 h t... \n", "\n", " Article DOI \n", "69 10.1016/j.ejmech.2020.112199 \n", "1229 10.1016/j.ejmech.2020.112199 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CCN... & H1975/WR & True & 8.229148 & 100.0 & P00533 & VHL & NaN & 10.1016/j.ejmech.2020.112199 \\\\\n", " NaN & C=CC(=O)Nc1cccc(-n2c(=O)cc(C)c3cnc(Nc4ccc(N5CCN... & A431 siYAP & False & 5.698970 & NaN & P00533 & VHL & Degradation of EGFR in A431 cells after 16 h tr... & 10.1016/j.ejmech.2020.112199 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1793CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True30.2864.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1794CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False1000.0028.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1793 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1794 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1793 True 30.28 64.0 P33981 CRBN \n", "1794 False 1000.00 28.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1793 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1794 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1793 10.1021/acs.jmedchem.1c01768 \n", "1794 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 7.518844 & 64.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 6.000000 & 28.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1795CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True31.3572.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1796CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False1000.0041.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1795 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1796 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1795 True 31.35 72.0 P33981 CRBN \n", "1796 False 1000.00 41.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1795 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1796 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1795 10.1021/acs.jmedchem.1c01768 \n", "1796 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 7.503762 & 72.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 6.000000 & 41.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1771CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True4.9776.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1772CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False20.3050.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1771 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1772 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1771 True 4.97 76.0 P33981 CRBN \n", "1772 False 20.30 50.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1771 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1772 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1771 10.1021/acs.jmedchem.1c01768 \n", "1772 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 8.303644 & 76.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 7.692504 & 50.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1789CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True7.0384.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1790CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False161.0043.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1789 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1790 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1789 True 7.03 84.0 P33981 CRBN \n", "1790 False 161.00 43.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1789 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1790 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1789 10.1021/acs.jmedchem.1c01768 \n", "1790 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 8.153045 & 84.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 6.793174 & 43.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1781CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True3.0485.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1782CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False663.0050.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1781 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1782 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1781 True 3.04 85.0 P33981 CRBN \n", "1782 False 663.00 50.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1781 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1782 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1781 10.1021/acs.jmedchem.1c01768 \n", "1782 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 8.517126 & 85.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 6.178486 & 50.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1799CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...COLO 205True21.7188.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
1800CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc...HCT 116False1000.0013.0P33981CRBNDegradation of TTK in COLO-205/HCT-116 cells a...10.1021/acs.jmedchem.1c01768
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1799 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... COLO 205 \n", "1800 CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4cc... HCT 116 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1799 True 21.71 88.0 P33981 CRBN \n", "1800 False 1000.00 13.0 P33981 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1799 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "1800 Degradation of TTK in COLO-205/HCT-116 cells a... \n", "\n", " Article DOI \n", "1799 10.1021/acs.jmedchem.1c01768 \n", "1800 10.1021/acs.jmedchem.1c01768 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & COLO 205 & True & 7.66334 & 88.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", " NaN & CCC(=O)N[C@H]1CC[C@@H](n2c(=O)cc(C)c3cnc(Nc4ccc... & HCT 116 & False & 6.00000 & 13.0 & P33981 & CRBN & Degradation of TTK in COLO-205/HCT-116 cells af... & 10.1021/acs.jmedchem.1c01768 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
516COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(C...IH-1False560.010.0Q9H8M2VHLNaN10.1021/acs.jmedchem.8b01413
925COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(C...HeLaTrue560.080.0Q9H8M2VHLDegradation of BRD9 in HeLa cells after 4 h tr...10.1021/acs.jmedchem.8b01413
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "516 COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(C... IH-1 \n", "925 COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(C... HeLa \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "516 False 560.0 10.0 Q9H8M2 VHL \n", "925 True 560.0 80.0 Q9H8M2 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "516 NaN \n", "925 Degradation of BRD9 in HeLa cells after 4 h tr... \n", "\n", " Article DOI \n", "516 10.1021/acs.jmedchem.8b01413 \n", "925 10.1021/acs.jmedchem.8b01413 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(CC... & IH-1 & False & 6.251812 & 10.0 & Q9H8M2 & VHL & NaN & 10.1021/acs.jmedchem.8b01413 \\\\\n", " NaN & COc1cc(-c2cn(C)c(=O)c3cnccc23)cc(OC)c1CN1CCN(CC... & HeLa & True & 6.251812 & 80.0 & Q9H8M2 & VHL & Degradation of BRD9 in HeLa cells after 4 h tre... & 10.1021/acs.jmedchem.8b01413 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1557COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...A549 Cas9True243.080.0Q05397VHLDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1558COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...NaNFalse631.059.0Q05397VHLDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1559COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...SNU-387True32.098.0Q05397VHLDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1560COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...HLETrue25.085.0Q05397VHLDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1561COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...Huh-7True100.079.0Q05397VHLDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1562COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...SNU-423True79.087.0Q05397VHLDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1563COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...HLFTrue40.066.0Q05397VHLDegradation of Fak in HLF/SNU-398/HUCCT1 cells...10.1021/acs.jmedchem.8b01826
1564COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...SNU-398True10.099.0Q05397VHLDegradation of Fak in HLF/SNU-398/HUCCT1 cells...10.1021/acs.jmedchem.8b01826
1565COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...HuCC-T1-G10True126.086.0Q05397VHLDegradation of Fak in HLF/SNU-398/HUCCT1 cells...10.1021/acs.jmedchem.8b01826
1566COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...HuH-1True79.079.0Q05397VHLDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
1567COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...HepG2 hALRTrue631.065.0Q05397VHLDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
1568COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@...SK-HEP-1True126.087.0Q05397VHLDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1557 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... A549 Cas9 \n", "1558 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... NaN \n", "1559 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... SNU-387 \n", "1560 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... HLE \n", "1561 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... Huh-7 \n", "1562 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... SNU-423 \n", "1563 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... HLF \n", "1564 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... SNU-398 \n", "1565 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... HuCC-T1-G10 \n", "1566 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... HuH-1 \n", "1567 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... HepG2 hALR \n", "1568 COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@... SK-HEP-1 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1557 True 243.0 80.0 Q05397 VHL \n", "1558 False 631.0 59.0 Q05397 VHL \n", "1559 True 32.0 98.0 Q05397 VHL \n", "1560 True 25.0 85.0 Q05397 VHL \n", "1561 True 100.0 79.0 Q05397 VHL \n", "1562 True 79.0 87.0 Q05397 VHL \n", "1563 True 40.0 66.0 Q05397 VHL \n", "1564 True 10.0 99.0 Q05397 VHL \n", "1565 True 126.0 86.0 Q05397 VHL \n", "1566 True 79.0 79.0 Q05397 VHL \n", "1567 True 631.0 65.0 Q05397 VHL \n", "1568 True 126.0 87.0 Q05397 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "1557 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1558 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1559 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1560 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1561 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1562 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1563 Degradation of Fak in HLF/SNU-398/HUCCT1 cells... \n", "1564 Degradation of Fak in HLF/SNU-398/HUCCT1 cells... \n", "1565 Degradation of Fak in HLF/SNU-398/HUCCT1 cells... 
\n", "1566 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "1567 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "1568 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "\n", " Article DOI \n", "1557 10.1021/acs.jmedchem.8b01826 \n", "1558 10.1021/acs.jmedchem.8b01826 \n", "1559 10.1021/acs.jmedchem.8b01826 \n", "1560 10.1021/acs.jmedchem.8b01826 \n", "1561 10.1021/acs.jmedchem.8b01826 \n", "1562 10.1021/acs.jmedchem.8b01826 \n", "1563 10.1021/acs.jmedchem.8b01826 \n", "1564 10.1021/acs.jmedchem.8b01826 \n", "1565 10.1021/acs.jmedchem.8b01826 \n", "1566 10.1021/acs.jmedchem.8b01826 \n", "1567 10.1021/acs.jmedchem.8b01826 \n", "1568 10.1021/acs.jmedchem.8b01826 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", " Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & A549 Cas9 & True & 6.614394 & 80.0 & Q05397 & VHL & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & NaN & False & 6.199971 & 59.0 & Q05397 & VHL & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & SNU-387 & True & 7.494850 & 98.0 & Q05397 & VHL & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & HLE & True & 7.602060 & 85.0 & Q05397 & VHL & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & Huh-7 & True & 7.000000 & 79.0 & Q05397 & VHL & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... 
& 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & SNU-423 & True & 7.102373 & 87.0 & Q05397 & VHL & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & HLF & True & 7.397940 & 66.0 & Q05397 & VHL & Degradation of Fak in HLF/SNU-398/HUCCT1 cells ... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & SNU-398 & True & 8.000000 & 99.0 & Q05397 & VHL & Degradation of Fak in HLF/SNU-398/HUCCT1 cells ... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & HuCC-T1-G10 & True & 6.899629 & 86.0 & Q05397 & VHL & Degradation of Fak in HLF/SNU-398/HUCCT1 cells ... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & HuH-1 & True & 7.102373 & 79.0 & Q05397 & VHL & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & HepG2 hALR & True & 6.199971 & 65.0 & Q05397 & VHL & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-0319 & COc1cc(C(=O)NCCOCCOCCOCC(=O)N[C@H](C(=O)N2C[C@H... & SK-HEP-1 & True & 6.899629 & 87.0 & Q05397 & VHL & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1545COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...HLETrue158.079.0Q05397CRBNDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1546COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...Huh-7True50.093.0Q05397CRBNDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1547COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...SNU-423True13.093.0Q05397CRBNDegradation of Fak in HLE/HuH-7/SNU-423 cells ...10.1021/acs.jmedchem.8b01826
1548COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...HuH-1False251.050.0Q05397CRBNDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
1549COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...HepG2 hALRTrue32.089.0Q05397CRBNDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
1550COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...SK-HEP-1True32.089.0Q05397CRBNDegradation of Fak in HUH-1/HepG2/SK-Hep-1 cel...10.1021/acs.jmedchem.8b01826
1551COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...A549 Cas9True27.095.0Q05397CRBNDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1552COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...NaNTrue13.096.0Q05397CRBNDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1553COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...SNU-387True25.090.0Q05397CRBNDegradation of Fak in A549/Hep3B2.1-7/SNU-387 ...10.1021/acs.jmedchem.8b01826
1555COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...SNU-398True3.095.0Q05397CRBNDegradation of Fak in HLF/SNU-398/HUCCT1 cells...10.1021/acs.jmedchem.8b01826
1556COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(...HuCC-T1-G10True13.090.0Q05397CRBNDegradation of Fak in HLF/SNU-398/HUCCT1 cells...10.1021/acs.jmedchem.8b01826
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1545 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... HLE \n", "1546 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... Huh-7 \n", "1547 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... SNU-423 \n", "1548 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... HuH-1 \n", "1549 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... HepG2 hALR \n", "1550 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... SK-HEP-1 \n", "1551 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... A549 Cas9 \n", "1552 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... NaN \n", "1553 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... SNU-387 \n", "1555 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... SNU-398 \n", "1556 COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(... HuCC-T1-G10 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1545 True 158.0 79.0 Q05397 CRBN \n", "1546 True 50.0 93.0 Q05397 CRBN \n", "1547 True 13.0 93.0 Q05397 CRBN \n", "1548 False 251.0 50.0 Q05397 CRBN \n", "1549 True 32.0 89.0 Q05397 CRBN \n", "1550 True 32.0 89.0 Q05397 CRBN \n", "1551 True 27.0 95.0 Q05397 CRBN \n", "1552 True 13.0 96.0 Q05397 CRBN \n", "1553 True 25.0 90.0 Q05397 CRBN \n", "1555 True 3.0 95.0 Q05397 CRBN \n", "1556 True 13.0 90.0 Q05397 CRBN \n", "\n", " Assay (DC50/Dmax) \\\n", "1545 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1546 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1547 Degradation of Fak in HLE/HuH-7/SNU-423 cells ... \n", "1548 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "1549 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "1550 Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cel... \n", "1551 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1552 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1553 Degradation of Fak in A549/Hep3B2.1-7/SNU-387 ... \n", "1555 Degradation of Fak in HLF/SNU-398/HUCCT1 cells... 
\n", "1556 Degradation of Fak in HLF/SNU-398/HUCCT1 cells... \n", "\n", " Article DOI \n", "1545 10.1021/acs.jmedchem.8b01826 \n", "1546 10.1021/acs.jmedchem.8b01826 \n", "1547 10.1021/acs.jmedchem.8b01826 \n", "1548 10.1021/acs.jmedchem.8b01826 \n", "1549 10.1021/acs.jmedchem.8b01826 \n", "1550 10.1021/acs.jmedchem.8b01826 \n", "1551 10.1021/acs.jmedchem.8b01826 \n", "1552 10.1021/acs.jmedchem.8b01826 \n", "1553 10.1021/acs.jmedchem.8b01826 \n", "1555 10.1021/acs.jmedchem.8b01826 \n", "1556 10.1021/acs.jmedchem.8b01826 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", " Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & HLE & True & 6.801343 & 79.0 & Q05397 & CRBN & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & Huh-7 & True & 7.301030 & 93.0 & Q05397 & CRBN & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & SNU-423 & True & 7.886057 & 93.0 & Q05397 & CRBN & Degradation of Fak in HLE/HuH-7/SNU-423 cells a... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & HuH-1 & False & 6.600326 & 50.0 & Q05397 & CRBN & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & HepG2 hALR & True & 7.494850 & 89.0 & Q05397 & CRBN & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... 
& SK-HEP-1 & True & 7.494850 & 89.0 & Q05397 & CRBN & Degradation of Fak in HUH-1/HepG2/SK-Hep-1 cell... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & A549 Cas9 & True & 7.568636 & 95.0 & Q05397 & CRBN & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & NaN & True & 7.886057 & 96.0 & Q05397 & CRBN & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & SNU-387 & True & 7.602060 & 90.0 & Q05397 & CRBN & Degradation of Fak in A549/Hep3B2.1-7/SNU-387 c... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & SNU-398 & True & 8.522879 & 95.0 & Q05397 & CRBN & Degradation of Fak in HLF/SNU-398/HUCCT1 cells ... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "BI-3663 & COc1cc(C(=O)NCCOCCOCCOCCC(=O)Nc2cccc3c2C(=O)N(C... & HuCC-T1-G10 & True & 7.886057 & 90.0 & Q05397 & CRBN & Degradation of Fak in HLF/SNU-398/HUCCT1 cells ... & 10.1021/acs.jmedchem.8b01826 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1294Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...HeLaTrue790.092.0O75530VHLDegradation of EED in HeLa/DB cells after 24 h...10.1016/j.chembiol.2019.11.006
1296Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...HeLaTrue300.075.0Q15910VHLDegradation of EZH2 in HeLa/DB cells after 24 ...10.1016/j.chembiol.2019.11.006
1297Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...DBTrue670.096.0Q15910VHLDegradation of EZH2 in HeLa/DB cells after 24 ...10.1016/j.chembiol.2019.11.006
1298Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...HeLaFalseNaN22.0Q15022VHLDegradation of SUZ12 in HeLa/DB cells after 24...10.1016/j.chembiol.2019.11.006
1299Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...DBTrue590.082.0Q15022VHLDegradation of SUZ12 in HeLa/DB cells after 24...10.1016/j.chembiol.2019.11.006
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1294 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... HeLa \n", "1296 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... HeLa \n", "1297 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... DB \n", "1298 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... HeLa \n", "1299 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... DB \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1294 True 790.0 92.0 O75530 VHL \n", "1296 True 300.0 75.0 Q15910 VHL \n", "1297 True 670.0 96.0 Q15910 VHL \n", "1298 False NaN 22.0 Q15022 VHL \n", "1299 True 590.0 82.0 Q15022 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "1294 Degradation of EED in HeLa/DB cells after 24 h... \n", "1296 Degradation of EZH2 in HeLa/DB cells after 24 ... \n", "1297 Degradation of EZH2 in HeLa/DB cells after 24 ... \n", "1298 Degradation of SUZ12 in HeLa/DB cells after 24... \n", "1299 Degradation of SUZ12 in HeLa/DB cells after 24... \n", "\n", " Article DOI \n", "1294 10.1016/j.chembiol.2019.11.006 \n", "1296 10.1016/j.chembiol.2019.11.006 \n", "1297 10.1016/j.chembiol.2019.11.006 \n", "1298 10.1016/j.chembiol.2019.11.006 \n", "1299 10.1016/j.chembiol.2019.11.006 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", " Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", "UNC6852 & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & HeLa & True & 6.102373 & 92.0 & O75530 & VHL & Degradation of EED in HeLa/DB cells after 24 h ... & 10.1016/j.chembiol.2019.11.006 \\\\\n", "UNC6852 & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & HeLa & True & 6.522879 & 75.0 & Q15910 & VHL & Degradation of EZH2 in HeLa/DB cells after 24 h... 
& 10.1016/j.chembiol.2019.11.006 \\\\\n", "UNC6852 & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & DB & True & 6.173925 & 96.0 & Q15910 & VHL & Degradation of EZH2 in HeLa/DB cells after 24 h... & 10.1016/j.chembiol.2019.11.006 \\\\\n", "UNC6852 & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & HeLa & False & NaN & 22.0 & Q15022 & VHL & Degradation of SUZ12 in HeLa/DB cells after 24 ... & 10.1016/j.chembiol.2019.11.006 \\\\\n", "UNC6852 & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & DB & True & 6.229148 & 82.0 & Q15022 & VHL & Degradation of SUZ12 in HeLa/DB cells after 24 ... & 10.1016/j.chembiol.2019.11.006 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1435Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...LNCaPTrue50.071.0P10275VHLDegradation of AR in LNCaP/VCaP cells after 2-...10.1021/acsmedchemlett.9b00115
1436Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O...VCaPFalse50.051.0P10275VHLDegradation of AR in LNCaP/VCaP cells after 2-...10.1021/acsmedchemlett.9b00115
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1435 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... LNCaP \n", "1436 Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O... VCaP \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1435 True 50.0 71.0 P10275 VHL \n", "1436 False 50.0 51.0 P10275 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "1435 Degradation of AR in LNCaP/VCaP cells after 2-... \n", "1436 Degradation of AR in LNCaP/VCaP cells after 2-... \n", "\n", " Article DOI \n", "1435 10.1021/acsmedchemlett.9b00115 \n", "1436 10.1021/acsmedchemlett.9b00115 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & LNCaP & True & 7.30103 & 71.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells after 2-4... & 10.1021/acsmedchemlett.9b00115 \\\\\n", " NaN & Cc1ncsc1-c1ccc(CNC(=O)[C@@H]2C[C@@H](O)CN2C(=O)... & VCaP & False & 7.30103 & 51.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells after 2-4... & 10.1021/acsmedchemlett.9b00115 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
4Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...MOLT-4True53.0100.0Q07817VHLNaN10.1016/j.ejmech.2020.112186
1245Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...MOLT-4True63.090.8Q07817VHLDegradation of BCL-xL in MOLT-4/platelets cell...10.1038/s41591-019-0668-z
1246Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...2T60False3000.026.0Q07817VHLDegradation of BCL-xL in MOLT-4/platelets cell...10.1038/s41591-019-0668-z
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "4 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... MOLT-4 \n", "1245 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... MOLT-4 \n", "1246 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... 2T60 \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "4 True 53.0 100.0 Q07817 VHL \n", "1245 True 63.0 90.8 Q07817 VHL \n", "1246 False 3000.0 26.0 Q07817 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "4 NaN \n", "1245 Degradation of BCL-xL in MOLT-4/platelets cell... \n", "1246 Degradation of BCL-xL in MOLT-4/platelets cell... \n", "\n", " Article DOI \n", "4 10.1016/j.ejmech.2020.112186 \n", "1245 10.1038/s41591-019-0668-z \n", "1246 10.1038/s41591-019-0668-z " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", " Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & MOLT-4 & True & 7.275724 & 100.0 & Q07817 & VHL & NaN & 10.1016/j.ejmech.2020.112186 \\\\\n", "DT2216 & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & MOLT-4 & True & 7.200659 & 90.8 & Q07817 & VHL & Degradation of BCL-xL in MOLT-4/platelets cells... & 10.1038/s41591-019-0668-z \\\\\n", "DT2216 & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & 2T60 & False & 5.522879 & 26.0 & Q07817 & VHL & Degradation of BCL-xL in MOLT-4/platelets cells... & 10.1038/s41591-019-0668-z \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1479Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...LNCaPTrue50.071.0P10275VHLDegradation of AR in LNCaP/VCaP cells using EL...10.1021/acsmedchemlett.0c00236
1480Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...VCaPFalse50.051.0P10275VHLDegradation of AR in LNCaP/VCaP cells using EL...10.1021/acsmedchemlett.0c00236
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1479 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... LNCaP \n", "1480 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... VCaP \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1479 True 50.0 71.0 P10275 VHL \n", "1480 False 50.0 51.0 P10275 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "1479 Degradation of AR in LNCaP/VCaP cells using EL... \n", "1480 Degradation of AR in LNCaP/VCaP cells using EL... \n", "\n", " Article DOI \n", "1479 10.1021/acsmedchemlett.0c00236 \n", "1480 10.1021/acsmedchemlett.0c00236 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & LNCaP & True & 7.30103 & 71.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells using ELI... & 10.1021/acsmedchemlett.0c00236 \\\\\n", " NaN & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & VCaP & False & 7.30103 & 51.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells using ELI... & 10.1021/acsmedchemlett.0c00236 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n", " print(group[['Name', 'Smiles', 'Cell Line Identifier', active_col, 'pDC50', 'Dmax (%)', 'Uniprot', 'E3 Ligase', 'Assay (DC50/Dmax)', 'Article DOI']].to_latex(index=False))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmilesCell Line IdentifierActive (Dmax 0.6, pDC50 6.0)DC50 (nM)Dmax (%)UniprotE3 LigaseAssay (DC50/Dmax)Article DOI
1477Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...LNCaPTrue50.071.0P10275VHLDegradation of AR in LNCaP/VCaP cells using EL...10.1021/acsmedchemlett.0c00236
1478Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)...VCaPFalse50.051.0P10275VHLDegradation of AR in LNCaP/VCaP cells using EL...10.1021/acsmedchemlett.0c00236
\n", "
" ], "text/plain": [ " Smiles Cell Line Identifier \\\n", "1477 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... LNCaP \n", "1478 Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... VCaP \n", "\n", " Active (Dmax 0.6, pDC50 6.0) DC50 (nM) Dmax (%) Uniprot E3 Ligase \\\n", "1477 True 50.0 71.0 P10275 VHL \n", "1478 False 50.0 51.0 P10275 VHL \n", "\n", " Assay (DC50/Dmax) \\\n", "1477 Degradation of AR in LNCaP/VCaP cells using EL... \n", "1478 Degradation of AR in LNCaP/VCaP cells using EL... \n", "\n", " Article DOI \n", "1477 10.1021/acsmedchemlett.0c00236 \n", "1478 10.1021/acsmedchemlett.0c00236 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{llllrrllll}\n", "\\toprule\n", "Name & Smiles & Cell Line Identifier & Active (Dmax 0.6, pDC50 6.0) & pDC50 & Dmax (\\%) & Uniprot & E3 Ligase & Assay (DC50/Dmax) & Article DOI \\\\\n", "\\midrule\n", " NaN & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & LNCaP & True & 7.30103 & 71.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells using ELI... & 10.1021/acsmedchemlett.0c00236 \\\\\n", " NaN & Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)C... & VCaP & False & 7.30103 & 51.0 & P10275 & VHL & Degradation of AR in LNCaP/VCaP cells using ELI... & 10.1021/acsmedchemlett.0c00236 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3711258/332080580.py:17: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
import optuna


def objective(trial: optuna.Trial, verbose: int = 0) -> float:
    """Score one (radius, fingerprint size) choice for Morgan fingerprints.

    The trial samples a Morgan radius and a fingerprint length, encodes every
    unique SMILES of the module-level ``train_val_df`` with a chirality-aware
    Morgan generator, and counts how many rows of ``train_val_df`` carry a
    SMILES whose fingerprint had already been produced by an earlier SMILES.

    Relies on names defined in other cells: ``train_val_df``, ``protac_df``,
    ``pdp`` and ``AllChem``.

    Args:
        trial: Optuna trial used to sample ``radius`` (1 to 15) and ``fpsize``
            (128 to 2048 in steps of 128).
        verbose: When truthy, print the sampled parameters and the collision
            statistics for this trial.

    Returns:
        ``num_overlaps + radius + fpsize / 100``.
        NOTE(review): presumably minimised by the study, so that collisions,
        large radii and long fingerprints are all penalised; confirm against
        the direction used when the study is created.
    """
    radius = trial.suggest_int('radius', 1, 15)
    fpsize = trial.suggest_int('fpsize', 128, 2048, step=128)

    morgan_fpgen = AllChem.GetMorganGenerator(
        radius=radius,
        fpSize=fpsize,
        includeChirality=True,
    )

    # One fingerprint per unique SMILES, in first-appearance order.
    smiles2fp = {
        smiles: pdp.get_fingerprint(smiles, morgan_fpgen)
        for smiles in train_val_df['Smiles'].unique().tolist()
    }

    # A SMILES counts as overlapping when its fingerprint was already produced
    # by an earlier SMILES; the first member of each colliding group is
    # therefore not listed.
    overlapping_smiles = []
    unique_fps = set()
    for smiles, fp in smiles2fp.items():
        fp_key = tuple(fp)  # hashable form, computed once per SMILES
        if fp_key in unique_fps:
            overlapping_smiles.append(smiles)
        else:
            unique_fps.add(fp_key)
    num_overlaps = len(train_val_df[train_val_df["Smiles"].isin(overlapping_smiles)])

    if verbose:
        # Only needed for reporting, so the full-dataset scan is skipped in
        # non-verbose optimisation runs.
        num_overlaps_tot = len(protac_df[protac_df["Smiles"].isin(overlapping_smiles)])
        print(f'Radius: {radius}')
        print(f'FP length: {fpsize}')
        print(f'Number of unique SMILES: {len(smiles2fp)}')
        print(f'Number of unique fingerprints: {len(unique_fps)}')
        print(f'Number of SMILES with overlapping fingerprints: {len(overlapping_smiles)}')
        print(f'Number of overlapping SMILES in train_val_df: {num_overlaps}')
        print(f'Number of overlapping SMILES in protac_df: {num_overlaps_tot}')
    return num_overlaps + radius + fpsize / 100