ribesstefano committed on
Commit
e7f9851
1 Parent(s): dafeec2

Added Iap to IAP conversion in data curation

Browse files
Files changed (1) hide show
  1. notebooks/data_curation.ipynb +511 -3
notebooks/data_curation.ipynb CHANGED
@@ -1094,18 +1094,19 @@
1094
  },
1095
  {
1096
  "cell_type": "code",
1097
- "execution_count": 12,
1098
  "metadata": {},
1099
  "outputs": [],
1100
  "source": [
1101
  "# Rename Cereblon to CRBN in E3 Ligase column\n",
1102
  "protac_pedia_df['E3 Ligase'] = protac_pedia_df['E3 Ligase'].replace(\n",
1103
- " 'Cereblon', 'CRBN')"
 
1104
  ]
1105
  },
1106
  {
1107
  "cell_type": "code",
1108
- "execution_count": 13,
1109
  "metadata": {},
1110
  "outputs": [
1111
  {
@@ -1126,10 +1127,517 @@
1126
  " nan_comments_idx][['Compound ID', 'Cell Type', 'Comments', 'DC50 (nM)', 'Dmax (%)']].to_csv(\n",
1127
  " os.path.join(data_dir, 'processed', 'multiple_cell_types.csv'), index=False)\n",
1128
  "\n",
 
 
1129
  "protac_pedia_df = protac_pedia_df[~multiple_cells_idx]\n",
1130
  "print(f'Number of rows with single cell types: {len(protac_pedia_df)}')"
1131
  ]
1132
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133
  {
1134
  "cell_type": "code",
1135
  "execution_count": 14,
 
1094
  },
1095
  {
1096
  "cell_type": "code",
1097
+ "execution_count": 13,
1098
  "metadata": {},
1099
  "outputs": [],
1100
  "source": [
1101
  "# Rename Cereblon to CRBN in E3 Ligase column\n",
1102
  "protac_pedia_df['E3 Ligase'] = protac_pedia_df['E3 Ligase'].replace(\n",
1103
+ " 'Cereblon', 'CRBN')\n",
1104
+ "protac_pedia_df['E3 Ligase'] = protac_pedia_df['E3 Ligase'].str.replace('Iap', 'IAP')"
1105
  ]
1106
  },
1107
  {
1108
  "cell_type": "code",
1109
+ "execution_count": 14,
1110
  "metadata": {},
1111
  "outputs": [
1112
  {
 
1127
  " nan_comments_idx][['Compound ID', 'Cell Type', 'Comments', 'DC50 (nM)', 'Dmax (%)']].to_csv(\n",
1128
  " os.path.join(data_dir, 'processed', 'multiple_cell_types.csv'), index=False)\n",
1129
  "\n",
1130
+ "multiple_cells_df = protac_pedia_df[multiple_cells_idx].copy()\n",
1131
+ "\n",
1132
  "protac_pedia_df = protac_pedia_df[~multiple_cells_idx]\n",
1133
  "print(f'Number of rows with single cell types: {len(protac_pedia_df)}')"
1134
  ]
1135
  },
1136
+ {
1137
+ "cell_type": "code",
1138
+ "execution_count": 18,
1139
+ "metadata": {},
1140
+ "outputs": [
1141
+ {
1142
+ "data": {
1143
+ "text/plain": [
1144
+ "Index(['Compound ID', 'Smiles', 'Active/Inactive', 'Best PROTAC', 'Cell Type',\n",
1145
+ " 'cLogP', 'Comments', 'Curator', 'DC50 (nM)', 'Dmax (%)',\n",
1146
+ " 'E3 Binder SMILES', 'E3 Ligase', 'Ec50 of Ligand Cells',\n",
1147
+ " 'Ec50 of PROTAC Cells', 'exit_vector', 'Hydrogen Bond Acceptor Count',\n",
1148
+ " 'Hydrogen Bond Donor Count', 'Ic50 of Ligand', 'Ic50 of PROTAC',\n",
1149
+ " 'Ligand Name', 'Ligand SMILES', 'Linker', 'Linker Type', 'linker_ha',\n",
1150
+ " 'linker_no', 'linker_rb', 'Molecular Weight', 'Off Targets Reported',\n",
1151
+ " 'PATENT', 'Ligand PDB', 'Ligand ID', 'Pubmed', 'PROTAC Name',\n",
1152
+ " 'Proteomics Data Available', 'Secondary Pubmed', 'Status', 'Uniprot',\n",
1153
+ " 'Tested A Non Binding E3 Control', 'Tested Competition With Ligand',\n",
1154
+ " 'Tested Engagement In Cells', 'Tested Proteaseome Inhibitor',\n",
1155
+ " 'Treatment Time (h)', 'Topological Polar Surface Area'],\n",
1156
+ " dtype='object')"
1157
+ ]
1158
+ },
1159
+ "execution_count": 18,
1160
+ "metadata": {},
1161
+ "output_type": "execute_result"
1162
+ }
1163
+ ],
1164
+ "source": [
1165
+ "multiple_cells_df.columns"
1166
+ ]
1167
+ },
1168
+ {
1169
+ "cell_type": "code",
1170
+ "execution_count": 21,
1171
+ "metadata": {},
1172
+ "outputs": [
1173
+ {
1174
+ "name": "stdout",
1175
+ "output_type": "stream",
1176
+ "text": [
1177
+ "RS4;11, MOLM-13\n",
1178
+ "MV4-11, MOLM-13, KG1\n",
1179
+ "MOLT-4, MOLM14\n",
1180
+ "MOLM14, MV4-11\n",
1181
+ "Panc02.13, H23, A549, H1792, H2110, HCC827\n",
1182
+ "NAMALWA, XLA\n",
1183
+ "MV4;11, SUM149, SUM159, MOLM13, MM1.SCRBN−/−, MM1.SW\n",
1184
+ "MV4;11, 293FT, 293FTCRBN−/−\n",
1185
+ "MV-4-11 SK-MEL-5, NCI-H1568\n",
1186
+ "HeLa, DLBCL\n",
1187
+ "Jurkat and Panc Tu‐I\n",
1188
+ "HeLa, Mino, Jeko-1, HUVEC, MDA-MB-231, MM.1S\n",
1189
+ "Hela, MM.1S\n",
1190
+ "HeLa, MV4;11, A549\n",
1191
+ "AML cells, CRBN-/-\n",
1192
+ "SU-DHL-1 and NCI-H2228\n",
1193
+ "MCF-7, MM.1S\n",
1194
+ "DU145/Cy\n",
1195
+ "PC3, MDA-MB-231\n",
1196
+ "Namalwa, CA-46, Ramos\n",
1197
+ "HBL1, Ramos, Mino, IgE MM\n",
1198
+ "Ramos, THP-1\n",
1199
+ "MV4-11, MOLM-14\n",
1200
+ "MDA-MB-231, HeLa\n",
1201
+ "HEK293, CAMA-1, ZR-75-1\n",
1202
+ "MiaPaCa2, HPNE\n",
1203
+ "MCF-7, MDA-MB-231, HepG2, LO2, B16\n",
1204
+ "Karpas 422,ULA, SUDHL4, OCI-Ly1, Ramos\n",
1205
+ "SR, H2228, NCI–H69, NCI–H1688, NCI–H446\n",
1206
+ "OVCAR8 (WT EGFR), HeLa (EGFR Exon 20 Ins), SKBr3 (HER2)\n",
1207
+ "HCC827 (Exon 19 del), H3255 (L858R)\n",
1208
+ "MDA-MB-231, GTL16\n",
1209
+ "RS4;11, MV4;11, MOLM-13,\n",
1210
+ "MCF-7, U2OS, MDA-MB-231, MDA-MB-435, Flag-Cdc20, MDA-MB-231, 22Rv1, LNCaP\n",
1211
+ "Hella, RI-1\n",
1212
+ "Hella, RI-1, EOL-1, A-204\n",
1213
+ "MM1S, HEK293T, CRBNY384A/W386A\n",
1214
+ "Molm-16, SU-DHL-1\n",
1215
+ "RS4;11, MV4;11\n",
1216
+ "LNCaP, VCaP, and 22Rv\n",
1217
+ "MV4-11, Molm-13\n",
1218
+ "Mino, Ramos\n",
1219
+ "MCF7, A549\n",
1220
+ "HOP62/INC-H23\n",
1221
+ "E14 mouse embryonic stem cells; Human colon cancer cell line HCT116\n",
1222
+ "Pancreatic cancer cell lines BxPC-3; MIA PaCa-2\n",
1223
+ "KYSE520 esophageal cancer cell line; MV4;11 cell line\n",
1224
+ "HeLa, HL60, MV4;11\n",
1225
+ "HeLa, HEK293, U2OS\n",
1226
+ "MDA-MB-231, A549, A549/DDP, HUVEC\n",
1227
+ "HeLa, HEK293\n",
1228
+ "231MFP breast cancer cells, HeLa\n",
1229
+ "K562, A549, HCT116, MCF-7, HEK293T\n",
1230
+ "MM1S; Mouse 4935 cells\n",
1231
+ "PC9; HCC827; H1975\n",
1232
+ "T47D, BT474, MDAMB231, MCF7, HS578T, LS180, HCT116, HCT15, DLD1, SW480, LOVO, SW620, HEL, KCL22, THP1, K562, ML2, NOMO1, MONOMAC6, MOLM13, LAMA84, SHI1, MV411, KU812, KG1, KBM7, HL60, BV173, HUH7, NCIH460, A549, NCIH2228, PC9, NCIH446, HELA, T98G, MCF10A, HAP1, DU145, PC3, LNCaP, A375, LOXIMVI\n",
1233
+ "5W1573; NCI-H2030; NCI-H358; HCT116; MIA-PaCa-2\n",
1234
+ "Fibroblasts from patients; HeLa\n",
1235
+ "SU-DHL-1; H3122\n",
1236
+ "IgEMM; hTERT-RPE1; HeLa; Jurkat; RPMI8226; MM.1S\n",
1237
+ "Jurkat; Molt4; Granta-519; Mino; Jeko; Rec1; Maver\n",
1238
+ "Mice primary Sertoli cells and primary Germ cells\n",
1239
+ "Ba/F3; H1975\n",
1240
+ "Ba/F3\n",
1241
+ "HT1080, IMR-32, MCF-7\n",
1242
+ "HT1080, IMR-32\n",
1243
+ "BxPC3, DLD-1, HCT-116, MDA-MB-231, A549, PC-9\n",
1244
+ "MDA-MB-231; K562\n",
1245
+ "MOLT4; BT549; HCC1806; COV362; Kuramochi; OVCAR8\n",
1246
+ "SRD15; Huh7\n",
1247
+ "SW480, HCT116\n",
1248
+ "K562, BaF3\n",
1249
+ "BBL358, T47D\n",
1250
+ "A549; H1299; B16-F10; MDAMB231; Jurkat\n"
1251
+ ]
1252
+ }
1253
+ ],
1254
+ "source": [
1255
+ "for c in multiple_cells_df['Cell Type'].unique():\n",
1256
+ " print(c)"
1257
+ ]
1258
+ },
1259
+ {
1260
+ "cell_type": "code",
1261
+ "execution_count": 36,
1262
+ "metadata": {},
1263
+ "outputs": [
1264
+ {
1265
+ "name": "stdout",
1266
+ "output_type": "stream",
1267
+ "text": [
1268
+ "Direct degration studies in this paper only conducted for first and final compounds; during the med chem campaign cell viability was used as the read out. Complex structure with the ligand was modeled based on 4Z93. IC50 of the ligand is between 2nM and 7nM, depending on which BRD. DC50 is between 3nM-10nM.\n",
1269
+ "RS4;11, MOLM-13\n",
1270
+ "100 %\n",
1271
+ "< 10 nM\n",
1272
+ "--------------------\n",
1273
+ "General kinase PROTAC, DCmax is for the most degraded kinase. IC50 of the ligand is for 193 kinases in the panel. IC50 of the PROTAC is by FLT3 kinase activity.\n",
1274
+ "MOLT-4, MOLM14\n",
1275
+ "> 85 %\n",
1276
+ "< 100 nM\n",
1277
+ "--------------------\n",
1278
+ "DC50 is 9.1 nM (WT BTK, NAMALWA cells), 14.6 nM (WT BTK, XLA cells), 14.9 nM (C481S, XLA cells). IC50 of PROTAC is 46.9 (WT BTK), 20.9 (C481S). IC50 of ligand is 51.0 nM (WT BTK), 30.7 (C481S)\n",
1279
+ "NAMALWA, XLA\n",
1280
+ "> 99 %\n",
1281
+ "< 14.9 nM\n",
1282
+ "--------------------\n",
1283
+ "EC50 of PROTAC is 28nM in MV-4-11, 68nM in NCI-H1568. DC50 is (SMARCA2 6nM, SMARCA4 11nM, PBRM1 32nM in MV-4-11; SMARCA2 3.3nM, PBRM1 15.6nM in NCI-H1568)\n",
1284
+ "MV-4-11 SK-MEL-5, NCI-H1568\n",
1285
+ "nan\n",
1286
+ "< 32 nM\n",
1287
+ "--------------------\n",
1288
+ "DC50 value is for HeLa cells. DC50 is 0.61 uM for DLBCL cells. DMAX value is for HeLa cells. DMAX is 96 % for DLBCL cells.\n",
1289
+ "HeLa, DLBCL\n",
1290
+ "92 %\n",
1291
+ "0.79 uM\n",
1292
+ "--------------------\n",
1293
+ "proteomics of PDEdelta degradation show upregulation of enzymes involved in lipid metabolism - deltasonamide 1 also causes this. Dmax was measured in Panc-Tu-1 cell line. DC50 is 83.4% (24 h, Panc-Tu-1), 85% (24 h, 1 uM, Jurkat). IC50 Deltasonamide 1 is 203 pM, and of the Bn derivative is 8 nM.\n",
1294
+ "Jurkat and Panc Tu‐I\n",
1295
+ "> 83.4 %\n",
1296
+ "48 nM\n",
1297
+ "--------------------\n",
1298
+ "DC50 is 1-3nM\n",
1299
+ "HeLa, MV4;11, A549\n",
1300
+ "100 %\n",
1301
+ "< 3 nM\n",
1302
+ "--------------------\n",
1303
+ "DC50 is 10-30nM\n",
1304
+ "HeLa, MV4;11, A549\n",
1305
+ "99 %\n",
1306
+ "< 30 nM\n",
1307
+ "--------------------\n",
1308
+ "competition with ligand only slightly rescued protein degradation. EC50 of ligand and PROTAC is measured on SUDHL-1 cell line. DC50 is SU-DHL-1: 3 ± 1 nM, NCI-H2228: 34 ± 9 nM.\n",
1309
+ "SU-DHL-1 and NCI-H2228\n",
1310
+ "> 90 %\n",
1311
+ "< 34 nM\n",
1312
+ "--------------------\n",
1313
+ "competition with ligand only slightly rescued protein degradation. EC50 of ligand and PROTAC is measured on SUDHL-1 cell line. DC50 is SU-DHL-1: 11 ± 2 nM, NCI-H2228: 59 ± 16 nM.\n",
1314
+ "SU-DHL-1 and NCI-H2228\n",
1315
+ "> 90 %\n",
1316
+ "< 59 nM\n",
1317
+ "--------------------\n",
1318
+ "\n",
1319
+ "\n",
1320
+ "DC50 and Dmax are for MCF-7 cells\n",
1321
+ "MCF-7, MM.1S\n",
1322
+ "70.5 %\n",
1323
+ "34 nM\n",
1324
+ "--------------------\n",
1325
+ "also degrades ibrutinib-resistant C481S BTK. DC50 is 6.3 nM (HBL1), 8.5 nM (Ramos), 9.2 nM (Mino), 11.4 nM (IgE MM)\n",
1326
+ "HBL1, Ramos, Mino, IgE MM\n",
1327
+ "94 %\n",
1328
+ "< 11.4 nM\n",
1329
+ "--------------------\n",
1330
+ "Ligand name is from PMID 24068666. DCmax is 18-33%\n",
1331
+ "Ramos, THP-1\n",
1332
+ "< 33 %\n",
1333
+ "nan\n",
1334
+ "--------------------\n",
1335
+ "Ligand name is from PMID 24068666. DCmax is 15-34%\n",
1336
+ "Ramos, THP-1\n",
1337
+ "< 34 %\n",
1338
+ "nan\n",
1339
+ "--------------------\n",
1340
+ "Ligand name is from PMID 24068666. DCmax is 36-64%. DC50 is 1487 - 1994.5 nM.\n",
1341
+ "Ramos, THP-1\n",
1342
+ "< 64 %\n",
1343
+ "< 1994.5 nM\n",
1344
+ "--------------------\n",
1345
+ "Ligand name is from PMID 24068666. DCmax is 68-85%. DC50 is 36.9 - 398.5 nM.\n",
1346
+ "Ramos, THP-1\n",
1347
+ "> 68 %\n",
1348
+ "< 398.5 nM\n",
1349
+ "--------------------\n",
1350
+ "Ligand name is from PMID 24068666. DCmax is 63-84%. DC50 is 21.8 - 469.9 nM.\n",
1351
+ "Ramos, THP-1\n",
1352
+ "> 63 %\n",
1353
+ "< 469.9 nM\n",
1354
+ "--------------------\n",
1355
+ "Ligand name is from PMID 24068666. DCmax is 75-87%. DC50 is 4.5 - 90.5 nM.\n",
1356
+ "Ramos, THP-1\n",
1357
+ "> 75 %\n",
1358
+ "< 90.5 nM\n",
1359
+ "--------------------\n",
1360
+ "Ligand name is from PMID 24068666. DCmax is 71-85%. DC50 is 5.9 - 217.7 nM.\n",
1361
+ "Ramos, THP-1\n",
1362
+ "> 71 %\n",
1363
+ "< 217.7 nM\n",
1364
+ "--------------------\n",
1365
+ "Ligand name is from PMID 24068666. DCmax is 80-87%. DC50 is 1.1 - 37.4 nM.\n",
1366
+ "Ramos, THP-1\n",
1367
+ "> 80 %\n",
1368
+ "< 37.4 nM\n",
1369
+ "--------------------\n",
1370
+ "\n",
1371
+ "\n",
1372
+ "Ligand name is from PMID 24068666. DCmax is 70-85%. DC50 is 9.7 - 184.1 nM.\n",
1373
+ "Ramos, THP-1\n",
1374
+ "> 70 %\n",
1375
+ "< 184.1 nM\n",
1376
+ "--------------------\n",
1377
+ "DC50 error ± 1.0 nM. DMAX error ± 1.1 %.\n",
1378
+ "MDA-MB-231, HeLa\n",
1379
+ "99.6 %\n",
1380
+ "9.5 nM\n",
1381
+ "--------------------\n",
1382
+ "DC50 error ± 81.3 nM. DMAX error ± 10.1 %.\n",
1383
+ "MDA-MB-231, HeLa\n",
1384
+ "34.5 %\n",
1385
+ "45.9 nM\n",
1386
+ "--------------------\n",
1387
+ "DC50 is for WT EGFR. DC50 for EGFR Exon 20 Ins is 736.2 nM. DMAX is for WT EGFR. DMAX for EGFR Exon 20 Ins is 68.8 %.\n",
1388
+ "OVCAR8 (WT EGFR), HeLa (EGFR Exon 20 Ins), SKBr3 (HER2)\n",
1389
+ "97.6 %\n",
1390
+ "39.2 nM\n",
1391
+ "--------------------\n",
1392
+ "DC50 is for EGFR (Exon 19 del). DC50 for EGFR (L858R) 22.3 nM. DMAX is for EGFR (Exon 19 del). DMAX for EGFR (L858R) 96.6 %.\n",
1393
+ "HCC827 (Exon 19 del), H3255 (L858R)\n",
1394
+ "98.9 %\n",
1395
+ "11.7 nM\n",
1396
+ "--------------------\n",
1397
+ "DC50 is 1.76 nM and 4.5 nM\n",
1398
+ "Hella, RI-1, EOL-1, A-204\n",
1399
+ "90 %\n",
1400
+ "< 4.5 nM\n",
1401
+ "--------------------\n",
1402
+ "DC50 is 0.86nM in LNCaP, 0.76 in VCaP and 10.4 nM at 1uM in 22Rv1. EC50 in cells is 0.25nM for LNCaP, 0.34 nM for VCaP, 183nM for 22Rv1.\n",
1403
+ "LNCaP, VCaP, and 22Rv\n",
1404
+ "> 95 %\n",
1405
+ "< 10.4 nM\n",
1406
+ "--------------------\n",
1407
+ "XD2-149 was initially designed to degrade STAT3. However, experiments showed that XD2-149 down-regulate STAT3 level in a proteasome-independent manner. Proteomics data revealed that an E3 ligase, ZFP91, was the true substrate for this PROTAC. This paper reported a total of 22 PROTACs, which differed from each other in linker design and E3 binder choices (pomalidomide/thalidomide/lenalidomide). However, the authors didn't mention whether the remaining 21 molecules could degrade ZFP91 or not. It is also interesting that pomalidomide itself can induce the degradation of CRBN neo-substrates like ZFP91 (DC50: 0.42uM, 5-fold less potent than XD2-149), since pomalidomide can remodel CRBN surface for binding proteins like ZFP91 (Nat Med., 2019, doi: 10.1038/s41591-019-0668-z).\n",
1408
+ "Pancreatic cancer cell lines BxPC-3; MIA PaCa-2\n",
1409
+ "> 90 %\n",
1410
+ "~ 70 nM\n",
1411
+ "--------------------\n",
1412
+ "DC50 for KYSE520 cell: 6.0nM;DC50 for MV4;11 cell: 2.6nM; EC50 for KYSE520 cell: 0.66uM; EC50 for MV4;11 cell: 9.9nM;\n",
1413
+ "KYSE520 esophageal cancer cell line; MV4;11 cell line\n",
1414
+ "100 %\n",
1415
+ "6.0 nM\n",
1416
+ "--------------------\n",
1417
+ "pEC50 for MV4;11 and HL60 cells: 6.75±0.03 and 5.84±0.06, respectively.\n",
1418
+ "pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 7.0/7.0/6.5/6.2, respectively (24h, HeLa cells).\n",
1419
+ "Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 96%/97%/97%/93%, respectively (HeLa cells).\n",
1420
+ "HeLa, HL60, MV4;11\n",
1421
+ "> 93 %\n",
1422
+ "< 0.1 uM\n",
1423
+ "--------------------\n",
1424
+ "\n",
1425
+ "\n",
1426
+ "pEC50 for MV4;11 and HL60 cells: 7.57±0.03 and 6.66±0.05, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 8.1/8.6/7.0/7.4, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 98%/100%/100%/98%, respectively (HeLa cells).\n",
1427
+ "HeLa, HL60, MV4;11\n",
1428
+ "> 98 %\n",
1429
+ "< 2.5 nM\n",
1430
+ "--------------------\n",
1431
+ "pEC50 for MV4;11 and HL60 cells: 6.91±0.04 and 5.90±0.05, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 8.4/8.0/6.5/6.7, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 99%/100%/99%/97%, respectively (HeLa cells).\n",
1432
+ "HeLa, HL60, MV4;11\n",
1433
+ "> 97 %\n",
1434
+ "< 4 nM\n",
1435
+ "--------------------\n",
1436
+ "pEC50 for MV4;11 and HL60 cells: 7.77±0.06 and 7.46±0.03, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 9.2/9.0/9.1/8.2, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 97%/100%/98%/83%, respectively (HeLa cells).\n",
1437
+ "HeLa, HL60, MV4;11\n",
1438
+ "> 83 %\n",
1439
+ "< 1 nM\n",
1440
+ "--------------------\n",
1441
+ "pEC50 for MV4;11 and HL60 cells: 6.24±0.05 and 6.17±0.03, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 6.9/6.7/6.8/NA, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 94%/78%/74%/37%, respectively (HeLa cells).\n",
1442
+ "HeLa, HL60, MV4;11\n",
1443
+ "nan\n",
1444
+ "~ 0.1 uM\n",
1445
+ "--------------------\n",
1446
+ "pEC50 for MV4;11 and HL60 cells: 7.31±0.03 and 6.57±0.02, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 8.1/7.6/7.3/NA, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 98%/95%/91%/43%, respectively (HeLa cells).\n",
1447
+ "HeLa, HL60, MV4;11\n",
1448
+ "> 90 %\n",
1449
+ "~ 7.9 nM\n",
1450
+ "--------------------\n",
1451
+ "pEC50 for MV4;11 and HL60 cells: 7.08±0.05 and 6.37±0.03, respectively. pDC50 for Brd4 short/Brd4 long/Brd3/Brd2: 8.1/7.5/7.7/NA, respectively (24h, HeLa cells). Dmax for Brd4 short/Brd4 long/Brd3/Brd2: 95%/93%/92%/26%, respectively (HeLa cells).\n",
1452
+ "HeLa, HL60, MV4;11\n",
1453
+ "> 90 %\n",
1454
+ "~ 7.9 nM\n",
1455
+ "--------------------\n",
1456
+ "Reported DC50 and Dmax above are in HeLa cells. DC50 for HEK293 cells: 230nM; Dmax for HEK293 cells: 98%. 14a can degrade VHL at higher concentration. See Target UniprotID P40337 for details.\n",
1457
+ "HeLa, HEK293\n",
1458
+ "88 %\n",
1459
+ "200 nM\n",
1460
+ "--------------------\n",
1461
+ "EC50/DC50/Dmax reported above were obtained using NCI-H2030 cells. DC50: 0.25~0.76uM; Dmax: ~75%-90%, specific value depends on the cell line.\n",
1462
+ "5W1573; NCI-H2030; NCI-H358; HCT116; MIA-PaCa-2\n",
1463
+ "~ 80 %\n",
1464
+ "0.59 uM\n",
1465
+ "--------------------\n",
1466
+ "38\n"
1467
+ ]
1468
+ }
1469
+ ],
1470
+ "source": [
1471
+ "cnt = 0\n",
1472
+ "for i, row in multiple_cells_df.dropna(subset=['Comments', 'Cell Type', 'Dmax (%)', 'DC50 (nM)'], how='all').iterrows():\n",
1473
+ " if pd.isnull(row['Comments']):\n",
1474
+ " continue\n",
1475
+ " if 'DC' in row['Comments'].upper() or 'MAX' in row['Comments'].upper():\n",
1476
+ " cnt += 1\n",
1477
+ " print(row['Comments'])\n",
1478
+ " print(row['Cell Type'])\n",
1479
+ " print(row['Dmax (%)'])\n",
1480
+ " print(row['DC50 (nM)'])\n",
1481
+ " print('-' * 20)\n",
1482
+ " if cnt % 10 == 0:\n",
1483
+ " print('\\n')\n",
1484
+ "print(cnt)"
1485
+ ]
1486
+ },
1487
+ {
1488
+ "cell_type": "code",
1489
+ "execution_count": 19,
1490
+ "metadata": {},
1491
+ "outputs": [
1492
+ {
1493
+ "data": {
1494
+ "text/html": [
1495
+ "<div>\n",
1496
+ "<style scoped>\n",
1497
+ " .dataframe tbody tr th:only-of-type {\n",
1498
+ " vertical-align: middle;\n",
1499
+ " }\n",
1500
+ "\n",
1501
+ " .dataframe tbody tr th {\n",
1502
+ " vertical-align: top;\n",
1503
+ " }\n",
1504
+ "\n",
1505
+ " .dataframe thead th {\n",
1506
+ " text-align: right;\n",
1507
+ " }\n",
1508
+ "</style>\n",
1509
+ "<table border=\"1\" class=\"dataframe\">\n",
1510
+ " <thead>\n",
1511
+ " <tr style=\"text-align: right;\">\n",
1512
+ " <th></th>\n",
1513
+ " <th>Cell Type</th>\n",
1514
+ " <th>Comments</th>\n",
1515
+ " <th>Dmax (%)</th>\n",
1516
+ " <th>DC50 (nM)</th>\n",
1517
+ " </tr>\n",
1518
+ " </thead>\n",
1519
+ " <tbody>\n",
1520
+ " <tr>\n",
1521
+ " <th>105</th>\n",
1522
+ " <td>RS4;11, MOLM-13</td>\n",
1523
+ " <td>Direct degration studies in this paper only co...</td>\n",
1524
+ " <td>100 %</td>\n",
1525
+ " <td>&lt; 10 nM</td>\n",
1526
+ " </tr>\n",
1527
+ " <tr>\n",
1528
+ " <th>106</th>\n",
1529
+ " <td>RS4;11, MOLM-13</td>\n",
1530
+ " <td>Complex structure with the ligand was modeled ...</td>\n",
1531
+ " <td>100 %</td>\n",
1532
+ " <td>&lt; 1 nM</td>\n",
1533
+ " </tr>\n",
1534
+ " <tr>\n",
1535
+ " <th>107</th>\n",
1536
+ " <td>RS4;11, MOLM-13</td>\n",
1537
+ " <td>Complex structure with the ligand was modeled ...</td>\n",
1538
+ " <td>100 %</td>\n",
1539
+ " <td>&lt; 1 nM</td>\n",
1540
+ " </tr>\n",
1541
+ " <tr>\n",
1542
+ " <th>108</th>\n",
1543
+ " <td>RS4;11, MOLM-13</td>\n",
1544
+ " <td>Complex structure with the ligand was modeled ...</td>\n",
1545
+ " <td>NaN</td>\n",
1546
+ " <td>~ 3 nM</td>\n",
1547
+ " </tr>\n",
1548
+ " <tr>\n",
1549
+ " <th>109</th>\n",
1550
+ " <td>RS4;11, MOLM-13</td>\n",
1551
+ " <td>Complex structure with the ligand was modeled ...</td>\n",
1552
+ " <td>NaN</td>\n",
1553
+ " <td>~ 10 nM</td>\n",
1554
+ " </tr>\n",
1555
+ " <tr>\n",
1556
+ " <th>...</th>\n",
1557
+ " <td>...</td>\n",
1558
+ " <td>...</td>\n",
1559
+ " <td>...</td>\n",
1560
+ " <td>...</td>\n",
1561
+ " </tr>\n",
1562
+ " <tr>\n",
1563
+ " <th>1182</th>\n",
1564
+ " <td>A549; H1299; B16-F10; MDAMB231; Jurkat</td>\n",
1565
+ " <td>NaN</td>\n",
1566
+ " <td>NaN</td>\n",
1567
+ " <td>NaN</td>\n",
1568
+ " </tr>\n",
1569
+ " <tr>\n",
1570
+ " <th>1185</th>\n",
1571
+ " <td>A549; H1299; B16-F10; MDAMB231; Jurkat</td>\n",
1572
+ " <td>NaN</td>\n",
1573
+ " <td>NaN</td>\n",
1574
+ " <td>NaN</td>\n",
1575
+ " </tr>\n",
1576
+ " <tr>\n",
1577
+ " <th>1194</th>\n",
1578
+ " <td>A549; H1299; B16-F10; MDAMB231; Jurkat</td>\n",
1579
+ " <td>NaN</td>\n",
1580
+ " <td>NaN</td>\n",
1581
+ " <td>NaN</td>\n",
1582
+ " </tr>\n",
1583
+ " <tr>\n",
1584
+ " <th>1195</th>\n",
1585
+ " <td>A549; H1299; B16-F10; MDAMB231; Jurkat</td>\n",
1586
+ " <td>NaN</td>\n",
1587
+ " <td>NaN</td>\n",
1588
+ " <td>NaN</td>\n",
1589
+ " </tr>\n",
1590
+ " <tr>\n",
1591
+ " <th>1198</th>\n",
1592
+ " <td>A549; H1299; B16-F10; MDAMB231; Jurkat</td>\n",
1593
+ " <td>NaN</td>\n",
1594
+ " <td>NaN</td>\n",
1595
+ " <td>NaN</td>\n",
1596
+ " </tr>\n",
1597
+ " </tbody>\n",
1598
+ "</table>\n",
1599
+ "<p>278 rows × 4 columns</p>\n",
1600
+ "</div>"
1601
+ ],
1602
+ "text/plain": [
1603
+ " Cell Type \\\n",
1604
+ "105 RS4;11, MOLM-13 \n",
1605
+ "106 RS4;11, MOLM-13 \n",
1606
+ "107 RS4;11, MOLM-13 \n",
1607
+ "108 RS4;11, MOLM-13 \n",
1608
+ "109 RS4;11, MOLM-13 \n",
1609
+ "... ... \n",
1610
+ "1182 A549; H1299; B16-F10; MDAMB231; Jurkat \n",
1611
+ "1185 A549; H1299; B16-F10; MDAMB231; Jurkat \n",
1612
+ "1194 A549; H1299; B16-F10; MDAMB231; Jurkat \n",
1613
+ "1195 A549; H1299; B16-F10; MDAMB231; Jurkat \n",
1614
+ "1198 A549; H1299; B16-F10; MDAMB231; Jurkat \n",
1615
+ "\n",
1616
+ " Comments Dmax (%) DC50 (nM) \n",
1617
+ "105 Direct degration studies in this paper only co... 100 % < 10 nM \n",
1618
+ "106 Complex structure with the ligand was modeled ... 100 % < 1 nM \n",
1619
+ "107 Complex structure with the ligand was modeled ... 100 % < 1 nM \n",
1620
+ "108 Complex structure with the ligand was modeled ... NaN ~ 3 nM \n",
1621
+ "109 Complex structure with the ligand was modeled ... NaN ~ 10 nM \n",
1622
+ "... ... ... ... \n",
1623
+ "1182 NaN NaN NaN \n",
1624
+ "1185 NaN NaN NaN \n",
1625
+ "1194 NaN NaN NaN \n",
1626
+ "1195 NaN NaN NaN \n",
1627
+ "1198 NaN NaN NaN \n",
1628
+ "\n",
1629
+ "[278 rows x 4 columns]"
1630
+ ]
1631
+ },
1632
+ "execution_count": 19,
1633
+ "metadata": {},
1634
+ "output_type": "execute_result"
1635
+ }
1636
+ ],
1637
+ "source": [
1638
+ "multiple_cells_df[['Cell Type', 'Comments', 'Dmax (%)', 'DC50 (nM)']]"
1639
+ ]
1640
+ },
1641
  {
1642
  "cell_type": "code",
1643
  "execution_count": 14,