diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..c68da8fbb21e708b959e30dda54b2b19cf5d3756 100644 --- a/.gitattributes +++ b/.gitattributes @@ -32,3 +32,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model/knn_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/knn_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/knn_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/knn_TYK2.sav filter=lfs diff=lfs merge=lfs -text +model/RF_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/RF_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/RF_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_linear_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_linear_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_linear_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_poly_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_poly_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_poly_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_poly_TYK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_rbf_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_rbf_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_rbf_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_rbf_TYK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_sigmoid_JAK1.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_sigmoid_JAK2.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_sigmoid_JAK3.sav filter=lfs diff=lfs merge=lfs -text +model/SVM_sigmoid_TYK2.sav filter=lfs diff=lfs merge=lfs -text +streamlit-hello-2022-08-13-03-08-23.gif filter=lfs diff=lfs merge=lfs -text diff --git a/AUC/CNN_JAK1_fpr.pickle b/AUC/CNN_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..42128a221c9f1196f2c861c78d2db7642da329a5 --- /dev/null +++ b/AUC/CNN_JAK1_fpr.pickle 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c641d6b4144e3f6502470c895dcf15a369a7ae8179a475ad404e3a2b8bfe6f +size 2000 diff --git a/AUC/CNN_JAK1_tpr.pickle b/AUC/CNN_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..6307cf36739d6e86f8b8d44628c3c47462bb9af8 --- /dev/null +++ b/AUC/CNN_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f143743a930c8c2efc1b1ede9e20527db2dd8542d2e4c3f944bbee2c693914c +size 2000 diff --git a/AUC/CNN_JAK2_fpr.pickle b/AUC/CNN_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..fa71a49d9186edbe00c66db8810084abc08339a7 --- /dev/null +++ b/AUC/CNN_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e9cea3ef2496be6d2621ee8467515e3e10d5d80ee45a27fbdda8775af7312a +size 2000 diff --git a/AUC/CNN_JAK2_tpr.pickle b/AUC/CNN_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..a4c7610f726eafd3e9e746f9f662befa4b631713 --- /dev/null +++ b/AUC/CNN_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e66bef332a2ba9cd544589366297c65001409bdeac35f5b7aba8d9448a0a970 +size 2000 diff --git a/AUC/CNN_JAK3_fpr.pickle b/AUC/CNN_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0c83fc7e44ed4b09fb528488781898f1d44a5068 --- /dev/null +++ b/AUC/CNN_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096defe1507293e25e5f07ae3a9d16c25ba8ebd17d0a967d75795c7fde65e98d +size 2000 diff --git a/AUC/CNN_JAK3_tpr.pickle b/AUC/CNN_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0c83fc7e44ed4b09fb528488781898f1d44a5068 --- /dev/null +++ b/AUC/CNN_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096defe1507293e25e5f07ae3a9d16c25ba8ebd17d0a967d75795c7fde65e98d +size 2000 diff --git 
a/AUC/CNN_TYK2_fpr.pickle b/AUC/CNN_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..540daac2cfd05ffd45609e8df45f5bad9988b1d8 --- /dev/null +++ b/AUC/CNN_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe1c72146b9027ef88ddc2bbd4ad1594cc5a5b23a1b5e6fd29dbb5f35c77059 +size 2000 diff --git a/AUC/CNN_TYK2_tpr.pickle b/AUC/CNN_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..e49533803779c726558ea0430b2b366c7110c8c8 --- /dev/null +++ b/AUC/CNN_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:410bcb9635c41395ae8e5f269ca039fa6e6c171256b93867fdcf183eb935a293 +size 2000 diff --git a/AUC/GVAE_JAK1_fpr.pickle b/AUC/GVAE_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..5ffd01e3e1aa5a3c31702c532082eed737dd73ed --- /dev/null +++ b/AUC/GVAE_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32ef6f1ca4cd972bad41d7e831a15b7321aeb45955e4117a163b43c6cb9c841 +size 38100 diff --git a/AUC/GVAE_JAK1_tpr.pickle b/AUC/GVAE_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0cd822255d2f7f6ba7047941e5b155fdcd2fc582 --- /dev/null +++ b/AUC/GVAE_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6eb2e393f5883135631131ab5453a988ceaa5807538b691ebc89cb473dc32a +size 38100 diff --git a/AUC/GVAE_JAK2_fpr.pickle b/AUC/GVAE_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..46616647ad53d91c18f785c88dd77e9de3e1ffdb --- /dev/null +++ b/AUC/GVAE_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6cb3df7f444e116bbbf2c10698cfd1c573620aaad1709109a768e47646be5bd +size 38100 diff --git a/AUC/GVAE_JAK2_tpr.pickle b/AUC/GVAE_JAK2_tpr.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..216a24b0c9c09bf8c7888f1348ac9856c3966fc5 --- /dev/null +++ b/AUC/GVAE_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482bf2c571b8c754350f36fe66e6c6c22d51e5c8f9c15e6ac4018c0b7e4ea7c5 +size 38100 diff --git a/AUC/GVAE_JAK3_fpr.pickle b/AUC/GVAE_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0b0df5f815436d2ab7838d1aafa20d285a2960a3 --- /dev/null +++ b/AUC/GVAE_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a44e1dd6eaf1d88594b4f7f254fa0b66bc50c9b06511fc2800c388f524c3f8e +size 38100 diff --git a/AUC/GVAE_JAK3_tpr.pickle b/AUC/GVAE_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..46eefd7526fb71e33fb2de46b14278ec07c0a239 --- /dev/null +++ b/AUC/GVAE_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08eb2b9472191847a1bec7879a065be5e69c24b96979cd28b7550a2f7a137912 +size 38100 diff --git a/AUC/GVAE_TYK2_fpr.pickle b/AUC/GVAE_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9f448260cc1763b168b813389a34d02c06c451ce --- /dev/null +++ b/AUC/GVAE_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d0360a072afa04f8829309499706a5f626ee09ecae7b1046adb42fff7f7c68 +size 38100 diff --git a/AUC/GVAE_TYK2_tpr.pickle b/AUC/GVAE_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0008c92368ac7d433bd736352c1ab49d3423d949 --- /dev/null +++ b/AUC/GVAE_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ee4f3655f5b41a5c2ced588c73b7233a946ce32e2fdb053b0ea5b2a2fe973c +size 38100 diff --git a/AUC/RF_JAK1_fpr.pickle b/AUC/RF_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0487646a678a0418b36db66b9fcc8fdb87005afa --- /dev/null +++ b/AUC/RF_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8de10bf4ea6d80ba006e0f963a396b19556f727db1b35bfcb3235a444eccfdfb +size 2000 diff --git a/AUC/RF_JAK1_tpr.pickle b/AUC/RF_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..ccd0b2be2123d6b71f577c5fb26b9cc5cf1a24a7 --- /dev/null +++ b/AUC/RF_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d5f94545186f827f225b36d9416c54f5d61d52e948f67ff12264c4dd7612ae +size 2000 diff --git a/AUC/RF_JAK2_fpr.pickle b/AUC/RF_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9c3a19eff3b0f778fe506ad09748cde85acda1d8 --- /dev/null +++ b/AUC/RF_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1341907c10dcce88d6d41b1da83ae98afcb4584a0411cb0abc2c4308015ff7 +size 2000 diff --git a/AUC/RF_JAK2_tpr.pickle b/AUC/RF_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..8e4b19e840443b801fe42b1c44afc40bfb58393c --- /dev/null +++ b/AUC/RF_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad9ff88c31095bc51b44ae5f686a288af0bea898599ad48c4619a1301b21580 +size 2000 diff --git a/AUC/RF_JAK3_fpr.pickle b/AUC/RF_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f9e158c47641c1336f6395d51ed1163e5a6300d3 --- /dev/null +++ b/AUC/RF_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a39be25f3e8bdb8d369eaaedcb68706add4530c0d45718340f09dc8e2beec0 +size 2000 diff --git a/AUC/RF_JAK3_tpr.pickle b/AUC/RF_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..737d83d98cf5ca1aa4a125717ccf3e0b20f7b480 --- /dev/null +++ b/AUC/RF_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f474167b0f5e705bd4eaaef90f4493a783c7817c0df36a354599f3ef756441 +size 2000 diff --git a/AUC/RF_TYK2_fpr.pickle b/AUC/RF_TYK2_fpr.pickle new 
file mode 100644 index 0000000000000000000000000000000000000000..71f9b63cb97ea2f080ed69ae960bb1441ed21f80 --- /dev/null +++ b/AUC/RF_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1e769b6e6e42aeab42e4f2409fc8c1b2a25b3eaa84d1bbdb618e7808d2ac3f +size 2000 diff --git a/AUC/RF_TYK2_tpr.pickle b/AUC/RF_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..80de6b75d8a35e903a29ddd3d843e9410ceb7637 --- /dev/null +++ b/AUC/RF_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563f6b8878205619210b87a78dc921784828350c5ee40f8ebe21bc80f2e2559f +size 2000 diff --git a/AUC/SVM_linear_JAK1_fpr.pickle b/AUC/SVM_linear_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..71fb3f4e4d46bb75c8d1950a6f739e085e15cd64 --- /dev/null +++ b/AUC/SVM_linear_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2386b8c0498a470ea11dc13972cbe0accbc670c947f1a511dd39478890347a58 +size 2000 diff --git a/AUC/SVM_linear_JAK1_tpr.pickle b/AUC/SVM_linear_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..961d88c225019f757fe366aa7315ae0bf162bfbd --- /dev/null +++ b/AUC/SVM_linear_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35290812750b4ccc90baf5e34fdc76d8aaee1576e5da3edfa3448d580592a30a +size 2000 diff --git a/AUC/SVM_linear_JAK2_fpr.pickle b/AUC/SVM_linear_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..1ebc410b0703753fd369fbaa635e966a908a3309 --- /dev/null +++ b/AUC/SVM_linear_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08ecd8a262aee6d265c2968e4304a9dff7153439dfa3505300f12e4bff31729 +size 2000 diff --git a/AUC/SVM_linear_JAK2_tpr.pickle b/AUC/SVM_linear_JAK2_tpr.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..acfd7987dd35d62c1b09a0695ed97b8ebd72999c --- /dev/null +++ b/AUC/SVM_linear_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd55a1a9beb9fcab6bd82783ee4568c315cb9091b86b4835b24de2d57fb0b8c +size 2000 diff --git a/AUC/SVM_linear_JAK3_fpr.pickle b/AUC/SVM_linear_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..881fa62728100148c2357f5f9b0322722cbe8b2e --- /dev/null +++ b/AUC/SVM_linear_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5103e28f2113f6db1ff8b437ce0ff3f8becb2e58c9afe6cbda01f39b5f90ef1e +size 2000 diff --git a/AUC/SVM_linear_JAK3_tpr.pickle b/AUC/SVM_linear_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..53002feb681b6894663665b052087e4a38ad2c46 --- /dev/null +++ b/AUC/SVM_linear_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f0d234256834212169b9abd693e80d15291c6bac11bcbbabcefb19384886c0 +size 2000 diff --git a/AUC/SVM_linear_TYK2_fpr.pickle b/AUC/SVM_linear_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..7309c0e50636c046924a5eb05784c94fb8ce7757 --- /dev/null +++ b/AUC/SVM_linear_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838459f2d101070345ea3b52f48dd00d99cdee17f6130433d496438edb61a00a +size 2000 diff --git a/AUC/SVM_linear_TYK2_tpr.pickle b/AUC/SVM_linear_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..1931df7e014ad3e2b5dd5a31f5bb792844ae61eb --- /dev/null +++ b/AUC/SVM_linear_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e3bb82d7f7bb671bc6754c0b3b30ca2b5dc899c5e2b00322fce1d40269a047 +size 2000 diff --git a/AUC/SVM_poly_JAK1_fpr.pickle b/AUC/SVM_poly_JAK1_fpr.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..00b0c3e88a3d22b7eca8fe304f53a82eeb041ed2 --- /dev/null +++ b/AUC/SVM_poly_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4510cace26ff56d0abe5aef47d266945b9294247b453e9c2ef7a0c154914b682 +size 2000 diff --git a/AUC/SVM_poly_JAK1_tpr.pickle b/AUC/SVM_poly_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..79039288c1ce9eae2ff3f614cbca4b32ab45542c --- /dev/null +++ b/AUC/SVM_poly_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248cab7655fc3a68426a73068a66b784841583d347f513bf17c5583336ddcdd2 +size 2000 diff --git a/AUC/SVM_poly_JAK2_fpr.pickle b/AUC/SVM_poly_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..1a4f87f7ede40a6f5d45286046d0ff745d7aa14d --- /dev/null +++ b/AUC/SVM_poly_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d83f90b094be56121bd56a27e869acbbaf79ee320ef8dc040e9ffef7324008a +size 2000 diff --git a/AUC/SVM_poly_JAK2_tpr.pickle b/AUC/SVM_poly_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..4d6cde00659f7dad1436f19e0d4f01c97436441d --- /dev/null +++ b/AUC/SVM_poly_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c45dd690d439991d3fdd15acba616cdd4ac87e4a76c8ca6d997c533c036ba01 +size 2000 diff --git a/AUC/SVM_poly_JAK3_fpr.pickle b/AUC/SVM_poly_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..425e76f65709fc80283d7d8a56921fcd69ceab0b --- /dev/null +++ b/AUC/SVM_poly_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65fcc78d665db9ed7b04bc01c1fce22fb28eea19b06c46e265812ea3454ac44b +size 2000 diff --git a/AUC/SVM_poly_JAK3_tpr.pickle b/AUC/SVM_poly_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..b12a43436088400c5d20cdaaa52798044806da67 --- /dev/null 
+++ b/AUC/SVM_poly_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d154837b1a690f1c21bd8a6ecf9e3ab7698ba2291f9f0cd2bdeeca29a8cf40f +size 2000 diff --git a/AUC/SVM_poly_TYK2_fpr.pickle b/AUC/SVM_poly_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9018cb4d02c44ef5b75f3ad7e21fd36fa263b5d0 --- /dev/null +++ b/AUC/SVM_poly_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f128419166787a4a3d8ac22ad71383a9cd5f67625a3535dc38e758eda0d85d7 +size 2000 diff --git a/AUC/SVM_poly_TYK2_tpr.pickle b/AUC/SVM_poly_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..2bd78cbeb3262cfc7b3669881c386b6a869a6d25 --- /dev/null +++ b/AUC/SVM_poly_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfc106abe9e0272f14cf325cd9032cd1debc12aa8d1b352467e47d8fff24e75 +size 2000 diff --git a/AUC/SVM_rbf_JAK1_fpr.pickle b/AUC/SVM_rbf_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..ef36b298d2a9d4e418b0032f3fda44f844e2ef4e --- /dev/null +++ b/AUC/SVM_rbf_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5959ea6aae68054f83b83a76833abbc6379348256452fa80fbfa7961e6e1eb +size 2000 diff --git a/AUC/SVM_rbf_JAK1_tpr.pickle b/AUC/SVM_rbf_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..c2b40fb6521de80d2b6fb8af9b18eb10f160d6e7 --- /dev/null +++ b/AUC/SVM_rbf_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38898ae3e949fe0e07aabee8a8b1b66336e35713c82906134442dfd4b7992383 +size 2000 diff --git a/AUC/SVM_rbf_JAK2_fpr.pickle b/AUC/SVM_rbf_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..28b51d71bd3c159ce1d2c250502589aed9faf5a6 --- /dev/null +++ b/AUC/SVM_rbf_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a1fd275b6486382047af1863740a71e03a636cbec7f6b398a688b38c1e4c3769 +size 2000 diff --git a/AUC/SVM_rbf_JAK2_tpr.pickle b/AUC/SVM_rbf_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9e887ad3bd3296aac5676588aa49cd498f085b83 --- /dev/null +++ b/AUC/SVM_rbf_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d123eac816c16bb35783f4ae5af2ae2dac3a2e65f735c13fb3e6010b1eacc1 +size 2000 diff --git a/AUC/SVM_rbf_JAK3_fpr.pickle b/AUC/SVM_rbf_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..35268447800b0df276afc7da199563c0d7d37ff5 --- /dev/null +++ b/AUC/SVM_rbf_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec46c27734829e23d976bc85d0ab36acd1ca42db02bf25370b5067382fac9f88 +size 2000 diff --git a/AUC/SVM_rbf_JAK3_tpr.pickle b/AUC/SVM_rbf_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..a839e6bdbf34ba6b7e1e6f046d5cc72410940943 --- /dev/null +++ b/AUC/SVM_rbf_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc5a4b7c6d96f5da68dfebe125b16dcaa13b7ca8070b89629e5e40e0e0901ab +size 2000 diff --git a/AUC/SVM_rbf_TYK2_fpr.pickle b/AUC/SVM_rbf_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..848257cf4ef8535a5eb594e99ba4a84c62934612 --- /dev/null +++ b/AUC/SVM_rbf_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14515588ca71018fea87117186158ba03952b4cb800b8cd02c8c70f46d04ab9c +size 2000 diff --git a/AUC/SVM_rbf_TYK2_tpr.pickle b/AUC/SVM_rbf_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..dee5df2bfc7216aa1bdf8ea9376b3f255e9aaa59 --- /dev/null +++ b/AUC/SVM_rbf_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5855f0910dc454a73295cbf78a90f5122946ddd5912439af601c51bb3c1306 +size 2000 diff --git 
a/AUC/SVM_sigmoid_JAK1_fpr.pickle b/AUC/SVM_sigmoid_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..02e39c5a422f93365bc8799e3993c48e7c1dbed6 --- /dev/null +++ b/AUC/SVM_sigmoid_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3ce863f54b2d33aeab7766897c60465c7cb15a0fa1b79a78873b7bc0382f32 +size 2000 diff --git a/AUC/SVM_sigmoid_JAK1_tpr.pickle b/AUC/SVM_sigmoid_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..e8b2916332dbafbb81315dc04feaa636fc30cbfc --- /dev/null +++ b/AUC/SVM_sigmoid_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e67a4b8cb3b0a45dd3570b11926843b8f2ba34c19fb0c74e51a2fd2c7a5fafb +size 2000 diff --git a/AUC/SVM_sigmoid_JAK2_fpr.pickle b/AUC/SVM_sigmoid_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..979d12a76fb5bb572ecbbb4af4f5646eeb126d56 --- /dev/null +++ b/AUC/SVM_sigmoid_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f207b028dcb9ece22fdaf22b85a2fdb4c4c8fc2966e5876a906ccbf1988a351 +size 2000 diff --git a/AUC/SVM_sigmoid_JAK2_tpr.pickle b/AUC/SVM_sigmoid_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..2089c2feac8e3ccf4cf27385855414cce828ee09 --- /dev/null +++ b/AUC/SVM_sigmoid_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e308e0d095eaa0f305666c5927ca8b0cc31ba3a51902c33c3047fed3a930f6 +size 2000 diff --git a/AUC/SVM_sigmoid_JAK3_fpr.pickle b/AUC/SVM_sigmoid_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..83711afb53ec943b4bd1283a42822bd851619d12 --- /dev/null +++ b/AUC/SVM_sigmoid_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177cf5127ae6d4a1575b491fdec026c96532a2df678b5a88103e9ed7c19823f7 +size 2000 diff --git a/AUC/SVM_sigmoid_JAK3_tpr.pickle 
b/AUC/SVM_sigmoid_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..458b79af4f9f6cab7c787f148bc4fabdb3919c0a --- /dev/null +++ b/AUC/SVM_sigmoid_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:682e440bdc8b0a20f43a36d41359b1e7809d10ce349d46f111a7704f8625b413 +size 2000 diff --git a/AUC/SVM_sigmoid_TYK2_fpr.pickle b/AUC/SVM_sigmoid_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9c0aea905cecaf0f38cd57da1cc71540937cbedb --- /dev/null +++ b/AUC/SVM_sigmoid_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd7f99ca68601c94bfca5204b211578f2db393e5570cb4d7d1a6eec9bd71725 +size 2000 diff --git a/AUC/SVM_sigmoid_TYK2_tpr.pickle b/AUC/SVM_sigmoid_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..fa961529bfbed37bd4170d752a42fc851d1527f4 --- /dev/null +++ b/AUC/SVM_sigmoid_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a81165ef26291865c78441a7702db79cfcd1ea7dbbeea437b1a756b4f8c4bc3 +size 2000 diff --git a/AUC/XGBoost_JAK1_fpr.pickle b/AUC/XGBoost_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f1f1f00b34049bd98c7230682a31e4a79d5f57b7 --- /dev/null +++ b/AUC/XGBoost_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e908e219da5b5837131ba4783814d4f3af4ffb38f252ae864ed712420c10bc9f +size 2000 diff --git a/AUC/XGBoost_JAK1_tpr.pickle b/AUC/XGBoost_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..d65a8387e2f0b06109b79ed79d2d0e33602d6eb1 --- /dev/null +++ b/AUC/XGBoost_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e8949993f44575ae14ead88e2ca5ed028493e9a8fd7a1e27f0027122543546 +size 2000 diff --git a/AUC/XGBoost_JAK2_fpr.pickle b/AUC/XGBoost_JAK2_fpr.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..943ef1ba349b96006add1d997d25b0c3dd4ed3ec --- /dev/null +++ b/AUC/XGBoost_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45236308a0733b9a553dee32e47076c3a30fbc1af122aee7c5ad5602e4e84f39 +size 2000 diff --git a/AUC/XGBoost_JAK2_tpr.pickle b/AUC/XGBoost_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..8978154b4b5c0e99a712d7814c6abd41ed7e8178 --- /dev/null +++ b/AUC/XGBoost_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731819f4915951580f2e0f6ff34f88ce121efda6fdaeeddaf3c73ed3765dc89f +size 2000 diff --git a/AUC/XGBoost_JAK3_fpr.pickle b/AUC/XGBoost_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..c1d18e2f794b0b4a73b3f74355efa61cdad846fc --- /dev/null +++ b/AUC/XGBoost_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c98a292beb29b4575c3da92e82c6694022397de596db682854cb379545883e +size 2000 diff --git a/AUC/XGBoost_JAK3_tpr.pickle b/AUC/XGBoost_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..b92f6e6da60d4cd66573554c29b1b49a42a5641f --- /dev/null +++ b/AUC/XGBoost_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fdbc76b950088846eeb6126f4ca53640c1e21da809582b342ec5e6fa64868da +size 2000 diff --git a/AUC/XGBoost_TYK2_fpr.pickle b/AUC/XGBoost_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..4316a050fa727c6edfeb8c27e349b30512ede149 --- /dev/null +++ b/AUC/XGBoost_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e12bb335040d5bcc22e33d7ede56f43fe51007fc0e48c0e6939cb24ef455e7 +size 2000 diff --git a/AUC/XGBoost_TYK2_tpr.pickle b/AUC/XGBoost_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..836d58e89fd0a055e454565af7446866c4e2f59b --- /dev/null +++ 
b/AUC/XGBoost_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f386b877f5ab7d2add05ef4d2bb5d4872f879b79c1d061ec208231cd2f991e7e +size 2000 diff --git a/AUC/chembert_JAK1_fpr.pickle b/AUC/chembert_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..157ebbc5fab9ecf69b960c9c0c9777b966afc79d --- /dev/null +++ b/AUC/chembert_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57bcf72b7d90edcd83c8ca1b9716bd64bae66759fb4492f0f4d3bd3e4ce5195 +size 2000 diff --git a/AUC/chembert_JAK1_tpr.pickle b/AUC/chembert_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..7dd441a386cec81a5faacab40f2a20f0c9a5c7b4 --- /dev/null +++ b/AUC/chembert_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701b847db795658adb3b673b5f54365ac5a84ffdf4b18043554a7abe583167d3 +size 2000 diff --git a/AUC/chembert_JAK2_fpr.pickle b/AUC/chembert_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..956bcfd49e45c5294003eb6663b4ddc4d6bdc0ae --- /dev/null +++ b/AUC/chembert_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad182b9ca2b0414ad5ba21ec9d8d6f869ce924f5cb40dd8f643ed3b2f667f64 +size 2000 diff --git a/AUC/chembert_JAK2_tpr.pickle b/AUC/chembert_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f275f6c4a7f67efe828583b27e0a6c95eef327ec --- /dev/null +++ b/AUC/chembert_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ff2ff408a3da3294509cdaf878409e999b671be84942809a94c1e1fb55a951 +size 2000 diff --git a/AUC/chembert_JAK3_fpr.pickle b/AUC/chembert_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..dd2a6da209a3987ef5b403bf626c207db3886401 --- /dev/null +++ b/AUC/chembert_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e1890c6298766b2022ccb9cedbf5ee4806bc46aa33624266992febea82ffcccf +size 2000 diff --git a/AUC/chembert_JAK3_tpr.pickle b/AUC/chembert_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..5ed7ac93995fa09243f5cb346d860d802f8460f2 --- /dev/null +++ b/AUC/chembert_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96cde74b8818a8657e70ffd76fa8bcd743faf6171c36f4dc05e30da71a699a5 +size 2000 diff --git a/AUC/chembert_TYK2_fpr.pickle b/AUC/chembert_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..2efdbcad730b6f8f0c0a937c30d82f66acf13034 --- /dev/null +++ b/AUC/chembert_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b581ee5215a02748ac3eb152a0447043377fb61a3e28b0b723913d8d7ecb61 +size 2000 diff --git a/AUC/chembert_TYK2_tpr.pickle b/AUC/chembert_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..5825aeff02613ead4b9ec03b3413b4dce9be7f83 --- /dev/null +++ b/AUC/chembert_TYK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d7c0d2e9d42ebed3b6238a8e0fcb298949d1664e27f8bee33ed602f6c3f9ca +size 2000 diff --git a/AUC/knn_JAK1_fpr.pickle b/AUC/knn_JAK1_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..4d0c94878dd586e66904dccd653af4f63f89d6ac --- /dev/null +++ b/AUC/knn_JAK1_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acad04f1820e27281912da883615d5b856a742c4ac9f7aff786ad2866659bca5 +size 190118 diff --git a/AUC/knn_JAK1_tpr.pickle b/AUC/knn_JAK1_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..45e72373e510257044911315af3daae75a78ba08 --- /dev/null +++ b/AUC/knn_JAK1_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1549f1847b764064f6d24d825e849bcecc7fdb8f49132b7f140257aa672e67 +size 190118 diff --git a/AUC/knn_JAK2_fpr.pickle 
b/AUC/knn_JAK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..6bf03e81bed8b5479bdc8efea4351d6cc3e4987f --- /dev/null +++ b/AUC/knn_JAK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ee66e00a4fef5c018dc9afbbe93ab39d0a98735afab15eed8fb6383a00c253 +size 190118 diff --git a/AUC/knn_JAK2_tpr.pickle b/AUC/knn_JAK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..62a894be8b8df7174791f92153937f027fbd34dc --- /dev/null +++ b/AUC/knn_JAK2_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be6c43d57faffd03e08e87dfd57870d8e771f0f8278f373fd73d6c47941288f5 +size 190118 diff --git a/AUC/knn_JAK3_fpr.pickle b/AUC/knn_JAK3_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..c3eafea7119be78a5e717e0450e838c0261c129b --- /dev/null +++ b/AUC/knn_JAK3_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a365dd8b2b33e58538d5fe74fdf8bc641aa53043d2378330fc1242c742af6963 +size 190118 diff --git a/AUC/knn_JAK3_tpr.pickle b/AUC/knn_JAK3_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f2bd1c7c25ab045a2ab8c74fec7d8618cfb74a13 --- /dev/null +++ b/AUC/knn_JAK3_tpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b60f377a01c45418dc8324c79c2f603795a762f55e9165f633e310dc13ab3e +size 190118 diff --git a/AUC/knn_TYK2_fpr.pickle b/AUC/knn_TYK2_fpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..90fc394c6ac938352cfcb1a1f8b695f46a837386 --- /dev/null +++ b/AUC/knn_TYK2_fpr.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fb2052e5d993946d3b0c197a856749f89d9acf511ef6aa54af963bd914e752 +size 190118 diff --git a/AUC/knn_TYK2_tpr.pickle b/AUC/knn_TYK2_tpr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..6ea7128bcb3707b9d2897e1c88da10ca3bbd15c5 --- /dev/null +++ 
# Landing page of the multi-page Streamlit app: it only renders static text;
# the actual prediction, evaluation and plotting live in the other pages.
device = 'cpu'
model_path = 'model/'

st.set_page_config(
    page_title='Hello'
)
# Spelling fixed in the user-visible heading ("inhibiition" -> "inhibition").
st.write('# JAK inhibition prediction app')
st.sidebar.success('Select a page above')

# Spelling fixed in the user-visible description ("ane" -> "and", "uder" -> "under").
st.markdown(
    """
    * This is an open-source app framework built specifically for JAK inhibition of a certain drug with its SMILES as input.
    * Suitable model(s) could be chosen for prediction based on your need (in JAK page).
    * Simple machine learning models, tree models, graph-based models and bert models are trained and evaluated (results in Model Evaluation page).
    * Area under the curve could also be drawn based on our test set results (in Plot AUC page).
    Prediction should be used with caution and just for reference.
""")
https://git-lfs.github.com/spec/v1 +oid sha256:d79f14bee2945bcf4db9788190f0328cf62971fd26611129aea6340006164fd8 +size 101380 diff --git a/model/CNN_encoder_pretrain2.pt b/model/CNN_encoder_pretrain2.pt new file mode 100644 index 0000000000000000000000000000000000000000..272282ef09786b82ccee78f80de60ae916021c8b --- /dev/null +++ b/model/CNN_encoder_pretrain2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd58fed5a3b9ed645f1382a6285e0e8d8d9774441d658b0150b2b7a12803450 +size 99275 diff --git a/model/GVAE_JAK1.pt b/model/GVAE_JAK1.pt new file mode 100644 index 0000000000000000000000000000000000000000..541330c87d41efea23e3ace11917553a53771894 --- /dev/null +++ b/model/GVAE_JAK1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dedd22f4839dac2210670b14db25e494e612dc4133c3e0e11bff44126bb6614 +size 442976 diff --git a/model/GVAE_JAK2.pt b/model/GVAE_JAK2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b6e9665dc7890c9cc2df3f9765006e61809ec1b --- /dev/null +++ b/model/GVAE_JAK2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f24577133431f0cbc9572742182c503769b6d88cefe08351805cbc50b24e9f +size 442976 diff --git a/model/GVAE_JAK3.pt b/model/GVAE_JAK3.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f08664b37b7b9894d582d3ebbd311418791bd92 --- /dev/null +++ b/model/GVAE_JAK3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed671e923cc359f96dadfef90c0b5141b469080f4789e2e5a6d299a0c5eb546f +size 442976 diff --git a/model/GVAE_TYK2.pt b/model/GVAE_TYK2.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6668e1eca3e848ccc267014f0cce1cbec74ff17 --- /dev/null +++ b/model/GVAE_TYK2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c4cfd0767ab504c92525f6bcf2db594c037e6420afe62528ca3b3715403e19 +size 442976 diff --git a/model/RF_JAK1.sav b/model/RF_JAK1.sav new file mode 100644 index 
0000000000000000000000000000000000000000..f1e9a4065b0266964a9283cf2a8de023a4c06822 --- /dev/null +++ b/model/RF_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3a6532e754d4d132135d893e24b36858a42efc44bf72efbee843df20cf739a +size 4662879 diff --git a/model/RF_JAK2.sav b/model/RF_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..70a8531f06556357d6903e74d71e04de5e982784 --- /dev/null +++ b/model/RF_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc042e06a12424b039f559553b188dc290239af59bbd65e40c4a0734b21bc997 +size 17667727 diff --git a/model/RF_JAK3.sav b/model/RF_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..b9fa11f74909d2261935c6f6e03f51f161ef5ec8 --- /dev/null +++ b/model/RF_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156c7a7cf9fd60904a7a57b9dfd73dba60a3d271edadf2a48769c4d0853f50e4 +size 7913159 diff --git a/model/RF_TYK2.sav b/model/RF_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..f1840cd84d24fbd7d84b74735584d816fec7db2f Binary files /dev/null and b/model/RF_TYK2.sav differ diff --git a/model/SVM_linear_JAK1.sav b/model/SVM_linear_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..5af610204ce27849d921c63ce06736d1d5ba8a51 --- /dev/null +++ b/model/SVM_linear_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734a61649acc08ba2c5574eb033667221dcc68eebb73a1db63e6f4510d2c8c95 +size 2298250 diff --git a/model/SVM_linear_JAK2.sav b/model/SVM_linear_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..b5898ec349dcf7f380bf6c22c7e4523968cf2f45 --- /dev/null +++ b/model/SVM_linear_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52bea25a0b98b3367cfc475ab8a26ef814ecda76f0a7142777ff3b32a2d0e493 +size 5029234 diff --git a/model/SVM_linear_JAK3.sav 
b/model/SVM_linear_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..a1aaedaee12cec695500251e6b0cb669603cf09c --- /dev/null +++ b/model/SVM_linear_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce31e2500bcec57ce3b015a85296c06a715633b919ce21bc8f43f58bc5995e2 +size 3898330 diff --git a/model/SVM_linear_TYK2.sav b/model/SVM_linear_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..f9196cecdd5e2657ae98ebe15c1852bcdc5ad2bc Binary files /dev/null and b/model/SVM_linear_TYK2.sav differ diff --git a/model/SVM_poly_JAK1.sav b/model/SVM_poly_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..8e115202c8d5c3ed5fb049b6ed7b5c550b9d97e3 --- /dev/null +++ b/model/SVM_poly_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79057d3f2f43d864dec9b664da30dc4a5dcacdddb1a6c0efcc71db09ac5cb9e4 +size 1940264 diff --git a/model/SVM_poly_JAK2.sav b/model/SVM_poly_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..53ca81ac9b59b0b9d8a6fba216b772b831aa5f9a --- /dev/null +++ b/model/SVM_poly_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a676c9cdd4f5a0f6ef3b6279b0efb31b5f20e97584b6641e79737004b246eeca +size 4085456 diff --git a/model/SVM_poly_JAK3.sav b/model/SVM_poly_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..701843efdf30078f2dee828667afe01d346dab24 --- /dev/null +++ b/model/SVM_poly_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6bc860f6305f95e0d3bfbbdaa08fc4ddd96b5ceefe46914acb400b7e97b881 +size 3469832 diff --git a/model/SVM_poly_TYK2.sav b/model/SVM_poly_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..4b995e1c131560e1de257513111b80441861d542 --- /dev/null +++ b/model/SVM_poly_TYK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:328bd74b228766a969f49dfc7abf306c65836faf43c34c8372ca4c02e774ff15 +size 1022252 diff --git a/model/SVM_rbf_JAK1.sav b/model/SVM_rbf_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..eaae6fe81548cfff05e0d0b0d6a9cd3c30040f00 --- /dev/null +++ b/model/SVM_rbf_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3cfb9640f63337cc121bd2dd72ec441837f34d47a97b82ebf6ee8843b5d6ad +size 2508427 diff --git a/model/SVM_rbf_JAK2.sav b/model/SVM_rbf_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..3c211311c3773c75f2f2e0148defe1e80d370f2c --- /dev/null +++ b/model/SVM_rbf_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69e49f3be53f7c1eb019bd00542dfdec41171182d5b9accdf114419232e7bd0 +size 5014315 diff --git a/model/SVM_rbf_JAK3.sav b/model/SVM_rbf_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..6b6f783592841970ac08639aa570c30d4c2e0953 --- /dev/null +++ b/model/SVM_rbf_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cadb364e726dd14fb75a291a4bc2d07299ddc09b375ed0340423f0a88fbe062 +size 4153255 diff --git a/model/SVM_rbf_TYK2.sav b/model/SVM_rbf_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..98c3268976d4caf5602e2b63969467ff89682f80 --- /dev/null +++ b/model/SVM_rbf_TYK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41e2bb07178ea20af8eb4ed04e5c11664776c027ad6d2be4631c3b258e12ebb +size 1167343 diff --git a/model/SVM_sigmoid_JAK1.sav b/model/SVM_sigmoid_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..0249158feb10bacc7a22389f5680069571bcda2b --- /dev/null +++ b/model/SVM_sigmoid_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ac4081b1143f11ff142e9c63cd56b27b21298ce64fd9d723f562de5af64b59 +size 3039983 diff --git a/model/SVM_sigmoid_JAK2.sav b/model/SVM_sigmoid_JAK2.sav new 
file mode 100644 index 0000000000000000000000000000000000000000..b6e12d44fc02a787ef1878c9f470d86c2a878cd7 --- /dev/null +++ b/model/SVM_sigmoid_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc1ead7610dbe3ced07c0f24f492d0edffaf865a455afaeb3e2111d2975b053 +size 5172971 diff --git a/model/SVM_sigmoid_JAK3.sav b/model/SVM_sigmoid_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..e7d5782c4fb387fbb3a327754a346684c329ed80 --- /dev/null +++ b/model/SVM_sigmoid_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f96177050f2b5be8ee04b88aa31ff09ed99d69af35e4056c0d9ff3af83e4db3 +size 4128851 diff --git a/model/SVM_sigmoid_TYK2.sav b/model/SVM_sigmoid_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..9118a120f238b559cb6a187e8a805c6e293b1f23 --- /dev/null +++ b/model/SVM_sigmoid_TYK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df678388f71bb3feee5a548aa40e3d040aa852794000efb149661a6bc8939f95 +size 1264979 diff --git a/model/XGBoost_JAK1.pkl b/model/XGBoost_JAK1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c47bbcb66e2df02545bc2fe00567e7a617eab200 --- /dev/null +++ b/model/XGBoost_JAK1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05c40065253537a1e7fc2e2e7fc325bc507082c1353e5415f3423238b251ddd +size 275721 diff --git a/model/XGBoost_JAK1.sav b/model/XGBoost_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..d1934edd429655b254404bd1e0f7624a91fd8741 Binary files /dev/null and b/model/XGBoost_JAK1.sav differ diff --git a/model/XGBoost_JAK2.pkl b/model/XGBoost_JAK2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fa4396f1324f021aa18e1848695af8f9bfaa5b15 --- /dev/null +++ b/model/XGBoost_JAK2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:de17c52d2e85faefd9336d88683de1a97b40b110a54406eea2682aafab826f56 +size 335369 diff --git a/model/XGBoost_JAK2.sav b/model/XGBoost_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..db8a6a31c7d356ada0375e2cf8b06cb94d273b2e Binary files /dev/null and b/model/XGBoost_JAK2.sav differ diff --git a/model/XGBoost_JAK3.pkl b/model/XGBoost_JAK3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..16bb1301435d4e76b68b68dd56a4f2f70719eea0 --- /dev/null +++ b/model/XGBoost_JAK3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aad1fde19b8ddff0e4474d023f199f095435f51617efc778dbf9f45b76611a9 +size 312589 diff --git a/model/XGBoost_JAK3.sav b/model/XGBoost_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..85ce9f0646229bebc8354ed6efb32f7fe42f79df Binary files /dev/null and b/model/XGBoost_JAK3.sav differ diff --git a/model/XGBoost_TYK2.pkl b/model/XGBoost_TYK2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d1d8d7bd9156854d20774974d41a7241285d59c7 --- /dev/null +++ b/model/XGBoost_TYK2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440a6ee2a1a6b8ddd1b3c897a2604665bebe34a1e213b541741faf1941b0fd39 +size 220572 diff --git a/model/XGBoost_TYK2.sav b/model/XGBoost_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..afc663599b5d36d0770ec57c7e9adf037434e61b Binary files /dev/null and b/model/XGBoost_TYK2.sav differ diff --git a/model/a b/model/a new file mode 100644 index 0000000000000000000000000000000000000000..78981922613b2afb6025042ff6bd878ac1994e85 --- /dev/null +++ b/model/a @@ -0,0 +1 @@ +a diff --git a/model/knn_JAK1.pkl b/model/knn_JAK1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cb63ea2b80b1b4fc71da7307c4b1642cfddf5bc3 --- /dev/null +++ b/model/knn_JAK1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:70f9ea5e17ee6c7e8516de48d0b8e20ff794b810a6e85cfb30d629b29009f13c +size 7927555 diff --git a/model/knn_JAK1.sav b/model/knn_JAK1.sav new file mode 100644 index 0000000000000000000000000000000000000000..8984fbfcebaae6cedede6b9e67a8cf8279aa2f70 --- /dev/null +++ b/model/knn_JAK1.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a518332449e3d2c23ac63148334905acb4043035ac32e30d477967e15ced4775 +size 7927555 diff --git a/model/knn_JAK2.pkl b/model/knn_JAK2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..da85ab74497452ce941c4b8ff37f068e5cba82c1 --- /dev/null +++ b/model/knn_JAK2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb658b4af02459d6da6094c1773491c5829625fc77d399cbda72da5a30a7132 +size 10924684 diff --git a/model/knn_JAK2.sav b/model/knn_JAK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..e0a2e1367ddb44eef17b8b6031c3bcd9141c8d83 --- /dev/null +++ b/model/knn_JAK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4cec6ebf2c095765ede0e7e67936d888bb8b6f3ead26c7def0e273eccb1a24 +size 10924684 diff --git a/model/knn_JAK3.pkl b/model/knn_JAK3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..405de872ff6f270c5c3c861c3fec45dc51e27c97 --- /dev/null +++ b/model/knn_JAK3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64643fa32e184efeb9d492cd6304b164911427fc1932e4cbdc9456205f2bc70a +size 8302531 diff --git a/model/knn_JAK3.sav b/model/knn_JAK3.sav new file mode 100644 index 0000000000000000000000000000000000000000..15f375c37b8d12ba4692c4abb6155b12e9f12c5f --- /dev/null +++ b/model/knn_JAK3.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09aaa884380d4bff4014a2c0c98e9bfef8eee434256a9fd8b06ee84093b58c37 +size 8302531 diff --git a/model/knn_TYK2.pkl b/model/knn_TYK2.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..d231da080bcf502524108bf3f2abdf1101ecbe4c --- /dev/null +++ b/model/knn_TYK2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528a7b945d455f5d1f7163ac3b4e193f8cbc66909850a81cf1329ce66b2b190 +size 2606659 diff --git a/model/knn_TYK2.sav b/model/knn_TYK2.sav new file mode 100644 index 0000000000000000000000000000000000000000..32a019b69d9d8eb1ba3179bde604d72649b5d5d1 --- /dev/null +++ b/model/knn_TYK2.sav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c9c18f4ec9e7066a16cde055c90124a295ccb707ac779d2fc28171a56a4efb +size 2606659 diff --git a/pages/1_JAK.py b/pages/1_JAK.py new file mode 100644 index 0000000000000000000000000000000000000000..59a05fbd62d768a61066ca53b18327e678a6dfbc --- /dev/null +++ b/pages/1_JAK.py @@ -0,0 +1,874 @@ +import streamlit as st +from rdkit.Chem import MACCSkeys +from rdkit import Chem +import numpy as np +import pandas as pd +import xgboost as xgb +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from torch.utils.data import Dataset +import torch.utils.data +from torch_geometric.data import DataLoader +from torch_geometric.data import Data +import os +from tqdm import tqdm +import pandas as pd +import numpy as np +import torch.optim as optim +import torch.nn as nn +from torch.utils.data import Dataset, DataLoader +from sklearn.metrics import classification_report, confusion_matrix, average_precision_score, roc_auc_score +model_path = 'model/' +import torch +import matplotlib.pyplot as plt +import torch.nn.functional as F +from torch.autograd import Variable +from torch.utils.data import Dataset +import torch.utils.data +from torch_geometric.data import DataLoader +from torch_geometric.data import Data + +from torch_geometric.nn import GATConv, RGCNConv, GCNConv, global_add_pool, global_mean_pool, global_max_pool, GlobalAttention, Set2Set +from sklearn.metrics import f1_score, 
# Module-level configuration shared by the model definitions in this page.
# `device` is bound to the string 'cpu' here and rebound to torch.device('cpu')
# further down the file.
device = 'cpu'
# NOTE(review): CNN_predict builds its checkpoint path from a literal 'model/'
# rather than from this constant - presumably other loaders use it; confirm.
model_path = 'model/'

# NOTE(review): adj_max is not referenced by the code visible in this part of the file.
adj_max=80
# Default output width of `encoder` and input width of the classification head
# in `CNNforclassification`.
# NOTE(review): presumably the MACCS fingerprint length - confirm.
fps_len=167
# Passed to `CNNforclassification` as the encoder's `input_length` in CNN_predict.
max_len=120

# Character -> integer id table used by `jak_dataset` to encode a SMILES string
# one character at a time (so a two-letter symbol such as "Cl" is encoded as
# 'C' followed by 'l'). Ids start at 1: `jak_dataset` zero-fills unused
# positions and `encoder` declares `padding_idx=0`, so 0 means "padding".
vocabulary = {'C': 1, 'c': 2, '1': 3, '(': 4, '-': 5, '2': 6, 's': 7, 'N': 8, '=': 9, ')': 10, 'n': 11, '[': 12,
              '@': 13,
              'H': 14, ']': 15, 'O': 16, 'S': 17, '3': 18, 'l': 19, 'B': 20, 'r': 21, '/': 22, '\\': 23, 'o': 24,
              '4': 25,
              '5': 26, '6': 27, '7': 28, '+': 29, '.': 30, 'I': 31, 'F': 32, '8': 33, '#': 34, 'P': 35, '9': 36,
              'a': 37,
              '%': 38, '0': 39, 'i': 40, 'e': 41, 'L': 42, 'K': 43, 't': 44, 'T': 45, 'A': 46, 'g': 47, 'Z': 48,
              'M': 49,
              'R': 50, 'p': 51, 'b': 52, 'X': 53}
class jak_dataset(Dataset):
    """Character-level SMILES dataset for the CNN classifier.

    Each item is ``(X, y)`` where ``X`` is a ``LongTensor`` of length
    ``max_len`` holding one vocabulary id per SMILES character (zero-padded on
    the right, truncated to ``max_len``), and ``y`` is ``1`` when the row's
    ``Activity`` equals 1 and ``0`` otherwise.

    Args:
        dataframe: table with a ``Smiles`` column (strings) and an
            ``Activity`` column. Rows are looked up with ``column[idx]``, so
            the frame is expected to carry the default ``0..n-1`` index.
        max_len: number of character positions kept per SMILES string.
        vocab: optional character -> id mapping. When omitted, the
            module-level ``vocabulary`` table is used (the original behaviour).

    Raises:
        KeyError: from ``__getitem__`` when a SMILES character has no id in
            the vocabulary; the message names the character and the string.
    """

    def __init__(self, dataframe, max_len=80, vocab=None):
        super(jak_dataset, self).__init__()
        self.len = len(dataframe)
        self.dataframe = dataframe
        self.max_len = max_len
        # Only fall back to the module-level table when no mapping was given,
        # so the class is usable (and testable) with a custom alphabet.
        self.vocab = vocabulary if vocab is None else vocab

    def __getitem__(self, idx):
        y = 1 if self.dataframe.Activity[idx] == 1 else 0
        smiles = self.dataframe.Smiles[idx]

        # Id 0 is the padding value, so start from an all-zero row.
        X = torch.zeros(self.max_len, dtype=torch.long)
        # A dedicated loop variable keeps the row index `idx` intact
        # (the original loop reused the name `idx` for the character position).
        for pos, char in enumerate(smiles[:self.max_len]):
            try:
                X[pos] = self.vocab[char]
            except KeyError:
                raise KeyError(
                    f"character {char!r} at position {pos} of SMILES "
                    f"{smiles!r} is not in the vocabulary"
                ) from None

        return X, y

    def __len__(self):
        return self.len
def generate_scaffold(smiles, include_chirality=False):
    """Return the Bemis-Murcko scaffold of a molecule as a SMILES string.

    :param smiles: SMILES string of the molecule
    :param include_chirality: forwarded to RDKit as ``includeChirality``
    :return: SMILES string of the scaffold
    """
    # Thin wrapper: the whole computation is delegated to RDKit.
    return MurckoScaffold.MurckoScaffoldSmiles(
        smiles=smiles,
        includeChirality=include_chirality,
    )
def load_smi_y(enzyme):
    """Load the SMILES strings and activity labels for one enzyme.

    The table ``<enzyme>_MACCS.csv`` is looked up in ``data/`` first and, if
    it does not exist there, in the current working directory.

    :param enzyme: enzyme name used as the file-name prefix (e.g. ``'JAK1'``)
    :return: tuple ``(X, y)`` of the ``Smiles`` and ``Activity`` columns
    :raises FileNotFoundError: if the file is in neither location
    :raises KeyError: if the CSV lacks a ``Smiles`` or ``Activity`` column
    """
    filename = enzyme + '_' + 'MACCS.csv'
    try:
        data = pd.read_csv('data/' + filename)
    except FileNotFoundError:
        # Only a missing file triggers the fallback. The previous bare
        # `except:` also hid parse/permission errors (and KeyboardInterrupt)
        # and then reported a misleading "file not found" for the second path.
        data = pd.read_csv(filename)

    X = data['Smiles']
    y = data['Activity']
    return X, y
def CNN_predict(enzyme, smi):
    """Predict inhibition of one enzyme for a single SMILES with the CNN model.

    Loads ``model/CNN_<enzyme>.pt`` into a fresh ``CNNforclassification``,
    encodes ``smi`` with ``jak_dataset`` and runs one forward pass on CPU.

    :param enzyme: enzyme name used in the checkpoint file name (e.g. ``'JAK1'``)
    :param smi: SMILES string of the query molecule
    :return: ``(y_prob, y_pred)`` where ``y_prob`` is a one-element list with
        the softmax probability of class 1 and ``y_pred`` is a one-element
        ``LongTensor`` with the arg-max class
    :raises FileNotFoundError: if the checkpoint file does not exist
    :raises KeyError: if ``smi`` contains a character missing from ``vocabulary``
    """
    ml = 'CNN'
    file_path = 'model/' + ml + '_' + enzyme + '.pt'

    model = CNNforclassification(max_len, len(vocabulary))
    model.load_state_dict(torch.load(file_path, map_location=torch.device('cpu')))
    model.eval()

    # The label column is a placeholder: jak_dataset requires it, but the
    # label it returns is ignored at prediction time.
    query_df = pd.DataFrame({'Smiles': [smi], 'Activity': [0]})
    # NOTE(review): jak_dataset is used with its own default max_len (80), not
    # the module-level max_len (120) passed to the model above, so longer
    # SMILES are truncated - behaviour kept as in the original; confirm it
    # matches how the checkpoints were trained.
    X, _ = jak_dataset(query_df)[0]

    # A single sample needs no DataLoader; add the batch dimension by hand and
    # skip autograd bookkeeping, which inference does not need.
    with torch.no_grad():
        output = model(X.unsqueeze(0))

    _, y_pred = torch.max(output, 1)
    y_prob = torch.softmax(output, 1)[:, 1].tolist()
    return y_prob, y_pred
= RGCNConv(6*in_embd, layer_embd, num_relations) + self.GATConv2 = RGCNConv(layer_embd, out_embd*2, num_relations) + +# self.GATConv1 = GCNConv(6*in_embd, layer_embd, num_relations) +# self.GATConv2 = GCNConv(layer_embd, out_embd*2, num_relations) + + self.GATConv1.reset_parameters() + self.GATConv2.reset_parameters() + + self.activation = nn.Sigmoid() + self.d = out_embd + + self.pool = GlobalAttention(gate_nn=nn.Sequential( \ + nn.Linear(out_embd, out_embd), nn.BatchNorm1d(out_embd), nn.ReLU(), nn.Linear(out_embd, 1))) + + self.graph_linear = nn.Linear(out_embd, 1) + + def recognition_model(self, x, edge_index, edge_type, batch): + for i in range(6): + embds = self.embedding[i](x[:,i]) + if i == 0: + x_ = embds + else: + x_ = torch.cat((x_, embds), 1) + out = self.activation(self.GATConv1(x_, edge_index, edge_type)) + out = self.activation(self.GATConv2(out, edge_index, edge_type)) + +# out = self.activation(self.GATConv1(x_, edge_index)) +# out = self.activation(self.GATConv2(out, edge_index)) + + mu = out[:,0:self.d] + logvar = out[:,self.d:2*self.d] + + return mu, logvar + + def reparametrize(self, mu, logvar): + std = logvar.mul(0.5).exp_() + eps = Variable(std.data.new(std.size()).normal_()) + + return eps.mul(std) + mu + + def generation_model(self, Z): + out = self.activation(Z@Z.T) + + return out + + def forward(self, x, edge_index, edge_type, batch, type_): + if type_=='pretrain': + mu, logvar = self.recognition_model(x, edge_index, edge_type, batch) + Z = self.reparametrize(mu, logvar) + A_hat = self.generation_model(Z) + + N = x.size(0) + A = torch.zeros((N,N), device=device) + with torch.no_grad(): + for i in range(edge_index.size(1)): + A[edge_index[0,i], edge_index[1,i]] = 1 + # print(A.size(),A_hat.size()) + return A, A_hat, mu, logvar + else: + mu = self.cal_mu(x, edge_index, edge_type, batch) + out = self.pool(mu, batch) + out = self.graph_linear(out) + out = self.activation(out) + return out + + def cal_mu(self, x, edge_index, edge_type, batch): 
class GCN_VAE(torch.nn.Module):
    """Variational graph auto-encoder built on two ``GCNConv`` layers.

    The model has two modes, selected by the ``type_`` argument of ``forward``:

    * ``'pretrain'`` - encode the nodes, sample latent vectors and reconstruct
      the adjacency matrix as ``sigmoid(Z @ Z.T)``.
    * anything else - pool the per-node means with attention and map each
      graph to a single sigmoid score.

    Args:
        in_embd: embedding width used for each of the six atom-feature columns.
        layer_embd: output width of the first convolution.
        out_embd: latent width; the second convolution emits ``2 * out_embd``
            channels, split into mean and log-variance.
        num_relations: forwarded positionally to ``GCNConv`` (see note in
            ``__init__``).
        dropout: accepted for signature parity with the sibling VAE classes;
            not used by this class.

    Attribute names (``embedding``, ``GATConv1``, ``GATConv2``, ``pool``,
    ``graph_linear``) are kept exactly as before so existing checkpoints load.
    """

    def __init__(self, in_embd, layer_embd, out_embd, num_relations, dropout):
        super(GCN_VAE, self).__init__()
        # One embedding table per categorical column of the node-feature
        # matrix; the first argument is the number of categories it accepts.
        self.embedding = nn.ModuleList([
            nn.Embedding(35, in_embd), nn.Embedding(10, in_embd),
            nn.Embedding(5, in_embd), nn.Embedding(7, in_embd),
            nn.Embedding(5, in_embd), nn.Embedding(5, in_embd),
        ])

        # NOTE(review): GCNConv has no notion of relations; its third
        # positional parameter appears to be `improved`, so a non-zero
        # `num_relations` presumably switches that flag on. Left untouched
        # because the saved weights were trained with this call - confirm
        # against the installed torch_geometric version before changing it.
        self.GATConv1 = GCNConv(6 * in_embd, layer_embd, num_relations)
        self.GATConv2 = GCNConv(layer_embd, out_embd * 2, num_relations)

        self.GATConv1.reset_parameters()
        self.GATConv2.reset_parameters()

        self.activation = nn.Sigmoid()
        self.d = out_embd

        self.pool = GlobalAttention(gate_nn=nn.Sequential(
            nn.Linear(out_embd, out_embd), nn.BatchNorm1d(out_embd),
            nn.ReLU(), nn.Linear(out_embd, 1)))

        self.graph_linear = nn.Linear(out_embd, 1)

    def recognition_model(self, x, edge_index, edge_type, batch):
        """Encode nodes into ``(mu, logvar)``, each of width ``out_embd``.

        ``x`` is indexed as ``x[:, i]`` for ``i`` in ``0..5`` and each column is
        fed to an embedding table, so it must be an integer tensor with six
        columns. ``edge_type`` and ``batch`` are accepted but not used here.
        Both outputs have passed through the sigmoid activation.
        """
        # Embed every feature column and concatenate along the channel axis.
        x_ = torch.cat(
            [table(x[:, col]) for col, table in enumerate(self.embedding)], 1)

        out = self.activation(self.GATConv1(x_, edge_index))
        out = self.activation(self.GATConv2(out, edge_index))

        mu = out[:, 0:self.d]
        logvar = out[:, self.d:2 * self.d]

        return mu, logvar

    def reparametrize(self, mu, logvar):
        """Sample ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        std = torch.exp(0.5 * logvar)
        # randn_like replaces the deprecated `Variable(std.data.new(...).normal_())`.
        eps = torch.randn_like(std)

        return eps.mul(std) + mu

    def generation_model(self, Z):
        """Decode latent node vectors into edge probabilities ``sigmoid(Z @ Z.T)``."""
        out = self.activation(Z @ Z.T)

        return out

    def forward(self, x, edge_index, edge_type, batch, type_):
        """Run the model in pre-training or prediction mode.

        Returns ``(A, A_hat, mu, logvar)`` when ``type_ == 'pretrain'`` - ``A``
        is the dense 0/1 adjacency matrix built from ``edge_index`` and
        ``A_hat`` its reconstruction - and otherwise the sigmoid score produced
        by ``graph_linear`` on the attention-pooled node means.
        """
        if type_ == 'pretrain':
            mu, logvar = self.recognition_model(x, edge_index, edge_type, batch)
            Z = self.reparametrize(mu, logvar)
            A_hat = self.generation_model(Z)

            N = x.size(0)
            # Build the target on the input's device instead of relying on a
            # module-level `device` global, and fill it with a single indexed
            # assignment rather than one Python iteration per edge.
            A = torch.zeros((N, N), device=x.device)
            A[edge_index[0], edge_index[1]] = 1
            return A, A_hat, mu, logvar
        else:
            mu = self.cal_mu(x, edge_index, edge_type, batch)
            out = self.pool(mu, batch)
            out = self.graph_linear(out)
            out = self.activation(out)
            return out

    def cal_mu(self, x, edge_index, edge_type, batch):
        """Return only the per-node mean vectors of the encoder."""
        mu, _ = self.recognition_model(x, edge_index, edge_type, batch)

        return mu
class GDataset(Dataset):
    """Dataset that turns per-molecule arrays into ``Data`` graph objects.

    ``nodes``, ``edges``, ``relations`` and ``y`` are indexable by molecule;
    ``idx`` lists which molecule positions belong to this dataset.
    """

    def __init__(self, nodes, edges, relations, y, idx):
        super(GDataset, self).__init__()

        self.nodes = nodes
        self.edges = edges
        self.y = y
        self.relations = relations
        self.idx = idx

    @staticmethod
    def _as_edge_index(pairs):
        """Convert a sequence of ``[i, j]`` pairs into a ``(2, E)`` long tensor.

        An empty input (a molecule with no bonds) yields shape ``(2, 0)``.
        Transposing the empty 1-D array directly would give a 1-D tensor
        instead.
        """
        pairs = torch.tensor(pairs, dtype=torch.long)
        return pairs.reshape(-1, 2).t().contiguous()

    def __getitem__(self, idx):
        """Return the ``Data`` object for the ``idx``-th entry of ``self.idx``."""
        idx = self.idx[idx]
        edge_index = self._as_edge_index(self.edges[idx])
        x = torch.tensor(self.nodes[idx], dtype=torch.long)
        y = torch.tensor(self.y[idx], dtype=torch.float)
        edge_type = torch.tensor(self.relations[idx], dtype=torch.float)
        return Data(x=x, edge_index=edge_index, edge_type=edge_type, y=y)

    def __len__(self):
        """Number of selected molecules."""
        return len(self.idx)

    def collate_fn(self, batch):
        # Intentionally a no-op placeholder, kept for interface compatibility.
        pass
def gen_smiles2graph(sml):
    """Convert a SMILES string into node features and bond matrices.

    Returns ``(nodes, adj, orders)``:

    * ``nodes``  - ``(N, 6)`` array holding, per atom: index of the atomic
      number in the supported-element list, degree, formal charge,
      hybridization, aromatic flag and chiral tag.
    * ``adj``    - ``(N, N)`` symmetric 0/1 adjacency matrix.
    * ``orders`` - ``(N, N)`` symmetric matrix of bond codes
      (1 single, 2 double, 3 triple, 4 aromatic).

    If the SMILES cannot be parsed, ``('error', 'error', 'error')`` is
    returned.

    Raises:
        KeyError: an atom's atomic number is not in the supported list.
        Warning: a bond has a type outside the four supported ones.
    """
    ls = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19, 20, 30,
          33, 34, 35, 36, 37, 38, 47, 52, 53, 54, 55, 56, 83, 88]
    # Map atomic number -> position in ``ls``.
    dic = {atomic_num: pos for pos, atomic_num in enumerate(ls)}

    m = rdkit.Chem.MolFromSmiles(sml)
    # MolFromSmiles returns None for an unparsable string. The check must
    # come before any use of ``m``: the original dereferenced ``m`` first,
    # so its 'error' return could never be reached.
    if m is None:
        return 'error', 'error', 'error'

    order_string = {
        rdkit.Chem.rdchem.BondType.SINGLE: 1,
        rdkit.Chem.rdchem.BondType.DOUBLE: 2,
        rdkit.Chem.rdchem.BondType.TRIPLE: 3,
        rdkit.Chem.rdchem.BondType.AROMATIC: 4,
    }

    N = m.GetNumAtoms()
    nodes = np.zeros((N, 6))
    for atom in m.GetAtoms():
        nodes[atom.GetIdx()] = [
            dic[atom.GetAtomicNum()],
            atom.GetDegree(),
            atom.GetFormalCharge(),
            atom.GetHybridization(),
            atom.GetIsAromatic(),
            atom.GetChiralTag(),
        ]

    adj = np.zeros((N, N))
    orders = np.zeros((N, N))
    for bond in m.GetBonds():
        a, b = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        u, v = min(a, b), max(a, b)
        bond_type = bond.GetBondType()
        if bond_type not in order_string:
            # The original concatenated str + BondType here, which raised
            # TypeError instead of the intended Warning.
            raise Warning(f"Ignoring bond order {bond_type}")
        order = order_string[bond_type]
        adj[u, v] = 1
        adj[v, u] = 1
        orders[u, v] = order
        orders[v, u] = order
    return nodes, adj, orders
def smile_list_to_MACCS(smi_list):
    """Return the MACCS key bit string of every SMILES in ``smi_list``.

    Each result is a list of single characters (``'0'`` / ``'1'``), one per
    bit of ``GenMACCSKeys(mol).ToBitString()``.

    Raises:
        ValueError: a SMILES string cannot be parsed by RDKit.
    """
    MACCS_list = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None. Fail
            # with a readable message instead of the opaque argument error
            # GenMACCSKeys would raise on None.
            raise ValueError(f"Invalid SMILES string: {smi!r}")
        MACCS_list.append(list(MACCSkeys.GenMACCSKeys(mol).ToBitString()))
    return MACCS_list
# --- Page body: collect the sidebar inputs, echo them, run predictions. ---
with st.sidebar:
    name, smi, enzyme, model_chosen = user_input_features()

st.subheader('User Input parameters:')

st.write('Current compound: ', name)
st.write('Current compound SMILE: ', smi)
st.write('Selected kinase:', enzyme)
st.write('Selected model: ', model_chosen)


if st.button('Start Prediction'):

    if not model_chosen:
        st.write('Did not choose model!')
    if not enzyme:
        st.write('Did not choose JAK kinase!')

    if smi == '':
        st.write('NO SMILES input!')

    elif model_chosen and enzyme:

        # Fingerprint generation doubles as the SMILES validity check.
        # ``except Exception`` is used instead of a bare ``except`` so that
        # BaseException-derived control-flow exceptions are not swallowed.
        try:
            MACCS_list = smile_list_to_MACCS([smi])
            header = ['bit' + str(i) for i in range(167)]
            maccs = pd.DataFrame(MACCS_list, columns=header).values
            valid_smi = True
        except Exception:
            st.write('Invalid compound SMILES! ')
            valid_smi = False

        if valid_smi:
            try:
                label = ['noninhibitor', 'inhibitor']
                prediction = []
                for jak in enzyme:
                    for ml in model_chosen:
                        modelname = ml + '_' + jak + '.sav'
                        try:
                            if ml == 'GVAE':
                                pred = GVAE_pred(smi, jak)
                            elif ml == 'CNN':
                                _prob, pred = CNN_predict(jak, smi)
                            else:
                                # NOTE(review): every other model name,
                                # including 'chembert', is loaded as a
                                # pickled '.sav' file - confirm one exists
                                # for each choice.
                                # Pickle files are only read from the local
                                # model directory. The context manager
                                # closes the handle the original leaked.
                                with open(model_path + modelname, 'rb') as fh:
                                    model = pickle.load(fh)
                                pred = model.predict(maccs)
                            prediction.append(label[int(pred)])
                        except Exception:
                            if ml in ('GVAE', 'CNN'):
                                st.write('CANNOT LOAD ', ml, ' for ', jak)
                            else:
                                st.write(modelname, ' cannot be loaded')
                            # Keep the table rectangular when a model fails.
                            prediction.append('NA')

                # One row per kinase, one column per model.
                vec = np.array(prediction)
                df = pd.DataFrame(vec.reshape(-1, len(model_chosen)))
                df.columns = model_chosen
                df.index = enzyme
                if name == '':
                    name = 'test compound'
                st.subheader('Evaluation report for ' + name)
                st.write(df)
            except Exception:
                st.write('CANNOT FINISH PREDICTION')
# Metric definitions shown on the Model Evaluation page.
# TP/TN/FP/FN = true/false positives/negatives.

# Fix: the denominator previously read "TN + TN + FP + FN". Accuracy
# divides by all four confusion-matrix cells.
st.latex(r'''
    Accuracy = \left(\frac{TP +TN}{TP + TN + FP + FN}\right)
    ''')

st.latex(r'''
    Precision = \frac{TP}{TP + FP}
    ''')
st.latex(r'''
    Recall (SE) = \frac{TP}{TP + FN}
    ''')
st.latex(r'''
    SP = \frac{TN}{TN + FP}
    ''')
st.latex(r'''
    Weighted \ Accuracy = \frac{SE + SP}{2}
    ''')
st.latex(r'''
    MCC = \frac{TP \times TN - FN \times FP}{\sqrt{(TP + FP)(TP + FN)(TN + FN) (TN + FP)}}
    ''')

# Pre-rendered results table; the path is resolved against the process's
# working directory.
image = Image.open('table.png')
st.image(image, caption='Table: Results of test set in JAK1, JAK2, JAK3 and TYK2', output_format='png', width=1000, use_column_width='never')
def user_input_features():
    """Render the kinase and model checkboxes and return the selections.

    Returns ``(enzyme, model)``: two lists of identifiers in display order.
    Ticking a "select all" box returns the full list regardless of the
    individual boxes.
    """
    kinase_names = ['JAK1', 'JAK2', 'JAK3', 'TYK2']
    # (checkbox label, identifier used for model/AUC file names)
    model_options = [
        ('KNN', 'knn'),
        ('SVM_linear', 'SVM_linear'),
        ('SVM_poly', 'SVM_poly'),
        ('SVM_rbf', 'SVM_rbf'),
        ('SVM_sigmoid', 'SVM_sigmoid'),
        ('RF', 'RF'),
        ('XGBoost', 'XGBoost'),
        ('CNN', 'CNN'),
        ('GraphVAE', 'GVAE'),
        ('chembert', 'chembert'),
    ]

    st.write('Select JAK kinase: ')
    # The comprehension calls st.checkbox once per entry, so every box is
    # rendered in order whether or not it is ticked.
    ticked_kinases = [k for k in kinase_names if st.checkbox(k)]
    if st.checkbox('Select all JAKs'):
        enzyme = list(kinase_names)
    else:
        enzyme = ticked_kinases

    st.write('Select model: ')
    ticked_models = [key for text, key in model_options if st.checkbox(text)]
    if st.checkbox('Select all models'):
        model = [key for _, key in model_options]
    else:
        model = ticked_models

    return enzyme, model
class chembert_encoder(nn.Module):
    """ChemBERTa language model followed by dropout and a linear projection.

    ``forward`` takes a batch of SMILES strings, tokenizes them, runs the
    pretrained model and projects the logits at sequence position 0 to
    ``output_dim`` features.
    """

    def __init__(self, output_dim=fps_len, dropout=0.5):
        super(chembert_encoder, self).__init__()
        self.bert = AutoModelWithLMHead.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
        self.tokenizer = AutoTokenizer.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
        self.dropout = nn.Dropout(dropout)
        # NOTE(review): 767 must equal the last dimension of the model's
        # logits (presumably the checkpoint's vocabulary size) - confirm.
        self.w = nn.Linear(767, output_dim)

    def forward(self, x):
        """Encode a list of SMILES strings into ``(len(x), output_dim)`` features."""
        input_feat = self.tokenizer.batch_encode_plus(
            x, max_length=512,
            padding='longest',  # implements dynamic padding
            truncation=True,
            return_tensors='pt',
            return_attention_mask=True,
            return_token_type_ids=True,
        )

        # Fix: the original branched on ``cuda_available``, a name this
        # module never defines, so every call raised NameError. Follow the
        # device this module's own weights live on instead.
        target = self.w.weight.device
        input_ids = input_feat['input_ids'].to(target)
        attention_mask = input_feat['attention_mask'].to(target)

        outputs = self.bert(input_ids, attention_mask=attention_mask,
                            output_hidden_states=None).logits[:, 0, :]
        return self.w(self.dropout(outputs))
def chembert_predict(enzyme, smi):
    """Classify one SMILES string with the fine-tuned chemBERTa model.

    Loads ``model/chembert_<enzyme>.pt`` onto the CPU and runs one forward
    pass.

    Returns:
        ``(y_prob, y_pred)``: ``y_prob`` is a one-element list holding the
        softmax probability of class 1; ``y_pred`` is a one-element tensor
        holding the arg-max class index.
    """
    ml = 'chembert'
    file_path = 'model/' + ml + '_' + enzyme + '.pt'

    model = chembert()
    model.load_state_dict(torch.load(file_path, map_location=torch.device('cpu')))
    model.eval()

    # The original also built an AdamW optimizer and a class-weight table
    # that were never used, and wrapped the single string in a
    # DataFrame/Dataset/DataLoader. The model only needs a list of SMILES.
    # no_grad avoids building an autograd graph during inference.
    with torch.no_grad():
        output = model([smi])

    _, y_pred = torch.max(output, 1)
    y_prob = torch.softmax(output, 1)[:, 1].tolist()

    return y_prob, y_pred
+matplotlib==3.5.1 +torch-sparse==0.6.14 +transformers==4.21.1 +torch-scatter==2.0.9 diff --git a/streamlit-hello-2022-08-13-03-08-23.gif b/streamlit-hello-2022-08-13-03-08-23.gif new file mode 100644 index 0000000000000000000000000000000000000000..c7a058148cae2488d3f67732996296427f57095f --- /dev/null +++ b/streamlit-hello-2022-08-13-03-08-23.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef55c566d266465cc8b5c7cbf4cc33d316ced9949f7bdc27f6ef2474bcaffa32 +size 3612250 diff --git a/streamlit_logo.png b/streamlit_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..417f5460ea632bafebf7ee24b8c1131113c1450b Binary files /dev/null and b/streamlit_logo.png differ diff --git a/table.png b/table.png new file mode 100644 index 0000000000000000000000000000000000000000..a1530cbae90bc27713752f593ded745bb63eff2f Binary files /dev/null and b/table.png differ