Philip Fradkin
committed on
Commit
·
bc2395e
1
Parent(s):
02a4c26
feat: update 4track model
Browse files- README.md +1 -1
- models/orthrus_base_4_track/epoch=18-step=20000.ckpt +2 -2
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=1-step=2000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=10-step=12000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=12-step=14000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=14-step=16000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=16-step=18000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=3-step=4000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=5-step=6000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=7-step=8000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=9-step=10000.ckpt_ridge.csv +13 -0
- models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv +0 -13
- models/orthrus_base_4_track/model_config.json +1 -1
- notebooks/colab_prototye.ipynb +2 -1
- src/model.py +15 -0
README.md
CHANGED
@@ -84,7 +84,7 @@ Here is example code
|
|
84 |
# Sequence for short mRNA
|
85 |
> seq=(
|
86 |
'TCATCTGGATTATACATATTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCA'
|
87 |
-
'ACAAAAGAGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGTTACTTCCTCC
|
88 |
'ACTGAAGTCTTGAACCCCCCAAAGTCATCCATGAGGGTTGGAATCAACTTCTGAAAACACAACAAAACCA'
|
89 |
'TATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGATGGTGCAGAGCTTTATGAAG'
|
90 |
'CAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTATTTCAGTGAAGAGCAGTTAAGAGCCTTGAA'
|
|
|
84 |
# Sequence for short mRNA
|
85 |
> seq=(
|
86 |
'TCATCTGGATTATACATATTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCA'
|
87 |
+
'ACAAAAGAGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGTTACTTCCTCC'
|
88 |
'ACTGAAGTCTTGAACCCCCCAAAGTCATCCATGAGGGTTGGAATCAACTTCTGAAAACACAACAAAACCA'
|
89 |
'TATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGATGGTGCAGAGCTTTATGAAG'
|
90 |
'CAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTATTTCAGTGAAGAGCAGTTAAGAGCCTTGAA'
|
models/orthrus_base_4_track/epoch=18-step=20000.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:157d60e0471bff00023376a4b29a1746b407ae919506baf2cf40e88d0009f8ce
|
3 |
+
size 21409844
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=1-step=2000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_roc_auc,val,0.8220142525972354,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_auprc,val,0.4625396927514182,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.41187612954069797,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.4650231114299552,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7519351051859768,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.3815569357098566,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_roc_auc,val,0.8196707035908555,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_auprc,val,0.3503672527264725,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7118696793044624,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.508566975266374,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7589341222101531,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.47631361227244867,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=10-step=12000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_roc_auc,val,0.8433483249915402,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_auprc,val,0.5132275016061479,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.3924153336558653,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.5055500181171051,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.7462168153619977,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.3901367108773842,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_roc_auc,val,0.825789615510407,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_auprc,val,0.3624099056153837,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.6772307183584606,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.541695841911757,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.7251545422958905,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.5109326806495391,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=12-step=14000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_roc_auc,val,0.8421522962237182,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_auprc,val,0.5078808348506777,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.38159363235269134,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5210525632055019,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.7449572004778382,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.39210775201429937,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_roc_auc,val,0.8253978288124967,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_auprc,val,0.35978864383790576,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.6759492269289844,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5430234153021989,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.7187604645312344,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5170162511471106,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=14-step=16000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_roc_auc,val,0.8453107717715747,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_auprc,val,0.51388271248823,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.38352797937787375,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5187762652770216,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.7464369347298561,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.38989871864513437,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_roc_auc,val,0.825373574980663,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_auprc,val,0.3606313916724556,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.6729050049584757,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5458388693481175,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.7104695665288715,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5250332145677902,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=16-step=18000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_roc_auc,val,0.8440424572851378,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_auprc,val,0.5131805187647871,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.38003889341798525,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.523477567510083,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.7444410742413161,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.3930851174944151,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_roc_auc,val,0.8259117384044564,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_auprc,val,0.36113756335833136,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.6744663045562795,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.5444030097422119,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.7113535602097519,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.5241967682295874,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8446671447835594,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.5141468969910018,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.3816430082054893,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.520855720912494,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7437523843416108,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.39403218950488417,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.825946043024202,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.3617639617322508,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.6739744006625423,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.544860645002531,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7105890797238071,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.5249681390195272,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=3-step=4000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_roc_auc,val,0.8334827448623479,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_auprc,val,0.48054142074091,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.3937403979375475,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.5039354128857179,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7527737213090051,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.3806671640445604,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_roc_auc,val,0.8213135611439488,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_auprc,val,0.35596875648549375,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7108028779272146,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.5099485164455663,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7471981375686599,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.48843724229011504,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=5-step=6000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_roc_auc,val,0.8378129434997147,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_auprc,val,0.4930322732919934,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.42145942081505877,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.46362110810253776,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.7539863277031761,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.3786947841967137,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_roc_auc,val,0.822611781448217,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_auprc,val,0.3550260538018792,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.6853506444183269,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.5338677108741229,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.7403810703891033,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.49591840377848506,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=7-step=8000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_roc_auc,val,0.838934161283643,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_auprc,val,0.499202623569385,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.41299386356832335,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.47458154750742787,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.7506913875148472,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.3835651031902195,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_roc_auc,val,0.8234223639111674,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_auprc,val,0.35690103176614196,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.6861524146069266,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.5330125034795364,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.734732230977574,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.5014295583249777,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=9-step=10000.ckpt_ridge.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_roc_auc,val,0.8419237597159581,
|
3 |
+
go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_auprc,val,0.5067915793618881,
|
4 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.3812646126826232,
|
5 |
+
mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5231334426937999,
|
6 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.7419690874136367,
|
7 |
+
mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.39632707952585045,
|
8 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_roc_auc,val,0.8258357171939567,
|
9 |
+
protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_auprc,val,0.3602220156610885,
|
10 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.6739133283342498,
|
11 |
+
hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5449821329643417,
|
12 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.7232799553052773,
|
13 |
+
hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5127369847559508,
|
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
|
2 |
-
go_mf,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8478578174637246,
|
3 |
-
go_mf,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.5095139671888503,
|
4 |
-
mrfp,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.3546131256685185,
|
5 |
-
mrfp,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.5707018777098897,
|
6 |
-
mrl,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7273312686835766,
|
7 |
-
mrl,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.41711144859054816,
|
8 |
-
protein_loc,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8337499171489103,
|
9 |
-
protein_loc,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.3868738259700707,
|
10 |
-
hl_human,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.5510596183223497,
|
11 |
-
hl_human,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.6528813736053247,
|
12 |
-
hl_mouse,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.5930341784262118,
|
13 |
-
hl_mouse,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.6290704100935987,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/orthrus_base_4_track/model_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"model_class": "ssm", "ssm_model_dim": 256, "ssm_n_layers": 3, "n_tracks":
|
|
|
1 |
+
{"model_class": "ssm", "ssm_model_dim": 256, "ssm_n_layers": 3, "n_tracks": 4, "predict_splice_codon": false, "predict_masked": false}
|
notebooks/colab_prototye.ipynb
CHANGED
@@ -204,7 +204,8 @@
|
|
204 |
" t (gk.Transcript): The transcript object.\n",
|
205 |
" \"\"\"\n",
|
206 |
" seq = \"\".join([genome.dna(exon) for exon in t.exons])\n",
|
207 |
-
" oh =
|
|
|
208 |
" return oh\n",
|
209 |
"\n",
|
210 |
"def create_six_track_encoding(t, channels_last=False):\n",
|
|
|
204 |
" t (gk.Transcript): The transcript object.\n",
|
205 |
" \"\"\"\n",
|
206 |
" seq = \"\".join([genome.dna(exon) for exon in t.exons])\n",
|
207 |
+
" oh = \n",
|
208 |
+
" _oh(seq)\n",
|
209 |
" return oh\n",
|
210 |
"\n",
|
211 |
"def create_six_track_encoding(t, channels_last=False):\n",
|
src/model.py
CHANGED
@@ -4,10 +4,25 @@ import os
|
|
4 |
import json
|
5 |
import torch
|
6 |
import torch.nn as nn
|
|
|
7 |
|
8 |
from mamba_ssm.modules.mamba_simple import Mamba, Block
|
9 |
from huggingface_hub import PyTorchModelHubMixin
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def create_block(
|
12 |
d_model,
|
13 |
ssm_cfg=None,
|
|
|
4 |
import json
|
5 |
import torch
|
6 |
import torch.nn as nn
|
7 |
+
import numpy as np
|
8 |
|
9 |
from mamba_ssm.modules.mamba_simple import Mamba, Block
|
10 |
from huggingface_hub import PyTorchModelHubMixin
|
11 |
|
12 |
+
# convert to one hot
|
13 |
+
def seq_to_oh(seq):
|
14 |
+
oh = np.zeros((len(seq), 4), dtype=int)
|
15 |
+
for i, base in enumerate(seq):
|
16 |
+
if base == 'A':
|
17 |
+
oh[i, 0] = 1
|
18 |
+
elif base == 'C':
|
19 |
+
oh[i, 1] = 1
|
20 |
+
elif base == 'G':
|
21 |
+
oh[i, 2] = 1
|
22 |
+
elif base == 'T':
|
23 |
+
oh[i, 3] = 1
|
24 |
+
return oh
|
25 |
+
|
26 |
def create_block(
|
27 |
d_model,
|
28 |
ssm_cfg=None,
|