Philip Fradkin committed on
Commit
bc2395e
·
1 Parent(s): 02a4c26

feat: update 4track model

Browse files
Files changed (16) hide show
  1. README.md +1 -1
  2. models/orthrus_base_4_track/epoch=18-step=20000.ckpt +2 -2
  3. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=1-step=2000.ckpt_ridge.csv +13 -0
  4. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=10-step=12000.ckpt_ridge.csv +13 -0
  5. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=12-step=14000.ckpt_ridge.csv +13 -0
  6. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=14-step=16000.ckpt_ridge.csv +13 -0
  7. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=16-step=18000.ckpt_ridge.csv +13 -0
  8. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv +13 -0
  9. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=3-step=4000.ckpt_ridge.csv +13 -0
  10. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=5-step=6000.ckpt_ridge.csv +13 -0
  11. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=7-step=8000.ckpt_ridge.csv +13 -0
  12. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=9-step=10000.ckpt_ridge.csv +13 -0
  13. models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv +0 -13
  14. models/orthrus_base_4_track/model_config.json +1 -1
  15. notebooks/colab_prototye.ipynb +2 -1
  16. src/model.py +15 -0
README.md CHANGED
@@ -84,7 +84,7 @@ Here is example code
84
  # Sequence for short mRNA
85
  > seq=(
86
  'TCATCTGGATTATACATATTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCA'
87
- 'ACAAAAGAGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGTTACTTCCTCC
88
  'ACTGAAGTCTTGAACCCCCCAAAGTCATCCATGAGGGTTGGAATCAACTTCTGAAAACACAACAAAACCA'
89
  'TATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGATGGTGCAGAGCTTTATGAAG'
90
  'CAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTATTTCAGTGAAGAGCAGTTAAGAGCCTTGAA'
 
84
  # Sequence for short mRNA
85
  > seq=(
86
  'TCATCTGGATTATACATATTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCA'
87
+ 'ACAAAAGAGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGTTACTTCCTCC'
88
  'ACTGAAGTCTTGAACCCCCCAAAGTCATCCATGAGGGTTGGAATCAACTTCTGAAAACACAACAAAACCA'
89
  'TATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGATGGTGCAGAGCTTTATGAAG'
90
  'CAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTATTTCAGTGAAGAGCAGTTAAGAGCCTTGAA'
models/orthrus_base_4_track/epoch=18-step=20000.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58c38f75c8d253db481ddc0c910adaf8ae4d88ee42a993fc72b07e3ce1657302
3
- size 21415988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157d60e0471bff00023376a4b29a1746b407ae919506baf2cf40e88d0009f8ce
3
+ size 21409844
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=1-step=2000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_roc_auc,val,0.8220142525972354,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_auprc,val,0.4625396927514182,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.41187612954069797,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.4650231114299552,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7519351051859768,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.3815569357098566,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_roc_auc,val,0.8196707035908555,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,logistic,val_auprc,val,0.3503672527264725,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7118696793044624,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.508566975266374,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_loss,val,0.7589341222101531,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=1-step=2000.ckpt,ridge,val_r,val,0.47631361227244867,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=10-step=12000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_roc_auc,val,0.8433483249915402,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_auprc,val,0.5132275016061479,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.3924153336558653,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.5055500181171051,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.7462168153619977,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.3901367108773842,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_roc_auc,val,0.825789615510407,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,logistic,val_auprc,val,0.3624099056153837,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.6772307183584606,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.541695841911757,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_loss,val,0.7251545422958905,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=10-step=12000.ckpt,ridge,val_r,val,0.5109326806495391,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=12-step=14000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_roc_auc,val,0.8421522962237182,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_auprc,val,0.5078808348506777,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.38159363235269134,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5210525632055019,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.7449572004778382,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.39210775201429937,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_roc_auc,val,0.8253978288124967,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,logistic,val_auprc,val,0.35978864383790576,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.6759492269289844,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5430234153021989,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_loss,val,0.7187604645312344,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=12-step=14000.ckpt,ridge,val_r,val,0.5170162511471106,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=14-step=16000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_roc_auc,val,0.8453107717715747,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_auprc,val,0.51388271248823,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.38352797937787375,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5187762652770216,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.7464369347298561,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.38989871864513437,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_roc_auc,val,0.825373574980663,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,logistic,val_auprc,val,0.3606313916724556,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.6729050049584757,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5458388693481175,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_loss,val,0.7104695665288715,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=14-step=16000.ckpt,ridge,val_r,val,0.5250332145677902,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=16-step=18000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_roc_auc,val,0.8440424572851378,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_auprc,val,0.5131805187647871,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.38003889341798525,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.523477567510083,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.7444410742413161,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.3930851174944151,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_roc_auc,val,0.8259117384044564,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,logistic,val_auprc,val,0.36113756335833136,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.6744663045562795,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.5444030097422119,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_loss,val,0.7113535602097519,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=16-step=18000.ckpt,ridge,val_r,val,0.5241967682295874,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8446671447835594,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.5141468969910018,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.3816430082054893,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.520855720912494,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7437523843416108,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.39403218950488417,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.825946043024202,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.3617639617322508,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.6739744006625423,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.544860645002531,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7105890797238071,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.5249681390195272,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=3-step=4000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_roc_auc,val,0.8334827448623479,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_auprc,val,0.48054142074091,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.3937403979375475,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.5039354128857179,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7527737213090051,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.3806671640445604,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_roc_auc,val,0.8213135611439488,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,logistic,val_auprc,val,0.35596875648549375,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7108028779272146,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.5099485164455663,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_loss,val,0.7471981375686599,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=3-step=4000.ckpt,ridge,val_r,val,0.48843724229011504,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=5-step=6000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_roc_auc,val,0.8378129434997147,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_auprc,val,0.4930322732919934,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.42145942081505877,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.46362110810253776,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.7539863277031761,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.3786947841967137,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_roc_auc,val,0.822611781448217,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,logistic,val_auprc,val,0.3550260538018792,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.6853506444183269,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.5338677108741229,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_loss,val,0.7403810703891033,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=5-step=6000.ckpt,ridge,val_r,val,0.49591840377848506,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=7-step=8000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_roc_auc,val,0.838934161283643,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_auprc,val,0.499202623569385,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.41299386356832335,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.47458154750742787,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.7506913875148472,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.3835651031902195,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_roc_auc,val,0.8234223639111674,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,logistic,val_auprc,val,0.35690103176614196,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.6861524146069266,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.5330125034795364,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_loss,val,0.734732230977574,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=7-step=8000.ckpt,ridge,val_r,val,0.5014295583249777,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=9-step=10000.ckpt_ridge.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_roc_auc,val,0.8419237597159581,
3
+ go_mf,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_auprc,val,0.5067915793618881,
4
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.3812646126826232,
5
+ mrfp,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5231334426937999,
6
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.7419690874136367,
7
+ mrl,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.39632707952585045,
8
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_roc_auc,val,0.8258357171939567,
9
+ protein_loc,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,logistic,val_auprc,val,0.3602220156610885,
10
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.6739133283342498,
11
+ hl_human,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5449821329643417,
12
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_loss,val,0.7232799553052773,
13
+ hl_mouse,ssm_4t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=9-step=10000.ckpt,ridge,val_r,val,0.5127369847559508,
models/orthrus_base_4_track/lp_results/homo_lp_results_ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria_epoch=18-step=20000.ckpt_ridge.csv DELETED
@@ -1,13 +0,0 @@
1
- dataset,run_name,ckpt_name,lin_model,metric,split,mean_value,std_value
2
- go_mf,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8478578174637246,
3
- go_mf,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.5095139671888503,
4
- mrfp,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.3546131256685185,
5
- mrfp,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.5707018777098897,
6
- mrl,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.7273312686835766,
7
- mrl,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.41711144859054816,
8
- protein_loc,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_roc_auc,val,0.8337499171489103,
9
- protein_loc,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,logistic,val_auprc,val,0.3868738259700707,
10
- hl_human,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.5510596183223497,
11
- hl_human,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.6528813736053247,
12
- hl_mouse,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_loss,val,0.5930341784262118,
13
- hl_mouse,ssm_6t_3_256_lr0.001_wd1e-05_mask0.15_splice_all_basic_eutheria,epoch=18-step=20000.ckpt,ridge,val_r,val,0.6290704100935987,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/orthrus_base_4_track/model_config.json CHANGED
@@ -1 +1 @@
1
- {"model_class": "ssm", "ssm_model_dim": 256, "ssm_n_layers": 3, "n_tracks": 6, "predict_splice_codon": false, "predict_masked": false}
 
1
+ {"model_class": "ssm", "ssm_model_dim": 256, "ssm_n_layers": 3, "n_tracks": 4, "predict_splice_codon": false, "predict_masked": false}
notebooks/colab_prototye.ipynb CHANGED
@@ -204,7 +204,8 @@
204
  " t (gk.Transcript): The transcript object.\n",
205
  " \"\"\"\n",
206
  " seq = \"\".join([genome.dna(exon) for exon in t.exons])\n",
207
- " oh = seq_to_oh(seq)\n",
 
208
  " return oh\n",
209
  "\n",
210
  "def create_six_track_encoding(t, channels_last=False):\n",
 
204
  " t (gk.Transcript): The transcript object.\n",
205
  " \"\"\"\n",
206
  " seq = \"\".join([genome.dna(exon) for exon in t.exons])\n",
207
+ " oh = \n",
208
+ " _oh(seq)\n",
209
  " return oh\n",
210
  "\n",
211
  "def create_six_track_encoding(t, channels_last=False):\n",
src/model.py CHANGED
@@ -4,10 +4,25 @@ import os
4
  import json
5
  import torch
6
  import torch.nn as nn
 
7
 
8
  from mamba_ssm.modules.mamba_simple import Mamba, Block
9
  from huggingface_hub import PyTorchModelHubMixin
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def create_block(
12
  d_model,
13
  ssm_cfg=None,
 
4
  import json
5
  import torch
6
  import torch.nn as nn
7
+ import numpy as np
8
 
9
  from mamba_ssm.modules.mamba_simple import Mamba, Block
10
  from huggingface_hub import PyTorchModelHubMixin
11
 
12
+ # convert to one hot
13
+ def seq_to_oh(seq):
14
+ oh = np.zeros((len(seq), 4), dtype=int)
15
+ for i, base in enumerate(seq):
16
+ if base == 'A':
17
+ oh[i, 0] = 1
18
+ elif base == 'C':
19
+ oh[i, 1] = 1
20
+ elif base == 'G':
21
+ oh[i, 2] = 1
22
+ elif base == 'T':
23
+ oh[i, 3] = 1
24
+ return oh
25
+
26
  def create_block(
27
  d_model,
28
  ssm_cfg=None,