diff --git a/.gitignore b/.gitignore
index 100ab8861c846a556d7540e1fe2ab4cd72ad5e00..a8de302f89b0af22888ac234e3b0d1a6f208a591 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ __pycache__/
#eval-queue-bk/
#eval-results-bk/
logs/
+.idea/
+
diff --git a/.idea/OmniGenomeLeaderboard.iml b/.idea/OmniGenomeLeaderboard.iml
new file mode 100644
index 0000000000000000000000000000000000000000..ec63674cd7f4d511fb06cd63eaeba166d6bc0dd8
--- /dev/null
+++ b/.idea/OmniGenomeLeaderboard.iml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5366c82fe56453c92209a591340f4ded829c1441
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,88 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..a5fe07b7a5ab8aa8bd5661a6ae8a7cc36d74e052
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000000000000000000000000000000..35eb1ddfbbc029bcab630581847471d7f238ec53
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index f4557d4f6e138a5677cdffeefbdff11ef0e0c204..b99212d75d144c01b2ab00e0dda6e6238f4d79ff 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -5,8 +5,57 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -26,10 +75,10 @@
+
-
@@ -94,7 +143,9 @@
-
+
+
+
@@ -134,6 +185,6 @@
-
+
\ No newline at end of file
diff --git a/app.py b/app.py
index 22109eb72f0047457e17bf86e3d1934c9bc5144d..7e18c3fa4b51124de631038d7ba61a77a8fa32c3 100644
--- a/app.py
+++ b/app.py
@@ -35,31 +35,30 @@ def restart_space():
### Space initialisation
-
-try:
- print(EVAL_REQUESTS_PATH)
- snapshot_download(
- repo_id=QUEUE_REPO,
- local_dir=EVAL_REQUESTS_PATH,
- repo_type="dataset",
- tqdm_class=None,
- etag_timeout=30,
- token=TOKEN,
- )
-except Exception:
- restart_space()
-try:
- print(EVAL_RESULTS_PATH)
- snapshot_download(
- repo_id=RESULTS_REPO,
- local_dir=EVAL_RESULTS_PATH,
- repo_type="dataset",
- tqdm_class=None,
- etag_timeout=30,
- token=TOKEN,
- )
-except Exception:
- restart_space()
+# try:
+# print(EVAL_REQUESTS_PATH)
+# snapshot_download(
+# repo_id=QUEUE_REPO,
+# local_dir=EVAL_REQUESTS_PATH,
+# repo_type="dataset",
+# tqdm_class=None,
+# etag_timeout=30,
+# token=TOKEN,
+# )
+# except Exception:
+# restart_space()
+# try:
+# print(EVAL_RESULTS_PATH)
+# snapshot_download(
+# repo_id=RESULTS_REPO,
+# local_dir=EVAL_RESULTS_PATH,
+# repo_type="dataset",
+# tqdm_class=None,
+# etag_timeout=30,
+# token=TOKEN,
+# )
+# except Exception:
+# restart_space()
RGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/RGB/", EVAL_REQUESTS_PATH+"/RGB/", RGB_COLS, RGB_BENCHMARK_COLS)
PGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/PGB/", EVAL_REQUESTS_PATH+"/PGB/", PGB_COLS, PGB_BENCHMARK_COLS)
diff --git a/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc
--- /dev/null
+++ b/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a
--- /dev/null
+++ b/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc
--- /dev/null
+++ b/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a
--- /dev/null
+++ b/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..2471a5c9364821d4d97616d03ed65047aaac92fc
--- /dev/null
+++ b/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json b/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ea7d38471486d3d8f8e0df988dfd653e456f51a
--- /dev/null
+++ b/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
\ No newline at end of file
diff --git a/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json b/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
new file mode 100644
index 0000000000000000000000000000000000000000..b253088dd1195bd80fc63e2184ecd9231d02cc14
--- /dev/null
+++ b/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json
@@ -0,0 +1 @@
+{"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 100, "license": "custom"}
\ No newline at end of file
diff --git a/eval-results/GB/3UTRBERT.json b/eval-results/GB/3UTRBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..ce357a624ac6d43f25bc2ecdbcc16c11fe9787e3
--- /dev/null
+++ b/eval-results/GB/3UTRBERT.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"multimolecule/utrbert-4mer",
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/utrbert-4mer",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.8950
+ },
+ "DOW":{
+ "F1":0.9022
+ },
+ "DRE":{
+ "F1":0.7435
+ },
+ "DME":{
+ "F1":0.8014
+ },
+ "HCE":{
+ "F1":0.7023
+ },
+ "HEE":{
+ "F1":0.7633
+ },
+ "HRE":{
+ "F1":0.9847
+ },
+ "HNP":{
+ "F1":0.8249
+ },
+ "HOR":{
+ "F1":0.6678
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/Caduceus.json b/eval-results/GB/Caduceus.json
new file mode 100644
index 0000000000000000000000000000000000000000..4240022d6b17dd8de28fc9e7dc9c5a1873760630
--- /dev/null
+++ b/eval-results/GB/Caduceus.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.9213
+ },
+ "DOW":{
+ "F1":0.9474
+ },
+ "DRE":{
+ "F1":0.7203
+ },
+ "DME":{
+ "F1":0.7561
+ },
+ "HCE":{
+ "F1":0.7020
+ },
+ "HEE":{
+ "F1":0.7647
+ },
+ "HRE":{
+ "F1":0.7916
+ },
+ "HNP":{
+ "F1":0.8436
+ },
+ "HOR":{
+ "F1":0.6317
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/DNABERT-2-117M.json b/eval-results/GB/DNABERT-2-117M.json
new file mode 100644
index 0000000000000000000000000000000000000000..dc6e4754bd33a1a1da61ab44d2c2bbb87dc1b96a
--- /dev/null
+++ b/eval-results/GB/DNABERT-2-117M.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"zhihan1996/DNABERT-2-117M",
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"zhihan1996/DNABERT-2-117M",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.9267
+ },
+ "DOW":{
+ "F1":0.9517
+ },
+ "DRE":{
+ "F1":0.4377
+ },
+ "DME":{
+ "F1":0.7721
+ },
+ "HCE":{
+ "F1":0.7558
+ },
+ "HEE":{
+ "F1":0.8066
+ },
+ "HRE":{
+ "F1":0.7814
+ },
+ "HNP":{
+ "F1":0.8580
+ },
+ "HOR":{
+ "F1":0.6803
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/HyenaDNA.json b/eval-results/GB/HyenaDNA.json
new file mode 100644
index 0000000000000000000000000000000000000000..e16541883eeac9d04b2bff41acbb2800fbb61d17
--- /dev/null
+++ b/eval-results/GB/HyenaDNA.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.8821
+ },
+ "DOW":{
+ "F1":0.9413
+ },
+ "DRE":{
+ "F1":0.7011
+ },
+ "DME":{
+ "F1":0.7644
+ },
+ "HCE":{
+ "F1":0.7038
+ },
+ "HEE":{
+ "F1":0.7958
+ },
+ "HRE":{
+ "F1":0.9633
+ },
+ "HNP":{
+ "F1":0.8599
+ },
+ "HOR":{
+ "F1":0.6703
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/NT-V2-100M.json b/eval-results/GB/NT-V2-100M.json
new file mode 100644
index 0000000000000000000000000000000000000000..04e8c8e3026dee318fba5ff3f68d2b32f1cfe7f7
--- /dev/null
+++ b/eval-results/GB/NT-V2-100M.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.9166
+ },
+ "DOW":{
+ "F1":0.9432
+ },
+ "DRE":{
+ "F1":0.7820
+ },
+ "DME":{
+ "F1":0.8172
+ },
+ "HCE":{
+ "F1":0.7198
+ },
+ "HEE":{
+ "F1":0.7985
+ },
+ "HRE":{
+ "F1":0.9330
+ },
+ "HNP":{
+ "F1":0.8530
+ },
+ "HOR":{
+ "F1":0.6853
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/OmniGenome186M.json b/eval-results/GB/OmniGenome186M.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f53aeb07e97ab71bbc8f78e16fa87545fb0c43c
--- /dev/null
+++ b/eval-results/GB/OmniGenome186M.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"yangheng/omnigenome-186M",
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"yangheng/omnigenome-186M",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.9416
+ },
+ "DOW":{
+ "F1":0.9349
+ },
+ "DRE":{
+ "F1":0.7717
+ },
+ "DME":{
+ "F1":0.8034
+ },
+ "HCE":{
+ "F1":0.7351
+ },
+ "HEE":{
+ "F1":0.8223
+ },
+ "HRE":{
+ "F1":0.9566
+ },
+ "HNP":{
+ "F1":0.8787
+ },
+ "HOR":{
+ "F1":0.6897
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GB/SpliceBERT.json b/eval-results/GB/SpliceBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea4b1c7c66306ef5379832c7558d16a550a55561
--- /dev/null
+++ b/eval-results/GB/SpliceBERT.json
@@ -0,0 +1,48 @@
+{
+ "config":{
+ "model":"multimolecule/splicebert",
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/splicebert",
+ "model_sha":"main"
+ },
+ "results":{
+ "DEM":{
+ "F1":0.9472
+ },
+ "DOW":{
+ "F1":0.9642
+ },
+ "DRE":{
+ "F1":0.7229
+ },
+ "DME":{
+ "F1":0.7470
+ },
+ "HCE":{
+ "F1":0.7350
+ },
+ "HEE":{
+ "F1":0.7960
+ },
+ "HRE":{
+ "F1":0.9523
+ },
+ "HNP":{
+ "F1":0.8957
+ },
+ "HOR":{
+ "F1":0.6889
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/3UTRBERT.json b/eval-results/GUE/3UTRBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..95d23ee36786021be7ec587e5b278036700807fc
--- /dev/null
+++ b/eval-results/GUE/3UTRBERT.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"multimolecule/utrbert-4mer",
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/utrbert-4mer",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7189
+ },
+ "Mouse TF-M":{
+ "F1":0.7146
+ },
+ "Virus CVC":{
+ "F1":0.6871
+ },
+ "Human TF-H":{
+ "F1":0.7485
+ },
+ "Human PD":{
+ "F1":0.8237
+ },
+ "Human CPD":{
+ "F1":0.9051
+ },
+ "Human SSP":{
+ "F1":0.8195
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/Caduceus.json b/eval-results/GUE/Caduceus.json
new file mode 100644
index 0000000000000000000000000000000000000000..5efcd074500e32eb68a3417305640a6e65ad74bb
--- /dev/null
+++ b/eval-results/GUE/Caduceus.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7349
+ },
+ "Mouse TF-M":{
+ "F1":0.7818
+ },
+ "Virus CVC":{
+ "F1":0.4909
+ },
+ "Human TF-H":{
+ "F1":0.7956
+ },
+ "Human PD":{
+ "F1":0.8913
+ },
+ "Human CPD":{
+ "F1":0.8509
+ },
+ "Human SSP":{
+ "F1":0.8182
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/DNABERT-2-117M.json b/eval-results/GUE/DNABERT-2-117M.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ec4062de7f854995b73467107515d49ae9c4fb6
--- /dev/null
+++ b/eval-results/GUE/DNABERT-2-117M.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"zhihan1996/DNABERT-2-117M",
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"zhihan1996/DNABERT-2-117M",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7585
+ },
+ "Mouse TF-M":{
+ "F1":0.8623
+ },
+ "Virus CVC":{
+ "F1":0.6890
+ },
+ "Human TF-H":{
+ "F1":0.8180
+ },
+ "Human PD":{
+ "F1":0.9017
+ },
+ "Human CPD":{
+ "F1":0.8257
+ },
+ "Human SSP":{
+ "F1":0.8521
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/HyenaDNA.json b/eval-results/GUE/HyenaDNA.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e8f09165601c1cccda8918d51e6785afef7cc75
--- /dev/null
+++ b/eval-results/GUE/HyenaDNA.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7308
+ },
+ "Mouse TF-M":{
+ "F1":0.7344
+ },
+ "Virus CVC":{
+ "F1":0.6637
+ },
+ "Human TF-H":{
+ "F1":0.7762
+ },
+ "Human PD":{
+ "F1":0.9119
+ },
+ "Human CPD":{
+ "F1":0.8431
+ },
+ "Human SSP":{
+ "F1":0.8334
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/NT-V2-100M.json b/eval-results/GUE/NT-V2-100M.json
new file mode 100644
index 0000000000000000000000000000000000000000..2b3a39d97cf155bf7888c9cdd54c03c6bddfd8b9
--- /dev/null
+++ b/eval-results/GUE/NT-V2-100M.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7493
+ },
+ "Mouse TF-M":{
+ "F1":0.7810
+ },
+ "Virus CVC":{
+ "F1":0.5923
+ },
+ "Human TF-H":{
+ "F1":0.7912
+ },
+ "Human PD":{
+ "F1":0.9087
+ },
+ "Human CPD":{
+ "F1":0.8470
+ },
+ "Human SSP":{
+ "F1":0.8413
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/OmniGenome186M.json b/eval-results/GUE/OmniGenome186M.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a8f30ed4659430d7155800e51113eb113cea5ea
--- /dev/null
+++ b/eval-results/GUE/OmniGenome186M.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"yangheng/omnigenome-186M",
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"yangheng/omnigenome-186M",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7851
+ },
+ "Mouse TF-M":{
+ "F1":0.8472
+ },
+ "Virus CVC":{
+ "F1":0.7472
+ },
+ "Human TF-H":{
+ "F1":0.8173
+ },
+ "Human PD":{
+ "F1":0.9004
+ },
+ "Human CPD":{
+ "F1":0.8522
+ },
+ "Human SSP":{
+ "F1":0.9039
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/GUE/SpliceBERT.json b/eval-results/GUE/SpliceBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..e68ed40e24cf3d44c7e36b5e0126543abdc237ac
--- /dev/null
+++ b/eval-results/GUE/SpliceBERT.json
@@ -0,0 +1,42 @@
+{
+ "config":{
+ "model":"multimolecule/splicebert",
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/splicebert",
+ "model_sha":"main"
+ },
+ "results":{
+ "Yeast EMP":{
+ "F1":0.7766
+ },
+ "Mouse TF-M":{
+ "F1":0.8497
+ },
+ "Virus CVC":{
+ "F1":0.5624
+ },
+ "Human TF-H":{
+ "F1":0.8277
+ },
+ "Human PD":{
+ "F1":0.9224
+ },
+ "Human CPD":{
+ "F1":0.8396
+ },
+ "Human SSP":{
+ "F1":0.9381
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/3UTRBERT.json b/eval-results/PGB/3UTRBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..095b1f90d8ed20191ab55b722ed6a4ec48c26e48
--- /dev/null
+++ b/eval-results/PGB/3UTRBERT.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"multimolecule/utrbert-4mer",
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/utrbert-4mer",
+ "model_sha":"main"
+ },
+"results":{
+ "PolyA":{
+ "F1":0.7648
+ },
+ "LncRNA":{
+ "F1":0.7075
+ },
+ "Chrom Acc":{
+ "F1":0.6371
+ },
+ "Prom Str":{
+ "RMSE":1.04
+ },
+ "Term Str":{
+ "RMSE":0.36
+ },
+ "Splice":{
+ "F1":0.9444
+ },
+ "Gene Exp":{
+ "RMSE":14.87
+ },
+ "Enhancer":{
+ "F1":0.7167
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/Agro-NT.json b/eval-results/PGB/Agro-NT.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c1cf5acde27b681f67c9ca8e23b5bd3b9c6e63e
--- /dev/null
+++ b/eval-results/PGB/Agro-NT.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"InstaDeepAI/agro-nucleotide-transformer-1b",
+ "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b",
+ "model_sha":"main"
+ },
+"results":{
+ "PolyA":{
+ "F1":0.7889
+ },
+ "LncRNA":{
+ "F1":0.6724
+ },
+ "Chrom Acc":{
+ "F1":0.6327
+ },
+ "Prom Str":{
+ "RMSE":0.94
+ },
+ "Term Str":{
+ "RMSE":0.78
+ },
+ "Splice":{
+ "F1":0.8845
+ },
+ "Gene Exp":{
+ "RMSE":15.56
+ },
+ "Enhancer":{
+ "F1":0.6283
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/CDSBERT.json b/eval-results/PGB/CDSBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..92365f0f58bc175190a3ff95cbc24d2093494f09
--- /dev/null
+++ b/eval-results/PGB/CDSBERT.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"GleghornLab/cdsBERT",
+ "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"},
+ "model_dtype":"bfloat16",
+ "model_name":"GleghornLab/cdsBERT",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.3972
+ },
+ "LncRNA":{
+ "F1":0.3306
+ },
+ "Chrom Acc":{
+ "F1":0.4895
+ },
+ "Prom Str":{
+ "RMSE":2.19
+ },
+ "Term Str":{
+ "RMSE":0.59
+ },
+ "Splice":{
+ "F1":0.5220
+ },
+ "Gene Exp":{
+ "RMSE":14.77
+ },
+ "Enhancer":{
+ "F1":0.3393
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/Caduceus.json b/eval-results/PGB/Caduceus.json
new file mode 100644
index 0000000000000000000000000000000000000000..80cfc4a525127aff7dbc5d0a71224d432319bbc6
--- /dev/null
+++ b/eval-results/PGB/Caduceus.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.7089
+ },
+ "LncRNA":{
+ "F1":0.6840
+ },
+ "Chrom Acc":{
+ "F1":0.6453
+ },
+ "Prom Str":{
+ "RMSE":0.91
+ },
+ "Term Str":{
+ "RMSE":0.26
+ },
+ "Splice":{
+ "F1":0.7951
+ },
+ "Gene Exp":{
+ "RMSE":14.72
+ },
+ "Enhancer":{
+ "F1":0.6083
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/DNABERT-2-117M.json b/eval-results/PGB/DNABERT-2-117M.json
new file mode 100644
index 0000000000000000000000000000000000000000..3906893d2ce05c38ed29efe43ba07f466bdc0e58
--- /dev/null
+++ b/eval-results/PGB/DNABERT-2-117M.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"zhihan1996/DNABERT-2-117M",
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"zhihan1996/DNABERT-2-117M",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.4135
+ },
+ "LncRNA":{
+ "F1":0.7255
+ },
+ "Chrom Acc":{
+ "F1":0.6149
+ },
+ "Prom Str":{
+ "RMSE":0.99
+ },
+ "Term Str":{
+ "RMSE":0.24
+ },
+ "Splice":{
+ "F1":0.4534
+ },
+ "Gene Exp":{
+ "RMSE":14.78
+ },
+ "Enhancer":{
+ "F1":0.3640
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/HyenaDNA.json b/eval-results/PGB/HyenaDNA.json
new file mode 100644
index 0000000000000000000000000000000000000000..af033dd0115d50a3bdebed9eb98038b3850d3c41
--- /dev/null
+++ b/eval-results/PGB/HyenaDNA.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.8311
+ },
+ "LncRNA":{
+ "F1":0.5821
+ },
+ "Chrom Acc":{
+ "F1":0.5220
+ },
+ "Prom Str":{
+ "RMSE":0.88
+ },
+ "Term Str":{
+ "RMSE":0.26
+ },
+ "Splice":{
+ "F1":0.9028
+ },
+ "Gene Exp":{
+ "RMSE":14.76
+ },
+ "Enhancer":{
+ "F1":0.6617
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/NT-V2-100M.json b/eval-results/PGB/NT-V2-100M.json
new file mode 100644
index 0000000000000000000000000000000000000000..9dbcaf073a15ce2d3e1c89b16d7b65d074e97a89
--- /dev/null
+++ b/eval-results/PGB/NT-V2-100M.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.7126
+ },
+ "LncRNA":{
+ "F1":0.7308
+ },
+ "Chrom Acc":{
+ "F1":0.6571
+ },
+ "Prom Str":{
+ "RMSE":0.81
+ },
+ "Term Str":{
+ "RMSE":0.27
+ },
+ "Splice":{
+ "F1":0.9505
+ },
+ "Gene Exp":{
+ "RMSE":14.69
+ },
+ "Enhancer":{
+ "F1":0.7389
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/OmniGenome186M.json b/eval-results/PGB/OmniGenome186M.json
new file mode 100644
index 0000000000000000000000000000000000000000..06b0c51eead662453dcfb7fe0c41ae7a8297046b
--- /dev/null
+++ b/eval-results/PGB/OmniGenome186M.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"yangheng/omnigenome-186M",
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"yangheng/omnigenome-186M",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.8755
+ },
+ "LncRNA":{
+ "F1":0.7796
+ },
+ "Chrom Acc":{
+ "F1":0.6769
+ },
+ "Prom Str":{
+ "RMSE":0.59
+ },
+ "Term Str":{
+ "RMSE":0.18
+ },
+ "Splice":{
+ "F1":0.9841
+ },
+ "Gene Exp":{
+ "RMSE":14.71
+ },
+ "Enhancer":{
+ "F1":0.7977
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/RNA-BERT.json b/eval-results/PGB/RNA-BERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..dc87b9183fd0d44c57fc5f39596f9670be1d7b88
--- /dev/null
+++ b/eval-results/PGB/RNA-BERT.json
@@ -0,0 +1,46 @@
+{
+ "config":{
+ "model":"multimolecule/rnabert",
+ "model_args":"pretrained=multimolecule/rnabert,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"0.48M", "Pretraining Data":"76,237 human ncRNA sequences", "Species":"Human", "Nucleic Acid":"ncRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/rnabert",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.7854
+ },
+ "LncRNA":{
+ "F1":0.6199
+ },
+ "Chrom Acc":{
+ "F1":0.4894
+ },
+ "Prom Str":{
+ "RMSE":1.81
+ },
+ "Term Str":{
+ "RMSE":0.38
+ },
+ "Splice":{
+ "F1":0.9445
+ },
+ "Gene Exp":{
+ "RMSE":14.89
+ },
+ "Enhancer":{
+ "F1":0.5761
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/RNA-FM.json b/eval-results/PGB/RNA-FM.json
new file mode 100644
index 0000000000000000000000000000000000000000..73cc0c1f5125e649358e73058bb3310733f164d3
--- /dev/null
+++ b/eval-results/PGB/RNA-FM.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"multimolecule/rnafm",
+ "model_args":"pretrained=multimolecule/rnafm,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"99.52M", "Pretraining Data":"23.7 million non-redundant RNA sequences", "Species":"Multi-Species", "Nucleic Acid":"ncRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/rnafm",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.8494
+ },
+ "LncRNA":{
+ "F1":0.6875
+ },
+ "Chrom Acc":{
+ "F1":0.5492
+ },
+ "Prom Str":{
+ "RMSE":0.95
+ },
+ "Term Str":{
+ "RMSE":0.27
+ },
+ "Splice":{
+ "F1":0.9595
+ },
+ "Gene Exp":{
+ "RMSE":14.83
+ },
+ "Enhancer":{
+ "F1":0.5714
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/RNA-MSM.json b/eval-results/PGB/RNA-MSM.json
new file mode 100644
index 0000000000000000000000000000000000000000..282ad3a11ed1f200ccd93c78fcd961d94c4364de
--- /dev/null
+++ b/eval-results/PGB/RNA-MSM.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"multimolecule/rnamsm",
+ "model_args":"pretrained=multimolecule/rnamsm,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"96.5M", "Pretraining Data":"3,932 RNA families", "Species":"Multi-Species", "Nucleic Acid":"RNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/rnamsm",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.8425
+ },
+ "LncRNA":{
+ "F1":0.6749
+ },
+ "Chrom Acc":{
+ "F1":0.5352
+ },
+ "Prom Str":{
+ "RMSE":1.28
+ },
+ "Term Str":{
+ "RMSE":0.28
+ },
+ "Splice":{
+ "F1":0.9549
+ },
+ "Gene Exp":{
+ "RMSE":14.87
+ },
+ "Enhancer":{
+ "F1":0.6145
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/PGB/SpliceBERT.json b/eval-results/PGB/SpliceBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..f52106504f5a96e28e24a29e9ef7019115a7f8ab
--- /dev/null
+++ b/eval-results/PGB/SpliceBERT.json
@@ -0,0 +1,45 @@
+{
+ "config":{
+ "model":"multimolecule/splicebert",
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/splicebert",
+ "model_sha":"main"
+ },
+ "results":{
+ "PolyA":{
+ "F1":0.6523
+ },
+ "LncRNA":{
+ "F1":0.7188
+ },
+ "Chrom Acc":{
+ "F1":0.6362
+ },
+ "Prom Str":{
+ "RMSE":0.75
+ },
+ "Term Str":{
+ "RMSE":0.22
+ },
+ "Splice":{
+ "F1":0.9645
+ },
+ "Gene Exp":{
+ "RMSE":14.70
+ },
+ "Enhancer":{
+ "F1":0.6971
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/3UTRBERT.json b/eval-results/RGB/yangheng/3UTRBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..1adec75827e33722413afddd29d504c61349c815
--- /dev/null
+++ b/eval-results/RGB/yangheng/3UTRBERT.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"multimolecule/utrbert-4mer",
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/utrbert-4mer",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7772
+ },
+ "SNMD":{
+ "AUC":0.5002
+ },
+ "SNMR":{
+ "F1":0.2401
+ },
+ "ArchiveII":{
+ "F1":0.7898
+ },
+ "bpRNA":{
+ "F1":0.5693
+ },
+ "RNAStralign":{
+ "F1":0.9203
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/Agro-NT.json b/eval-results/RGB/yangheng/Agro-NT.json
new file mode 100644
index 0000000000000000000000000000000000000000..846f30ec63c16f794989b0db1a154c59e1f3697c
--- /dev/null
+++ b/eval-results/RGB/yangheng/Agro-NT.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"InstaDeepAI/agro-nucleotide-transformer-1b",
+ "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7830
+ },
+ "SNMD":{
+ "AUC":0.4999
+ },
+ "SNMR":{
+ "F1":0.2638
+ },
+ "ArchiveII":{
+ "F1":0.7013
+ },
+ "bpRNA":{
+ "F1":0.4871
+ },
+ "RNAStralign":{
+ "F1":0.7521
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/CDSBERT.json b/eval-results/RGB/yangheng/CDSBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c4b2d0bacaf8c5ada6c773d2f2fbf5ed1b05fa4
--- /dev/null
+++ b/eval-results/RGB/yangheng/CDSBERT.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"GleghornLab/cdsBERT",
+ "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"},
+ "model_dtype":"bfloat16",
+ "model_name":"GleghornLab/cdsBERT",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7468
+ },
+ "SNMD":{
+ "AUC":0.5503
+ },
+ "SNMR":{
+ "F1":0.3616
+ },
+ "ArchiveII":{
+ "F1":0.8934
+ },
+ "bpRNA":{
+ "F1":0.7001
+ },
+ "RNAStralign":{
+ "F1":0.9715
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/DNABERT-2-117M.json b/eval-results/RGB/yangheng/DNABERT-2-117M.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5420bb518b4cc9bbfaddbc533c30c0d9f49bb06
--- /dev/null
+++ b/eval-results/RGB/yangheng/DNABERT-2-117M.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"zhihan1996/DNABERT-2-117M",
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"zhihan1996/DNABERT-2-117M",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.8158
+ },
+ "SNMD":{
+ "AUC":0.4994
+ },
+ "SNMR":{
+ "F1":0.1586
+ },
+ "ArchiveII":{
+ "F1":0.5982
+ },
+ "bpRNA":{
+ "F1":0.4340
+ },
+ "RNAStralign":{
+ "F1":0.6549
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/HyenaDNA.json b/eval-results/RGB/yangheng/HyenaDNA.json
new file mode 100644
index 0000000000000000000000000000000000000000..64a6a4fc545031813e11311068ee7011518c6410
--- /dev/null
+++ b/eval-results/RGB/yangheng/HyenaDNA.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.8056
+ },
+ "SNMD":{
+ "AUC":0.5332
+ },
+ "SNMR":{
+ "F1":0.3980
+ },
+ "ArchiveII":{
+ "F1":0.8423
+ },
+ "bpRNA":{
+ "F1":0.5662
+ },
+ "RNAStralign":{
+ "F1":0.9542
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/NT-V2-100M.json b/eval-results/RGB/yangheng/NT-V2-100M.json
new file mode 100644
index 0000000000000000000000000000000000000000..f22a639787974b5149a8cd30360c249cf9409922
--- /dev/null
+++ b/eval-results/RGB/yangheng/NT-V2-100M.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7826
+ },
+ "SNMD":{
+ "AUC":0.5049
+ },
+ "SNMR":{
+ "F1":0.2601
+ },
+ "ArchiveII":{
+ "F1":0.7990
+ },
+ "bpRNA":{
+ "F1":0.5660
+ },
+ "RNAStralign":{
+ "F1":0.9084
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/OmniGenome186M.json b/eval-results/RGB/yangheng/OmniGenome186M.json
new file mode 100644
index 0000000000000000000000000000000000000000..ec7663dc48867a7bc15c972ae4c11fba5fdb5e37
--- /dev/null
+++ b/eval-results/RGB/yangheng/OmniGenome186M.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"yangheng/omnigenome-186M",
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"yangheng/omnigenome-186M",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7164
+ },
+ "SNMD":{
+ "AUC":0.6381
+ },
+ "SNMR":{
+ "F1":0.4980
+ },
+ "ArchiveII":{
+ "F1":0.9520
+ },
+ "bpRNA":{
+ "F1":0.8248
+ },
+ "RNAStralign":{
+ "F1":0.9912
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/SpliceBERT.json b/eval-results/RGB/yangheng/SpliceBERT.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ce43c5282635c6aabfd4a5e3b31435f198451af
--- /dev/null
+++ b/eval-results/RGB/yangheng/SpliceBERT.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"multimolecule/splicebert",
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
+ "model_dtype":"bfloat16",
+ "model_name":"multimolecule/splicebert",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7340
+ },
+ "SNMD":{
+ "AUC":0.5811
+ },
+ "SNMR":{
+ "F1":0.4644
+ },
+ "ArchiveII":{
+ "F1":0.8905
+ },
+ "bpRNA":{
+ "F1":0.6910
+ },
+ "RNAStralign":{
+ "F1":0.9697
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval-results/RGB/yangheng/results_OmniGenome-52M.json b/eval-results/RGB/yangheng/results_OmniGenome-52M.json
new file mode 100644
index 0000000000000000000000000000000000000000..c786546cc3bf47cc0eb0b208341a403f97563f90
--- /dev/null
+++ b/eval-results/RGB/yangheng/results_OmniGenome-52M.json
@@ -0,0 +1,39 @@
+{
+ "config":{
+ "model":"yangheng/omnigenome-52M",
+ "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16",
+ "num_fewshot":0,
+ "batch_size":1,
+ "batch_sizes":[
+
+ ],
+ "device":"cpu",
+ "no_cache":true,
+ "limit":20,
+ "bootstrap_iters":100000,
+ "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
+ "model_dtype":"bfloat16",
+ "model_name":"yangheng/omnigenome-52M",
+ "model_sha":"main"
+ },
+ "results":{
+ "mRNA":{
+ "RMSE":0.7191
+ },
+ "SNMD":{
+ "AUC":0.6244
+ },
+ "SNMR":{
+ "F1":0.4891
+ },
+ "ArchiveII":{
+ "F1":0.9498
+ },
+ "bpRNA":{
+ "F1":0.8234
+ },
+ "RNAStralign":{
+ "F1":0.9901
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/about.py b/src/about.py
index 348baefe458b126cc1ca329799718eb5cd21984d..ac3f0503e17d506b729aee323e479d173b9c2c8e 100644
--- a/src/about.py
+++ b/src/about.py
@@ -111,10 +111,10 @@ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@article{Yang2024,
author = {Yang, Heng and Li, Ke},
- title = {Foundation Models Work},
+ title = {OmniGenome: Aligning RNA Sequences with Secondary Structures in Genomic Foundation Models},
journal = {arXiv},
year = {2024},
- note = {arXiv preprint arXiv:XXXX.XXXXX}
- url = {https://arxiv.org/abs/XXXX.XXXXX}
+ note = {arXiv preprint arXiv:2407.11242},
+ url = {https://arxiv.org/abs/2407.11242}
}
"""