yangheng committed on
Commit
e867ca2
1 Parent(s): 729b3cc
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +2 -0
  2. .idea/OmniGenomeLeaderboard.iml +7 -0
  3. .idea/inspectionProfiles/Project_Default.xml +88 -0
  4. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  5. .idea/misc.xml +6 -0
  6. .idea/vcs.xml +6 -0
  7. .idea/workspace.xml +56 -5
  8. app.py +24 -25
  9. eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json +1 -0
  10. eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json +1 -0
  11. eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json +1 -0
  12. eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json +1 -0
  13. eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json +1 -0
  14. eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json +1 -0
  15. eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json +1 -0
  16. eval-results/GB/3UTRBERT.json +48 -0
  17. eval-results/GB/Caduceus.json +48 -0
  18. eval-results/GB/DNABERT-2-117M.json +48 -0
  19. eval-results/GB/HyenaDNA.json +48 -0
  20. eval-results/GB/NT-V2-100M.json +48 -0
  21. eval-results/GB/OmniGenome186M.json +48 -0
  22. eval-results/GB/SpliceBERT.json +48 -0
  23. eval-results/GUE/3UTRBERT.json +42 -0
  24. eval-results/GUE/Caduceus.json +42 -0
  25. eval-results/GUE/DNABERT-2-117M.json +42 -0
  26. eval-results/GUE/HyenaDNA.json +42 -0
  27. eval-results/GUE/NT-V2-100M.json +42 -0
  28. eval-results/GUE/OmniGenome186M.json +42 -0
  29. eval-results/GUE/SpliceBERT.json +42 -0
  30. eval-results/PGB/3UTRBERT.json +45 -0
  31. eval-results/PGB/Agro-NT.json +45 -0
  32. eval-results/PGB/CDSBERT.json +45 -0
  33. eval-results/PGB/Caduceus.json +45 -0
  34. eval-results/PGB/DNABERT-2-117M.json +45 -0
  35. eval-results/PGB/HyenaDNA.json +45 -0
  36. eval-results/PGB/NT-V2-100M.json +45 -0
  37. eval-results/PGB/OmniGenome186M.json +45 -0
  38. eval-results/PGB/RNA-BERT.json +46 -0
  39. eval-results/PGB/RNA-FM.json +45 -0
  40. eval-results/PGB/RNA-MSM.json +45 -0
  41. eval-results/PGB/SpliceBERT.json +45 -0
  42. eval-results/RGB/yangheng/3UTRBERT.json +39 -0
  43. eval-results/RGB/yangheng/Agro-NT.json +39 -0
  44. eval-results/RGB/yangheng/CDSBERT.json +39 -0
  45. eval-results/RGB/yangheng/DNABERT-2-117M.json +39 -0
  46. eval-results/RGB/yangheng/HyenaDNA.json +39 -0
  47. eval-results/RGB/yangheng/NT-V2-100M.json +39 -0
  48. eval-results/RGB/yangheng/OmniGenome186M.json +39 -0
  49. eval-results/RGB/yangheng/SpliceBERT.json +39 -0
  50. eval-results/RGB/yangheng/results_OmniGenome-52M.json +39 -0
.gitignore CHANGED
@@ -11,3 +11,5 @@ __pycache__/
11
  #eval-queue-bk/
12
  #eval-results-bk/
13
  logs/
 
 
 
11
  #eval-queue-bk/
12
  #eval-results-bk/
13
  logs/
14
+ .idea/
15
+
.idea/OmniGenomeLeaderboard.iml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module version="4">
3
+ <component name="PyDocumentationSettings">
4
+ <option name="format" value="PLAIN" />
5
+ <option name="myDocStringFormat" value="Plain" />
6
+ </component>
7
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
5
+ <Languages>
6
+ <language minSize="54" name="Python" />
7
+ </Languages>
8
+ </inspection_tool>
9
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
10
+ <option name="ignoredPackages">
11
+ <value>
12
+ <list size="70">
13
+ <item index="0" class="java.lang.String" itemvalue="ftfy" />
14
+ <item index="1" class="java.lang.String" itemvalue="gensim" />
15
+ <item index="2" class="java.lang.String" itemvalue="diffusers" />
16
+ <item index="3" class="java.lang.String" itemvalue="tensorflow_text" />
17
+ <item index="4" class="java.lang.String" itemvalue="tensorflow" />
18
+ <item index="5" class="java.lang.String" itemvalue="TextAttack" />
19
+ <item index="6" class="java.lang.String" itemvalue="tensorflow_hub" />
20
+ <item index="7" class="java.lang.String" itemvalue="pyabsa" />
21
+ <item index="8" class="java.lang.String" itemvalue="protobuf" />
22
+ <item index="9" class="java.lang.String" itemvalue="networkx" />
23
+ <item index="10" class="java.lang.String" itemvalue="update-checker" />
24
+ <item index="11" class="java.lang.String" itemvalue="boostaug" />
25
+ <item index="12" class="java.lang.String" itemvalue="pandas" />
26
+ <item index="13" class="java.lang.String" itemvalue="termcolor" />
27
+ <item index="14" class="java.lang.String" itemvalue="metric-visualizer" />
28
+ <item index="15" class="java.lang.String" itemvalue="spacy" />
29
+ <item index="16" class="java.lang.String" itemvalue="seqeval" />
30
+ <item index="17" class="java.lang.String" itemvalue="autocuda" />
31
+ <item index="18" class="java.lang.String" itemvalue="sentencepiece" />
32
+ <item index="19" class="java.lang.String" itemvalue="findfile" />
33
+ <item index="20" class="java.lang.String" itemvalue="gitpython" />
34
+ <item index="21" class="java.lang.String" itemvalue="pytorch_warmup" />
35
+ <item index="22" class="java.lang.String" itemvalue="torchtext" />
36
+ <item index="23" class="java.lang.String" itemvalue="googledrivedownloader" />
37
+ <item index="24" class="java.lang.String" itemvalue="opennmt-py" />
38
+ <item index="25" class="java.lang.String" itemvalue="opencv-contrib-python" />
39
+ <item index="26" class="java.lang.String" itemvalue="timm" />
40
+ <item index="27" class="java.lang.String" itemvalue="pytorch_lightning" />
41
+ <item index="28" class="java.lang.String" itemvalue="invisible-watermark" />
42
+ <item index="29" class="java.lang.String" itemvalue="tqdm" />
43
+ <item index="30" class="java.lang.String" itemvalue="tokenizers" />
44
+ <item index="31" class="java.lang.String" itemvalue="gradio" />
45
+ <item index="32" class="java.lang.String" itemvalue="scikit-learn" />
46
+ <item index="33" class="java.lang.String" itemvalue="nltk" />
47
+ <item index="34" class="java.lang.String" itemvalue="arxiv" />
48
+ <item index="35" class="java.lang.String" itemvalue="imblearn" />
49
+ <item index="36" class="java.lang.String" itemvalue="packaging" />
50
+ <item index="37" class="java.lang.String" itemvalue="setuptools" />
51
+ <item index="38" class="java.lang.String" itemvalue="numpy" />
52
+ <item index="39" class="java.lang.String" itemvalue="requests" />
53
+ <item index="40" class="java.lang.String" itemvalue="nlpaug" />
54
+ <item index="41" class="java.lang.String" itemvalue="yacs" />
55
+ <item index="42" class="java.lang.String" itemvalue="tensorboardX" />
56
+ <item index="43" class="java.lang.String" itemvalue="rouge" />
57
+ <item index="44" class="java.lang.String" itemvalue="datasets" />
58
+ <item index="45" class="java.lang.String" itemvalue="transformers" />
59
+ <item index="46" class="java.lang.String" itemvalue="typing_extensions" />
60
+ <item index="47" class="java.lang.String" itemvalue="torch" />
61
+ <item index="48" class="java.lang.String" itemvalue="cmudict" />
62
+ <item index="49" class="java.lang.String" itemvalue="pykakasi" />
63
+ <item index="50" class="java.lang.String" itemvalue="fastapi" />
64
+ <item index="51" class="java.lang.String" itemvalue="rouge-chinese" />
65
+ <item index="52" class="java.lang.String" itemvalue="peft" />
66
+ <item index="53" class="java.lang.String" itemvalue="uvicorn" />
67
+ <item index="54" class="java.lang.String" itemvalue="sse-starlette" />
68
+ <item index="55" class="java.lang.String" itemvalue="trl" />
69
+ <item index="56" class="java.lang.String" itemvalue="tiktoken" />
70
+ <item index="57" class="java.lang.String" itemvalue="scipy" />
71
+ <item index="58" class="java.lang.String" itemvalue="pydantic" />
72
+ <item index="59" class="java.lang.String" itemvalue="jieba" />
73
+ <item index="60" class="java.lang.String" itemvalue="matplotlib" />
74
+ <item index="61" class="java.lang.String" itemvalue="transformers_stream_generator" />
75
+ <item index="62" class="java.lang.String" itemvalue="accelerate" />
76
+ <item index="63" class="java.lang.String" itemvalue="optimum" />
77
+ <item index="64" class="java.lang.String" itemvalue="auto-gptq" />
78
+ <item index="65" class="java.lang.String" itemvalue="bitsandbytes" />
79
+ <item index="66" class="java.lang.String" itemvalue="deepspeed" />
80
+ <item index="67" class="java.lang.String" itemvalue="evaluate" />
81
+ <item index="68" class="java.lang.String" itemvalue="tensorboardx" />
82
+ <item index="69" class="java.lang.String" itemvalue="sklearn" />
83
+ </list>
84
+ </value>
85
+ </option>
86
+ </inspection_tool>
87
+ </profile>
88
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="C:\Users\chuan\miniconda3" />
5
+ </component>
6
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml CHANGED
@@ -5,8 +5,57 @@
5
  </component>
6
  <component name="ChangeListManager">
7
  <list default="true" id="4dc9d937-d789-48c5-9ba5-fe08d01bc11f" name="Changes" comment="">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  <change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
 
9
  <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
 
10
  </list>
11
  <option name="SHOW_DIALOG" value="false" />
12
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -26,10 +75,10 @@
26
  </component>
27
  <component name="PropertiesComponent"><![CDATA[{
28
  "keyToString": {
29
- "Python.app.executor": "Debug",
30
  "RunOnceActivity.ShowReadmeOnStart": "true",
31
  "git-widget-placeholder": "main",
32
- "last_opened_file_path": "C:/Users/chuan/OneDrive - University of Exeter/AIProjects/OmniGenomeLeaderboard/eval-queue",
33
  "node.js.detected.package.eslint": "true",
34
  "node.js.detected.package.tslint": "true",
35
  "node.js.selected.package.eslint": "(autodetect)",
@@ -40,9 +89,9 @@
40
  }]]></component>
41
  <component name="RecentsManager">
42
  <key name="CopyFile.RECENT_KEYS">
 
43
  <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard\eval-queue" />
44
  <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard\eval-queue\.cache\huggingface\download\GB\InstaDeepAI" />
45
- <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard" />
46
  </key>
47
  </component>
48
  <component name="RunManager">
@@ -94,7 +143,9 @@
94
  <workItem from="1726143220520" duration="3239000" />
95
  <workItem from="1726146471494" duration="2362000" />
96
  <workItem from="1726223901200" duration="2056000" />
97
- <workItem from="1726434553745" duration="4460000" />
 
 
98
  </task>
99
  <servers />
100
  </component>
@@ -134,6 +185,6 @@
134
  </breakpoint-manager>
135
  </component>
136
  <component name="com.intellij.coverage.CoverageDataManagerImpl">
137
- <SUITE FILE_PATH="coverage/OmniGenomeLeaderboard$app.coverage" NAME="app Coverage Results" MODIFIED="1726486285408" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
138
  </component>
139
  </project>
 
5
  </component>
6
  <component name="ChangeListManager">
7
  <list default="true" id="4dc9d937-d789-48c5-9ba5-fe08d01bc11f" name="Changes" comment="">
8
+ <change afterPath="$PROJECT_DIR$/.idea/OmniGenomeLeaderboard.iml" afterDir="false" />
9
+ <change afterPath="$PROJECT_DIR$/.idea/inspectionProfiles/Project_Default.xml" afterDir="false" />
10
+ <change afterPath="$PROJECT_DIR$/.idea/inspectionProfiles/profiles_settings.xml" afterDir="false" />
11
+ <change afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
12
+ <change afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
13
+ <change afterPath="$PROJECT_DIR$/eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json" afterDir="false" />
14
+ <change afterPath="$PROJECT_DIR$/eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json" afterDir="false" />
15
+ <change afterPath="$PROJECT_DIR$/eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json" afterDir="false" />
16
+ <change afterPath="$PROJECT_DIR$/eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json" afterDir="false" />
17
+ <change afterPath="$PROJECT_DIR$/eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json" afterDir="false" />
18
+ <change afterPath="$PROJECT_DIR$/eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json" afterDir="false" />
19
+ <change afterPath="$PROJECT_DIR$/eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json" afterDir="false" />
20
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/3UTRBERT.json" afterDir="false" />
21
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/Caduceus.json" afterDir="false" />
22
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/DNABERT-2-117M.json" afterDir="false" />
23
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/HyenaDNA.json" afterDir="false" />
24
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/NT-V2-100M.json" afterDir="false" />
25
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/OmniGenome186M.json" afterDir="false" />
26
+ <change afterPath="$PROJECT_DIR$/eval-results/GB/SpliceBERT.json" afterDir="false" />
27
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/3UTRBERT.json" afterDir="false" />
28
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/Caduceus.json" afterDir="false" />
29
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/DNABERT-2-117M.json" afterDir="false" />
30
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/HyenaDNA.json" afterDir="false" />
31
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/NT-V2-100M.json" afterDir="false" />
32
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/OmniGenome186M.json" afterDir="false" />
33
+ <change afterPath="$PROJECT_DIR$/eval-results/GUE/SpliceBERT.json" afterDir="false" />
34
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/3UTRBERT.json" afterDir="false" />
35
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/Agro-NT.json" afterDir="false" />
36
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/CDSBERT.json" afterDir="false" />
37
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/Caduceus.json" afterDir="false" />
38
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/DNABERT-2-117M.json" afterDir="false" />
39
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/HyenaDNA.json" afterDir="false" />
40
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/NT-V2-100M.json" afterDir="false" />
41
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/OmniGenome186M.json" afterDir="false" />
42
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/RNA-BERT.json" afterDir="false" />
43
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/RNA-FM.json" afterDir="false" />
44
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/RNA-MSM.json" afterDir="false" />
45
+ <change afterPath="$PROJECT_DIR$/eval-results/PGB/SpliceBERT.json" afterDir="false" />
46
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/3UTRBERT.json" afterDir="false" />
47
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/Agro-NT.json" afterDir="false" />
48
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/CDSBERT.json" afterDir="false" />
49
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/DNABERT-2-117M.json" afterDir="false" />
50
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/HyenaDNA.json" afterDir="false" />
51
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/NT-V2-100M.json" afterDir="false" />
52
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/OmniGenome186M.json" afterDir="false" />
53
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/SpliceBERT.json" afterDir="false" />
54
+ <change afterPath="$PROJECT_DIR$/eval-results/RGB/yangheng/results_OmniGenome-52M.json" afterDir="false" />
55
  <change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
56
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
57
  <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
58
+ <change beforePath="$PROJECT_DIR$/src/about.py" beforeDir="false" afterPath="$PROJECT_DIR$/src/about.py" afterDir="false" />
59
  </list>
60
  <option name="SHOW_DIALOG" value="false" />
61
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
 
75
  </component>
76
  <component name="PropertiesComponent"><![CDATA[{
77
  "keyToString": {
78
+ "Python.app.executor": "Run",
79
  "RunOnceActivity.ShowReadmeOnStart": "true",
80
  "git-widget-placeholder": "main",
81
+ "last_opened_file_path": "C:/Users/chuan/OneDrive - University of Exeter/AIProjects/OmniGenomeLeaderboard",
82
  "node.js.detected.package.eslint": "true",
83
  "node.js.detected.package.tslint": "true",
84
  "node.js.selected.package.eslint": "(autodetect)",
 
89
  }]]></component>
90
  <component name="RecentsManager">
91
  <key name="CopyFile.RECENT_KEYS">
92
+ <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard" />
93
  <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard\eval-queue" />
94
  <recent name="C:\Users\chuan\OneDrive - University of Exeter\AIProjects\OmniGenomeLeaderboard\eval-queue\.cache\huggingface\download\GB\InstaDeepAI" />
 
95
  </key>
96
  </component>
97
  <component name="RunManager">
 
143
  <workItem from="1726143220520" duration="3239000" />
144
  <workItem from="1726146471494" duration="2362000" />
145
  <workItem from="1726223901200" duration="2056000" />
146
+ <workItem from="1726434553745" duration="6270000" />
147
+ <workItem from="1726515056209" duration="632000" />
148
+ <workItem from="1726687104634" duration="322000" />
149
  </task>
150
  <servers />
151
  </component>
 
185
  </breakpoint-manager>
186
  </component>
187
  <component name="com.intellij.coverage.CoverageDataManagerImpl">
188
+ <SUITE FILE_PATH="coverage/OmniGenomeLeaderboard$app.coverage" NAME="app Coverage Results" MODIFIED="1726687258224" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
189
  </component>
190
  </project>
app.py CHANGED
@@ -35,31 +35,30 @@ def restart_space():
35
 
36
 
37
  ### Space initialisation
38
-
39
- try:
40
- print(EVAL_REQUESTS_PATH)
41
- snapshot_download(
42
- repo_id=QUEUE_REPO,
43
- local_dir=EVAL_REQUESTS_PATH,
44
- repo_type="dataset",
45
- tqdm_class=None,
46
- etag_timeout=30,
47
- token=TOKEN,
48
- )
49
- except Exception:
50
- restart_space()
51
- try:
52
- print(EVAL_RESULTS_PATH)
53
- snapshot_download(
54
- repo_id=RESULTS_REPO,
55
- local_dir=EVAL_RESULTS_PATH,
56
- repo_type="dataset",
57
- tqdm_class=None,
58
- etag_timeout=30,
59
- token=TOKEN,
60
- )
61
- except Exception:
62
- restart_space()
63
 
64
  RGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/RGB/", EVAL_REQUESTS_PATH+"/RGB/", RGB_COLS, RGB_BENCHMARK_COLS)
65
  PGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/PGB/", EVAL_REQUESTS_PATH+"/PGB/", PGB_COLS, PGB_BENCHMARK_COLS)
 
35
 
36
 
37
  ### Space initialisation
38
+ # try:
39
+ # print(EVAL_REQUESTS_PATH)
40
+ # snapshot_download(
41
+ # repo_id=QUEUE_REPO,
42
+ # local_dir=EVAL_REQUESTS_PATH,
43
+ # repo_type="dataset",
44
+ # tqdm_class=None,
45
+ # etag_timeout=30,
46
+ # token=TOKEN,
47
+ # )
48
+ # except Exception:
49
+ # restart_space()
50
+ # try:
51
+ # print(EVAL_RESULTS_PATH)
52
+ # snapshot_download(
53
+ # repo_id=RESULTS_REPO,
54
+ # local_dir=EVAL_RESULTS_PATH,
55
+ # repo_type="dataset",
56
+ # tqdm_class=None,
57
+ # etag_timeout=30,
58
+ # token=TOKEN,
59
+ # )
60
+ # except Exception:
61
+ # restart_space()
 
62
 
63
  RGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/RGB/", EVAL_REQUESTS_PATH+"/RGB/", RGB_COLS, RGB_BENCHMARK_COLS)
64
  PGB_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH+"/PGB/", EVAL_REQUESTS_PATH+"/PGB/", PGB_COLS, PGB_BENCHMARK_COLS)
eval-queue/GB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
eval-queue/GB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
eval-queue/GUE/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
eval-queue/GUE/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
eval-queue/PGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 96, "license": "custom"}
eval-queue/PGB/kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 7.73, "license": "custom"}
eval-queue/RGB/InstaDeepAI/nucleotide-transformer-v2-100m-multi-species_eval_request_False_bfloat16_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", "base_model": "", "revision": "main", "private": false, "precision": "bfloat16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 100, "license": "custom"}
eval-results/GB/3UTRBERT.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/utrbert-4mer",
4
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/utrbert-4mer",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.8950
22
+ },
23
+ "DOW":{
24
+ "F1":0.9022
25
+ },
26
+ "DRE":{
27
+ "F1":0.7435
28
+ },
29
+ "DME":{
30
+ "F1":0.8014
31
+ },
32
+ "HCE":{
33
+ "F1":0.7023
34
+ },
35
+ "HEE":{
36
+ "F1":0.7633
37
+ },
38
+ "HRE":{
39
+ "F1":0.9847
40
+ },
41
+ "HNP":{
42
+ "F1":0.8249
43
+ },
44
+ "HOR":{
45
+ "F1":0.6678
46
+ }
47
+ }
48
+ }
eval-results/GB/Caduceus.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
4
+ "model_args":"pretrained=kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.9213
22
+ },
23
+ "DOW":{
24
+ "F1":0.9474
25
+ },
26
+ "DRE":{
27
+ "F1":0.7203
28
+ },
29
+ "DME":{
30
+ "F1":0.7561
31
+ },
32
+ "HCE":{
33
+ "F1":0.7020
34
+ },
35
+ "HEE":{
36
+ "F1":0.7647
37
+ },
38
+ "HRE":{
39
+ "F1":0.7916
40
+ },
41
+ "HNP":{
42
+ "F1":0.8436
43
+ },
44
+ "HOR":{
45
+ "F1":0.6317
46
+ }
47
+ }
48
+ }
eval-results/GB/DNABERT-2-117M.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"zhihan1996/DNABERT-2-117M",
4
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"zhihan1996/DNABERT-2-117M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.9267
22
+ },
23
+ "DOW":{
24
+ "F1":0.9517
25
+ },
26
+ "DRE":{
27
+ "F1":0.4377
28
+ },
29
+ "DME":{
30
+ "F1":0.7721
31
+ },
32
+ "HCE":{
33
+ "F1":0.7558
34
+ },
35
+ "HEE":{
36
+ "F1":0.8066
37
+ },
38
+ "HRE":{
39
+ "F1":0.7814
40
+ },
41
+ "HNP":{
42
+ "F1":0.8580
43
+ },
44
+ "HOR":{
45
+ "F1":0.6803
46
+ }
47
+ }
48
+ }
eval-results/GB/HyenaDNA.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
4
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.8821
22
+ },
23
+ "DOW":{
24
+ "F1":0.9413
25
+ },
26
+ "DRE":{
27
+ "F1":0.7011
28
+ },
29
+ "DME":{
30
+ "F1":0.7644
31
+ },
32
+ "HCE":{
33
+ "F1":0.7038
34
+ },
35
+ "HEE":{
36
+ "F1":0.7958
37
+ },
38
+ "HRE":{
39
+ "F1":0.9633
40
+ },
41
+ "HNP":{
42
+ "F1":0.8599
43
+ },
44
+ "HOR":{
45
+ "F1":0.6703
46
+ }
47
+ }
48
+ }
eval-results/GB/NT-V2-100M.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
4
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.9166
22
+ },
23
+ "DOW":{
24
+ "F1":0.9432
25
+ },
26
+ "DRE":{
27
+ "F1":0.7820
28
+ },
29
+ "DME":{
30
+ "F1":0.8172
31
+ },
32
+ "HCE":{
33
+ "F1":0.7198
34
+ },
35
+ "HEE":{
36
+ "F1":0.7985
37
+ },
38
+ "HRE":{
39
+ "F1":0.9330
40
+ },
41
+ "HNP":{
42
+ "F1":0.8530
43
+ },
44
+ "HOR":{
45
+ "F1":0.6853
46
+ }
47
+ }
48
+ }
eval-results/GB/OmniGenome186M.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"yangheng/omnigenome-186M",
4
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"yangheng/omnigenome-186M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.9416
22
+ },
23
+ "DOW":{
24
+ "F1":0.9349
25
+ },
26
+ "DRE":{
27
+ "F1":0.7717
28
+ },
29
+ "DME":{
30
+ "F1":0.8034
31
+ },
32
+ "HCE":{
33
+ "F1":0.7351
34
+ },
35
+ "HEE":{
36
+ "F1":0.8223
37
+ },
38
+ "HRE":{
39
+ "F1":0.9566
40
+ },
41
+ "HNP":{
42
+ "F1":0.8787
43
+ },
44
+ "HOR":{
45
+ "F1":0.6897
46
+ }
47
+ }
48
+ }
eval-results/GB/SpliceBERT.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/splicebert",
4
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/splicebert",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "DEM":{
21
+ "F1":0.9472
22
+ },
23
+ "DOW":{
24
+ "F1":0.9642
25
+ },
26
+ "DRE":{
27
+ "F1":0.7229
28
+ },
29
+ "DME":{
30
+ "F1":0.7470
31
+ },
32
+ "HCE":{
33
+ "F1":0.7350
34
+ },
35
+ "HEE":{
36
+ "F1":0.7960
37
+ },
38
+ "HRE":{
39
+ "F1":0.9523
40
+ },
41
+ "HNP":{
42
+ "F1":0.8957
43
+ },
44
+ "HOR":{
45
+ "F1":0.6889
46
+ }
47
+ }
48
+ }
eval-results/GUE/3UTRBERT.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/utrbert-4mer",
4
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/utrbert-4mer",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7189
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.7146
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.6871
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.7485
31
+ },
32
+ "Human PD":{
33
+ "F1":0.8237
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.9051
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.8195
40
+ }
41
+ }
42
+ }
eval-results/GUE/Caduceus.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
4
+ "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7349
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.7818
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.4909
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.7956
31
+ },
32
+ "Human PD":{
33
+ "F1":0.8913
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8509
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.8182
40
+ }
41
+ }
42
+ }
eval-results/GUE/DNABERT-2-117M.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"zhihan1996/DNABERT-2-117M",
4
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"zhihan1996/DNABERT-2-117M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7585
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.8623
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.6890
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.8180
31
+ },
32
+ "Human PD":{
33
+ "F1":0.9017
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8257
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.8521
40
+ }
41
+ }
42
+ }
eval-results/GUE/HyenaDNA.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
4
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7308
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.7344
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.6637
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.7762
31
+ },
32
+ "Human PD":{
33
+ "F1":0.9119
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8431
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.8334
40
+ }
41
+ }
42
+ }
eval-results/GUE/NT-V2-100M.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
4
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7493
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.7810
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.5923
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.7912
31
+ },
32
+ "Human PD":{
33
+ "F1":0.9087
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8470
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.8413
40
+ }
41
+ }
42
+ }
eval-results/GUE/OmniGenome186M.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"yangheng/omnigenome-186M",
4
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"yangheng/omnigenome-186M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7851
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.8472
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.7472
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.8173
31
+ },
32
+ "Human PD":{
33
+ "F1":0.9004
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8522
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.9039
40
+ }
41
+ }
42
+ }
eval-results/GUE/SpliceBERT.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/splicebert",
4
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/splicebert",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "Yeast EMP":{
21
+ "F1":0.7766
22
+ },
23
+ "Mouse TF-M":{
24
+ "F1":0.8497
25
+ },
26
+ "Virus CVC":{
27
+ "F1":0.5624
28
+ },
29
+ "Human TF-H":{
30
+ "F1":0.8277
31
+ },
32
+ "Human PD":{
33
+ "F1":0.9224
34
+ },
35
+ "Human CPD":{
36
+ "F1":0.8396
37
+ },
38
+ "Human SSP":{
39
+ "F1":0.9381
40
+ }
41
+ }
42
+ }
eval-results/PGB/3UTRBERT.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/utrbert-4mer",
4
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/utrbert-4mer",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.7648
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.7075
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6371
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":1.04
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.36
34
+ },
35
+ "Splice":{
36
+ "F1":0.9444
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.87
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.7167
43
+ }
44
+ }
45
+ }
eval-results/PGB/Agro-NT.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/agro-nucleotide-transformer-1b",
4
+ "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.7889
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.6724
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6327
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.94
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.78
34
+ },
35
+ "Splice":{
36
+ "F1":0.8845
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":15.56
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.6283
43
+ }
44
+ }
45
+ }
eval-results/PGB/CDSBERT.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"GleghornLab/cdsBERT",
4
+ "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"GleghornLab/cdsBERT",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.3972
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.3306
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.4895
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":2.19
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.59
34
+ },
35
+ "Splice":{
36
+ "F1":0.5220
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.77
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.3393
43
+ }
44
+ }
45
+ }
eval-results/PGB/Caduceus.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
4
+ "model_args":"pretrained= kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"7.73M", "Pretraining Data":"35 billion nucleotide base pairs", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"kuleshov-group/caduceus-ps_seqlen-131k_d_model-256_n_layer-16",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.7089
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.6840
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6453
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.91
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.26
34
+ },
35
+ "Splice":{
36
+ "F1":0.7951
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.72
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.6083
43
+ }
44
+ }
45
+ }
eval-results/PGB/DNABERT-2-117M.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"zhihan1996/DNABERT-2-117M",
4
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"zhihan1996/DNABERT-2-117M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.4135
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.7255
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6149
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.99
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.24
34
+ },
35
+ "Splice":{
36
+ "F1":0.4534
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.78
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.3640
43
+ }
44
+ }
45
+ }
eval-results/PGB/HyenaDNA.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
4
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.8311
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.5821
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.5220
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.88
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.26
34
+ },
35
+ "Splice":{
36
+ "F1":0.9028
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.76
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.6617
43
+ }
44
+ }
45
+ }
eval-results/PGB/NT-V2-100M.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
4
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.7126
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.7308
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6571
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.81
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.27
34
+ },
35
+ "Splice":{
36
+ "F1":0.9505
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.69
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.7389
43
+ }
44
+ }
45
+ }
eval-results/PGB/OmniGenome186M.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"yangheng/omnigenome-186M",
4
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"yangheng/omnigenome-186M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.8755
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.7796
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6769
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.59
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.18
34
+ },
35
+ "Splice":{
36
+ "F1":0.9841
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.71
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.7977
43
+ }
44
+ }
45
+ }
eval-results/PGB/RNA-BERT.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/rnabert",
4
+ "model_args":"pretrained=multimolecule/rnabert,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"0.48M", "Pretraining Data":"76,237 human ncRNA sequences", "Species":"Human", "Nucleic Acid":"ncRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/rnabert",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.7854
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.6199
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.4894
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":1.81
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.38
34
+ },
35
+ "Splice":{
36
+ "F1":0.9445
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.89
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.5761
43
+ }
44
+
45
+ }
46
+ }
eval-results/PGB/RNA-FM.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/rnafm",
4
+ "model_args":"pretrained=multimolecule/rnafm,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"99.52M", "Pretraining Data":"23.7 million non-redundant RNA sequences", "Species":"Multi-Species", "Nucleic Acid":"ncRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/rnafm",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.8494
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.6875
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.5492
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.95
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.27
34
+ },
35
+ "Splice":{
36
+ "F1":0.9595
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.83
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.5714
43
+ }
44
+ }
45
+ }
eval-results/PGB/RNA-MSM.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/rnamsm",
4
+ "model_args":"pretrained=multimolecule/rnamsm,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"96.5M", "Pretraining Data":"3,932 RNA families", "Species":"Multi-Species", "Nucleic Acid":"RNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/rnamsm",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.8425
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.6749
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.5352
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":1.28
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.28
34
+ },
35
+ "Splice":{
36
+ "F1":0.9549
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.87
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.6145
43
+ }
44
+ }
45
+ }
eval-results/PGB/SpliceBERT.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/splicebert",
4
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/splicebert",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "PolyA":{
21
+ "F1":0.6523
22
+ },
23
+ "LncRNA":{
24
+ "F1":0.7188
25
+ },
26
+ "Chrom Acc":{
27
+ "F1":0.6362
28
+ },
29
+ "Prom Str":{
30
+ "RMSE":0.75
31
+ },
32
+ "Term Str":{
33
+ "RMSE":0.22
34
+ },
35
+ "Splice":{
36
+ "F1":0.9645
37
+ },
38
+ "Gene Exp":{
39
+ "RMSE":14.70
40
+ },
41
+ "Enhancer":{
42
+ "F1":0.6971
43
+ }
44
+ }
45
+ }
eval-results/RGB/yangheng/3UTRBERT.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/utrbert-4mer",
4
+ "model_args":"pretrained=multimolecule/utrbert-4mer,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"86M", "Pretraining Data":"20,362 Sequences", "Species":"Multi-Species", "Nucleic Acid":"mRNA 3'UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/utrbert-4mer",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7772
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.5002
25
+ },
26
+ "SNMR":{
27
+ "F1":0.2401
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.7898
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.5693
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9203
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/Agro-NT.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/agro-nucleotide-transformer-1b",
4
+ "model_args":"pretrained=InstaDeepAI/agro-nucleotide-transformer-1b,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"985M", "Pretraining Data":"472.5B Tokens", "Species":"48 Edible Plants", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/agro-nucleotide-transformer-1b",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7830
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.4999
25
+ },
26
+ "SNMR":{
27
+ "F1":0.2638
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.7013
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.4871
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.7521
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/CDSBERT.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"GleghornLab/cdsBERT",
4
+ "model_args":"pretrained=GleghornLab/cdsBERT,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"420M", "Pretraining Data":"4M Sequences", "Species":"4,069 RNA families", "Nucleic Acid":"CDS"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"GleghornLab/cdsBERT",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7468
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.5503
25
+ },
26
+ "SNMR":{
27
+ "F1":0.3616
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.8934
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.7001
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9715
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/DNABERT-2-117M.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"zhihan1996/DNABERT-2-117M",
4
+ "model_args":"pretrained=zhihan1996/DNABERT-2-117M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"BPE", "# of Params":"117M", "Pretraining Data":"32.49B Tokens", "Species":"Human + 135 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"zhihan1996/DNABERT-2-117M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.8158
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.4994
25
+ },
26
+ "SNMR":{
27
+ "F1":0.1586
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.5982
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.4340
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.6549
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/HyenaDNA.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"LongSafari/hyenadna-large-1m-seqlen-hf",
4
+ "model_args":"pretrained=LongSafari/hyenadna-large-1m-seqlen-hf,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"47M", "Pretraining Data":"~3.2B Tokens", "Species":"Human", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"LongSafari/hyenadna-large-1m-seqlen-hf",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.8056
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.5332
25
+ },
26
+ "SNMR":{
27
+ "F1":0.3980
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.8423
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.5662
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9542
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/NT-V2-100M.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
4
+ "model_args":"pretrained=InstaDeepAI/nucleotide-transformer-v2-100m-multi-species,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"k-mers", "# of Params":"96M", "Pretraining Data":"300B Tokens", "Species":"Human + 850 Species", "Nucleic Acid":"DNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7826
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.5049
25
+ },
26
+ "SNMR":{
27
+ "F1":0.2601
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.7990
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.5660
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9084
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/OmniGenome186M.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"yangheng/omnigenome-186M",
4
+ "model_args":"pretrained=yangheng/omnigenome-186M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"186M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"yangheng/omnigenome-186M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7164
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.6381
25
+ },
26
+ "SNMR":{
27
+ "F1":0.4980
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.9520
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.8248
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9912
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/SpliceBERT.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"multimolecule/splicebert",
4
+ "model_args":"pretrained=multimolecule/splicebert,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"19.7M", "Pretraining Data":"65 billion nucleotides", "Species":"Multi-Species", "Nucleic Acid":"mRNA"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"multimolecule/splicebert",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7340
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.5811
25
+ },
26
+ "SNMR":{
27
+ "F1":0.4644
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.8905
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.6910
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9697
37
+ }
38
+ }
39
+ }
eval-results/RGB/yangheng/results_OmniGenome-52M.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config":{
3
+ "model":"yangheng/omnigenome-52M",
4
+ "model_args":"pretrained=yangheng/omnigenome-52M,revision=main,dtype=bfloat16",
5
+ "num_fewshot":0,
6
+ "batch_size":1,
7
+ "batch_sizes":[
8
+
9
+ ],
10
+ "device":"cpu",
11
+ "no_cache":true,
12
+ "limit":20,
13
+ "bootstrap_iters":100000,
14
+ "description_dict":{"Tokenization":"SNT", "# of Params":"52M", "Pretraining Data":"54.2B Tokens", "Species":"1124 Plant Species", "Nucleic Acid":"mRNA, CDS, UTR"},
15
+ "model_dtype":"bfloat16",
16
+ "model_name":"yangheng/omnigenome-52M",
17
+ "model_sha":"main"
18
+ },
19
+ "results":{
20
+ "mRNA":{
21
+ "RMSE":0.7191
22
+ },
23
+ "SNMD":{
24
+ "AUC":0.6244
25
+ },
26
+ "SNMR":{
27
+ "F1":0.4891
28
+ },
29
+ "ArchiveII":{
30
+ "F1":0.9498
31
+ },
32
+ "bpRNA":{
33
+ "F1":0.8234
34
+ },
35
+ "RNAStralign":{
36
+ "F1":0.9901
37
+ }
38
+ }
39
+ }