Zymrael committed on
Commit
578dc72
1 Parent(s): 064dc88

Revert "Upload model"

Browse files

This reverts commit 064dc88b9e971620d3bbea20c58efa2f6a785c59.

config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "_commit_hash": "21ca99cfbcd256fe254352248f086786c2e91c9d",
3
- "_name_or_path": "togethercomputer/evo-1-8k-base",
4
  "architectures": [
5
  "StripedHyenaModelForCausalLM"
6
  ],
@@ -81,7 +81,6 @@
81
  "torch_dtype": "bfloat16",
82
  "transformers_version": null,
83
  "use_cache": true,
84
- "use_flash_attention_2": true,
85
  "use_flash_attn": true,
86
  "use_flash_depthwise": false,
87
  "use_flash_rmsnorm": false,
 
1
  {
2
+ "_commit_hash": "1cc23830f62c268082475776fb449af8428eb703",
3
+ "_name_or_path": "togethercomputer/evo-1-131k-base",
4
  "architectures": [
5
  "StripedHyenaModelForCausalLM"
6
  ],
 
81
  "torch_dtype": "bfloat16",
82
  "transformers_version": null,
83
  "use_cache": true,
 
84
  "use_flash_attn": true,
85
  "use_flash_depthwise": false,
86
  "use_flash_rmsnorm": false,
generation_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
  "_from_model_config": true,
3
- "transformers_version": "4.40.1"
4
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "transformers_version": "4.36.2"
4
  }
pytorch_model-00001-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7287760aa42d8fb677c82727fe8dd184ffea454e16150e6e15b175285e2ef84e
3
- size 4980102165
 
 
 
 
pytorch_model-00002-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a6f278e4cf16b659e1ca87439027207c72562f6f60b492318c2727e6011e264
3
- size 4929888009
 
 
 
 
pytorch_model-00003-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbfe75db338c1c3e5d3236fd4d370ed50b70ec27582b8418c4b5e72cbdf3036e
3
- size 3003328882
 
 
 
 
pytorch_model.bin.index.json DELETED
@@ -1,446 +0,0 @@
1
- {
2
- "metadata": {
3
- "total_size": 12913164672
4
- },
5
- "weight_map": {
6
- "backbone.blocks.0.filter.D": "pytorch_model-00001-of-00003.bin",
7
- "backbone.blocks.0.filter.poles": "pytorch_model-00001-of-00003.bin",
8
- "backbone.blocks.0.filter.residues": "pytorch_model-00001-of-00003.bin",
9
- "backbone.blocks.0.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
10
- "backbone.blocks.0.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
11
- "backbone.blocks.0.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
12
- "backbone.blocks.0.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
13
- "backbone.blocks.0.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
14
- "backbone.blocks.0.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
15
- "backbone.blocks.0.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
16
- "backbone.blocks.0.post_norm.scale": "pytorch_model-00001-of-00003.bin",
17
- "backbone.blocks.0.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
18
- "backbone.blocks.0.projections.bias": "pytorch_model-00001-of-00003.bin",
19
- "backbone.blocks.0.projections.weight": "pytorch_model-00001-of-00003.bin",
20
- "backbone.blocks.1.filter.D": "pytorch_model-00001-of-00003.bin",
21
- "backbone.blocks.1.filter.poles": "pytorch_model-00001-of-00003.bin",
22
- "backbone.blocks.1.filter.residues": "pytorch_model-00001-of-00003.bin",
23
- "backbone.blocks.1.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
24
- "backbone.blocks.1.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
25
- "backbone.blocks.1.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
26
- "backbone.blocks.1.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
27
- "backbone.blocks.1.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
28
- "backbone.blocks.1.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
29
- "backbone.blocks.1.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
30
- "backbone.blocks.1.post_norm.scale": "pytorch_model-00001-of-00003.bin",
31
- "backbone.blocks.1.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
32
- "backbone.blocks.1.projections.bias": "pytorch_model-00001-of-00003.bin",
33
- "backbone.blocks.1.projections.weight": "pytorch_model-00001-of-00003.bin",
34
- "backbone.blocks.10.filter.D": "pytorch_model-00001-of-00003.bin",
35
- "backbone.blocks.10.filter.poles": "pytorch_model-00001-of-00003.bin",
36
- "backbone.blocks.10.filter.residues": "pytorch_model-00001-of-00003.bin",
37
- "backbone.blocks.10.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
38
- "backbone.blocks.10.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
39
- "backbone.blocks.10.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
40
- "backbone.blocks.10.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
41
- "backbone.blocks.10.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
42
- "backbone.blocks.10.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
43
- "backbone.blocks.10.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
44
- "backbone.blocks.10.post_norm.scale": "pytorch_model-00001-of-00003.bin",
45
- "backbone.blocks.10.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
46
- "backbone.blocks.10.projections.bias": "pytorch_model-00001-of-00003.bin",
47
- "backbone.blocks.10.projections.weight": "pytorch_model-00001-of-00003.bin",
48
- "backbone.blocks.11.filter.D": "pytorch_model-00001-of-00003.bin",
49
- "backbone.blocks.11.filter.poles": "pytorch_model-00001-of-00003.bin",
50
- "backbone.blocks.11.filter.residues": "pytorch_model-00001-of-00003.bin",
51
- "backbone.blocks.11.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
52
- "backbone.blocks.11.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
53
- "backbone.blocks.11.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
54
- "backbone.blocks.11.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
55
- "backbone.blocks.11.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
56
- "backbone.blocks.11.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
57
- "backbone.blocks.11.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
58
- "backbone.blocks.11.post_norm.scale": "pytorch_model-00001-of-00003.bin",
59
- "backbone.blocks.11.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
60
- "backbone.blocks.11.projections.bias": "pytorch_model-00001-of-00003.bin",
61
- "backbone.blocks.11.projections.weight": "pytorch_model-00001-of-00003.bin",
62
- "backbone.blocks.12.filter.D": "pytorch_model-00001-of-00003.bin",
63
- "backbone.blocks.12.filter.poles": "pytorch_model-00001-of-00003.bin",
64
- "backbone.blocks.12.filter.residues": "pytorch_model-00001-of-00003.bin",
65
- "backbone.blocks.12.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
66
- "backbone.blocks.12.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
67
- "backbone.blocks.12.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
68
- "backbone.blocks.12.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
69
- "backbone.blocks.12.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
70
- "backbone.blocks.12.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
71
- "backbone.blocks.12.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
72
- "backbone.blocks.12.post_norm.scale": "pytorch_model-00001-of-00003.bin",
73
- "backbone.blocks.12.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
74
- "backbone.blocks.12.projections.bias": "pytorch_model-00001-of-00003.bin",
75
- "backbone.blocks.12.projections.weight": "pytorch_model-00001-of-00003.bin",
76
- "backbone.blocks.13.filter.D": "pytorch_model-00002-of-00003.bin",
77
- "backbone.blocks.13.filter.poles": "pytorch_model-00002-of-00003.bin",
78
- "backbone.blocks.13.filter.residues": "pytorch_model-00002-of-00003.bin",
79
- "backbone.blocks.13.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
80
- "backbone.blocks.13.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
81
- "backbone.blocks.13.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
82
- "backbone.blocks.13.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
83
- "backbone.blocks.13.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
84
- "backbone.blocks.13.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
85
- "backbone.blocks.13.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
86
- "backbone.blocks.13.post_norm.scale": "pytorch_model-00002-of-00003.bin",
87
- "backbone.blocks.13.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
88
- "backbone.blocks.13.projections.bias": "pytorch_model-00002-of-00003.bin",
89
- "backbone.blocks.13.projections.weight": "pytorch_model-00002-of-00003.bin",
90
- "backbone.blocks.14.filter.D": "pytorch_model-00002-of-00003.bin",
91
- "backbone.blocks.14.filter.poles": "pytorch_model-00002-of-00003.bin",
92
- "backbone.blocks.14.filter.residues": "pytorch_model-00002-of-00003.bin",
93
- "backbone.blocks.14.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
94
- "backbone.blocks.14.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
95
- "backbone.blocks.14.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
96
- "backbone.blocks.14.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
97
- "backbone.blocks.14.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
98
- "backbone.blocks.14.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
99
- "backbone.blocks.14.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
100
- "backbone.blocks.14.post_norm.scale": "pytorch_model-00002-of-00003.bin",
101
- "backbone.blocks.14.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
102
- "backbone.blocks.14.projections.bias": "pytorch_model-00002-of-00003.bin",
103
- "backbone.blocks.14.projections.weight": "pytorch_model-00002-of-00003.bin",
104
- "backbone.blocks.15.filter.D": "pytorch_model-00002-of-00003.bin",
105
- "backbone.blocks.15.filter.poles": "pytorch_model-00002-of-00003.bin",
106
- "backbone.blocks.15.filter.residues": "pytorch_model-00002-of-00003.bin",
107
- "backbone.blocks.15.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
108
- "backbone.blocks.15.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
109
- "backbone.blocks.15.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
110
- "backbone.blocks.15.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
111
- "backbone.blocks.15.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
112
- "backbone.blocks.15.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
113
- "backbone.blocks.15.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
114
- "backbone.blocks.15.post_norm.scale": "pytorch_model-00002-of-00003.bin",
115
- "backbone.blocks.15.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
116
- "backbone.blocks.15.projections.bias": "pytorch_model-00002-of-00003.bin",
117
- "backbone.blocks.15.projections.weight": "pytorch_model-00002-of-00003.bin",
118
- "backbone.blocks.16.inner_mha_cls.Wqkv.bias": "pytorch_model-00002-of-00003.bin",
119
- "backbone.blocks.16.inner_mha_cls.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
120
- "backbone.blocks.16.inner_mha_cls.out_proj.bias": "pytorch_model-00002-of-00003.bin",
121
- "backbone.blocks.16.inner_mha_cls.out_proj.weight": "pytorch_model-00002-of-00003.bin",
122
- "backbone.blocks.16.inner_mha_cls.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
123
- "backbone.blocks.16.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
124
- "backbone.blocks.16.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
125
- "backbone.blocks.16.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
126
- "backbone.blocks.16.post_norm.scale": "pytorch_model-00002-of-00003.bin",
127
- "backbone.blocks.16.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
128
- "backbone.blocks.17.filter.D": "pytorch_model-00002-of-00003.bin",
129
- "backbone.blocks.17.filter.poles": "pytorch_model-00002-of-00003.bin",
130
- "backbone.blocks.17.filter.residues": "pytorch_model-00002-of-00003.bin",
131
- "backbone.blocks.17.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
132
- "backbone.blocks.17.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
133
- "backbone.blocks.17.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
134
- "backbone.blocks.17.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
135
- "backbone.blocks.17.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
136
- "backbone.blocks.17.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
137
- "backbone.blocks.17.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
138
- "backbone.blocks.17.post_norm.scale": "pytorch_model-00002-of-00003.bin",
139
- "backbone.blocks.17.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
140
- "backbone.blocks.17.projections.bias": "pytorch_model-00002-of-00003.bin",
141
- "backbone.blocks.17.projections.weight": "pytorch_model-00002-of-00003.bin",
142
- "backbone.blocks.18.filter.D": "pytorch_model-00002-of-00003.bin",
143
- "backbone.blocks.18.filter.poles": "pytorch_model-00002-of-00003.bin",
144
- "backbone.blocks.18.filter.residues": "pytorch_model-00002-of-00003.bin",
145
- "backbone.blocks.18.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
146
- "backbone.blocks.18.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
147
- "backbone.blocks.18.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
148
- "backbone.blocks.18.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
149
- "backbone.blocks.18.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
150
- "backbone.blocks.18.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
151
- "backbone.blocks.18.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
152
- "backbone.blocks.18.post_norm.scale": "pytorch_model-00002-of-00003.bin",
153
- "backbone.blocks.18.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
154
- "backbone.blocks.18.projections.bias": "pytorch_model-00002-of-00003.bin",
155
- "backbone.blocks.18.projections.weight": "pytorch_model-00002-of-00003.bin",
156
- "backbone.blocks.19.filter.D": "pytorch_model-00002-of-00003.bin",
157
- "backbone.blocks.19.filter.poles": "pytorch_model-00002-of-00003.bin",
158
- "backbone.blocks.19.filter.residues": "pytorch_model-00002-of-00003.bin",
159
- "backbone.blocks.19.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
160
- "backbone.blocks.19.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
161
- "backbone.blocks.19.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
162
- "backbone.blocks.19.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
163
- "backbone.blocks.19.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
164
- "backbone.blocks.19.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
165
- "backbone.blocks.19.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
166
- "backbone.blocks.19.post_norm.scale": "pytorch_model-00002-of-00003.bin",
167
- "backbone.blocks.19.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
168
- "backbone.blocks.19.projections.bias": "pytorch_model-00002-of-00003.bin",
169
- "backbone.blocks.19.projections.weight": "pytorch_model-00002-of-00003.bin",
170
- "backbone.blocks.2.filter.D": "pytorch_model-00001-of-00003.bin",
171
- "backbone.blocks.2.filter.poles": "pytorch_model-00001-of-00003.bin",
172
- "backbone.blocks.2.filter.residues": "pytorch_model-00001-of-00003.bin",
173
- "backbone.blocks.2.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
174
- "backbone.blocks.2.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
175
- "backbone.blocks.2.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
176
- "backbone.blocks.2.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
177
- "backbone.blocks.2.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
178
- "backbone.blocks.2.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
179
- "backbone.blocks.2.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
180
- "backbone.blocks.2.post_norm.scale": "pytorch_model-00001-of-00003.bin",
181
- "backbone.blocks.2.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
182
- "backbone.blocks.2.projections.bias": "pytorch_model-00001-of-00003.bin",
183
- "backbone.blocks.2.projections.weight": "pytorch_model-00001-of-00003.bin",
184
- "backbone.blocks.20.filter.D": "pytorch_model-00002-of-00003.bin",
185
- "backbone.blocks.20.filter.poles": "pytorch_model-00002-of-00003.bin",
186
- "backbone.blocks.20.filter.residues": "pytorch_model-00002-of-00003.bin",
187
- "backbone.blocks.20.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
188
- "backbone.blocks.20.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
189
- "backbone.blocks.20.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
190
- "backbone.blocks.20.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
191
- "backbone.blocks.20.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
192
- "backbone.blocks.20.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
193
- "backbone.blocks.20.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
194
- "backbone.blocks.20.post_norm.scale": "pytorch_model-00002-of-00003.bin",
195
- "backbone.blocks.20.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
196
- "backbone.blocks.20.projections.bias": "pytorch_model-00002-of-00003.bin",
197
- "backbone.blocks.20.projections.weight": "pytorch_model-00002-of-00003.bin",
198
- "backbone.blocks.21.filter.D": "pytorch_model-00002-of-00003.bin",
199
- "backbone.blocks.21.filter.poles": "pytorch_model-00002-of-00003.bin",
200
- "backbone.blocks.21.filter.residues": "pytorch_model-00002-of-00003.bin",
201
- "backbone.blocks.21.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
202
- "backbone.blocks.21.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
203
- "backbone.blocks.21.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
204
- "backbone.blocks.21.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
205
- "backbone.blocks.21.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
206
- "backbone.blocks.21.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
207
- "backbone.blocks.21.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
208
- "backbone.blocks.21.post_norm.scale": "pytorch_model-00002-of-00003.bin",
209
- "backbone.blocks.21.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
210
- "backbone.blocks.21.projections.bias": "pytorch_model-00002-of-00003.bin",
211
- "backbone.blocks.21.projections.weight": "pytorch_model-00002-of-00003.bin",
212
- "backbone.blocks.22.filter.D": "pytorch_model-00002-of-00003.bin",
213
- "backbone.blocks.22.filter.poles": "pytorch_model-00002-of-00003.bin",
214
- "backbone.blocks.22.filter.residues": "pytorch_model-00002-of-00003.bin",
215
- "backbone.blocks.22.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
216
- "backbone.blocks.22.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
217
- "backbone.blocks.22.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
218
- "backbone.blocks.22.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
219
- "backbone.blocks.22.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
220
- "backbone.blocks.22.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
221
- "backbone.blocks.22.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
222
- "backbone.blocks.22.post_norm.scale": "pytorch_model-00002-of-00003.bin",
223
- "backbone.blocks.22.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
224
- "backbone.blocks.22.projections.bias": "pytorch_model-00002-of-00003.bin",
225
- "backbone.blocks.22.projections.weight": "pytorch_model-00002-of-00003.bin",
226
- "backbone.blocks.23.filter.D": "pytorch_model-00002-of-00003.bin",
227
- "backbone.blocks.23.filter.poles": "pytorch_model-00002-of-00003.bin",
228
- "backbone.blocks.23.filter.residues": "pytorch_model-00002-of-00003.bin",
229
- "backbone.blocks.23.filter.short_filter_bias": "pytorch_model-00002-of-00003.bin",
230
- "backbone.blocks.23.filter.short_filter_weight": "pytorch_model-00002-of-00003.bin",
231
- "backbone.blocks.23.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
232
- "backbone.blocks.23.mlp.l2.weight": "pytorch_model-00002-of-00003.bin",
233
- "backbone.blocks.23.mlp.l3.weight": "pytorch_model-00002-of-00003.bin",
234
- "backbone.blocks.23.out_filter_dense.bias": "pytorch_model-00002-of-00003.bin",
235
- "backbone.blocks.23.out_filter_dense.weight": "pytorch_model-00002-of-00003.bin",
236
- "backbone.blocks.23.post_norm.scale": "pytorch_model-00002-of-00003.bin",
237
- "backbone.blocks.23.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
238
- "backbone.blocks.23.projections.bias": "pytorch_model-00002-of-00003.bin",
239
- "backbone.blocks.23.projections.weight": "pytorch_model-00002-of-00003.bin",
240
- "backbone.blocks.24.inner_mha_cls.Wqkv.bias": "pytorch_model-00002-of-00003.bin",
241
- "backbone.blocks.24.inner_mha_cls.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
242
- "backbone.blocks.24.inner_mha_cls.out_proj.bias": "pytorch_model-00002-of-00003.bin",
243
- "backbone.blocks.24.inner_mha_cls.out_proj.weight": "pytorch_model-00002-of-00003.bin",
244
- "backbone.blocks.24.inner_mha_cls.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
245
- "backbone.blocks.24.mlp.l1.weight": "pytorch_model-00002-of-00003.bin",
246
- "backbone.blocks.24.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
247
- "backbone.blocks.24.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
248
- "backbone.blocks.24.post_norm.scale": "pytorch_model-00002-of-00003.bin",
249
- "backbone.blocks.24.pre_norm.scale": "pytorch_model-00002-of-00003.bin",
250
- "backbone.blocks.25.filter.D": "pytorch_model-00003-of-00003.bin",
251
- "backbone.blocks.25.filter.poles": "pytorch_model-00003-of-00003.bin",
252
- "backbone.blocks.25.filter.residues": "pytorch_model-00003-of-00003.bin",
253
- "backbone.blocks.25.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
254
- "backbone.blocks.25.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
255
- "backbone.blocks.25.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
256
- "backbone.blocks.25.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
257
- "backbone.blocks.25.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
258
- "backbone.blocks.25.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
259
- "backbone.blocks.25.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
260
- "backbone.blocks.25.post_norm.scale": "pytorch_model-00003-of-00003.bin",
261
- "backbone.blocks.25.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
262
- "backbone.blocks.25.projections.bias": "pytorch_model-00003-of-00003.bin",
263
- "backbone.blocks.25.projections.weight": "pytorch_model-00003-of-00003.bin",
264
- "backbone.blocks.26.filter.D": "pytorch_model-00003-of-00003.bin",
265
- "backbone.blocks.26.filter.poles": "pytorch_model-00003-of-00003.bin",
266
- "backbone.blocks.26.filter.residues": "pytorch_model-00003-of-00003.bin",
267
- "backbone.blocks.26.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
268
- "backbone.blocks.26.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
269
- "backbone.blocks.26.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
270
- "backbone.blocks.26.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
271
- "backbone.blocks.26.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
272
- "backbone.blocks.26.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
273
- "backbone.blocks.26.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
274
- "backbone.blocks.26.post_norm.scale": "pytorch_model-00003-of-00003.bin",
275
- "backbone.blocks.26.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
276
- "backbone.blocks.26.projections.bias": "pytorch_model-00003-of-00003.bin",
277
- "backbone.blocks.26.projections.weight": "pytorch_model-00003-of-00003.bin",
278
- "backbone.blocks.27.filter.D": "pytorch_model-00003-of-00003.bin",
279
- "backbone.blocks.27.filter.poles": "pytorch_model-00003-of-00003.bin",
280
- "backbone.blocks.27.filter.residues": "pytorch_model-00003-of-00003.bin",
281
- "backbone.blocks.27.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
282
- "backbone.blocks.27.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
283
- "backbone.blocks.27.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
284
- "backbone.blocks.27.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
285
- "backbone.blocks.27.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
286
- "backbone.blocks.27.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
287
- "backbone.blocks.27.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
288
- "backbone.blocks.27.post_norm.scale": "pytorch_model-00003-of-00003.bin",
289
- "backbone.blocks.27.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
290
- "backbone.blocks.27.projections.bias": "pytorch_model-00003-of-00003.bin",
291
- "backbone.blocks.27.projections.weight": "pytorch_model-00003-of-00003.bin",
292
- "backbone.blocks.28.filter.D": "pytorch_model-00003-of-00003.bin",
293
- "backbone.blocks.28.filter.poles": "pytorch_model-00003-of-00003.bin",
294
- "backbone.blocks.28.filter.residues": "pytorch_model-00003-of-00003.bin",
295
- "backbone.blocks.28.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
296
- "backbone.blocks.28.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
297
- "backbone.blocks.28.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
298
- "backbone.blocks.28.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
299
- "backbone.blocks.28.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
300
- "backbone.blocks.28.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
301
- "backbone.blocks.28.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
302
- "backbone.blocks.28.post_norm.scale": "pytorch_model-00003-of-00003.bin",
303
- "backbone.blocks.28.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
304
- "backbone.blocks.28.projections.bias": "pytorch_model-00003-of-00003.bin",
305
- "backbone.blocks.28.projections.weight": "pytorch_model-00003-of-00003.bin",
306
- "backbone.blocks.29.filter.D": "pytorch_model-00003-of-00003.bin",
307
- "backbone.blocks.29.filter.poles": "pytorch_model-00003-of-00003.bin",
308
- "backbone.blocks.29.filter.residues": "pytorch_model-00003-of-00003.bin",
309
- "backbone.blocks.29.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
310
- "backbone.blocks.29.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
311
- "backbone.blocks.29.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
312
- "backbone.blocks.29.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
313
- "backbone.blocks.29.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
314
- "backbone.blocks.29.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
315
- "backbone.blocks.29.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
316
- "backbone.blocks.29.post_norm.scale": "pytorch_model-00003-of-00003.bin",
317
- "backbone.blocks.29.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
318
- "backbone.blocks.29.projections.bias": "pytorch_model-00003-of-00003.bin",
319
- "backbone.blocks.29.projections.weight": "pytorch_model-00003-of-00003.bin",
320
- "backbone.blocks.3.filter.D": "pytorch_model-00001-of-00003.bin",
321
- "backbone.blocks.3.filter.poles": "pytorch_model-00001-of-00003.bin",
322
- "backbone.blocks.3.filter.residues": "pytorch_model-00001-of-00003.bin",
323
- "backbone.blocks.3.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
324
- "backbone.blocks.3.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
325
- "backbone.blocks.3.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
326
- "backbone.blocks.3.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
327
- "backbone.blocks.3.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
328
- "backbone.blocks.3.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
329
- "backbone.blocks.3.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
330
- "backbone.blocks.3.post_norm.scale": "pytorch_model-00001-of-00003.bin",
331
- "backbone.blocks.3.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
332
- "backbone.blocks.3.projections.bias": "pytorch_model-00001-of-00003.bin",
333
- "backbone.blocks.3.projections.weight": "pytorch_model-00001-of-00003.bin",
334
- "backbone.blocks.30.filter.D": "pytorch_model-00003-of-00003.bin",
335
- "backbone.blocks.30.filter.poles": "pytorch_model-00003-of-00003.bin",
336
- "backbone.blocks.30.filter.residues": "pytorch_model-00003-of-00003.bin",
337
- "backbone.blocks.30.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
338
- "backbone.blocks.30.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
339
- "backbone.blocks.30.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
340
- "backbone.blocks.30.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
341
- "backbone.blocks.30.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
342
- "backbone.blocks.30.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
343
- "backbone.blocks.30.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
344
- "backbone.blocks.30.post_norm.scale": "pytorch_model-00003-of-00003.bin",
345
- "backbone.blocks.30.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
346
- "backbone.blocks.30.projections.bias": "pytorch_model-00003-of-00003.bin",
347
- "backbone.blocks.30.projections.weight": "pytorch_model-00003-of-00003.bin",
348
- "backbone.blocks.31.filter.D": "pytorch_model-00003-of-00003.bin",
349
- "backbone.blocks.31.filter.poles": "pytorch_model-00003-of-00003.bin",
350
- "backbone.blocks.31.filter.residues": "pytorch_model-00003-of-00003.bin",
351
- "backbone.blocks.31.filter.short_filter_bias": "pytorch_model-00003-of-00003.bin",
352
- "backbone.blocks.31.filter.short_filter_weight": "pytorch_model-00003-of-00003.bin",
353
- "backbone.blocks.31.mlp.l1.weight": "pytorch_model-00003-of-00003.bin",
354
- "backbone.blocks.31.mlp.l2.weight": "pytorch_model-00003-of-00003.bin",
355
- "backbone.blocks.31.mlp.l3.weight": "pytorch_model-00003-of-00003.bin",
356
- "backbone.blocks.31.out_filter_dense.bias": "pytorch_model-00003-of-00003.bin",
357
- "backbone.blocks.31.out_filter_dense.weight": "pytorch_model-00003-of-00003.bin",
358
- "backbone.blocks.31.post_norm.scale": "pytorch_model-00003-of-00003.bin",
359
- "backbone.blocks.31.pre_norm.scale": "pytorch_model-00003-of-00003.bin",
360
- "backbone.blocks.31.projections.bias": "pytorch_model-00003-of-00003.bin",
361
- "backbone.blocks.31.projections.weight": "pytorch_model-00003-of-00003.bin",
362
- "backbone.blocks.4.filter.D": "pytorch_model-00001-of-00003.bin",
363
- "backbone.blocks.4.filter.poles": "pytorch_model-00001-of-00003.bin",
364
- "backbone.blocks.4.filter.residues": "pytorch_model-00001-of-00003.bin",
365
- "backbone.blocks.4.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
366
- "backbone.blocks.4.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
367
- "backbone.blocks.4.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
368
- "backbone.blocks.4.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
369
- "backbone.blocks.4.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
370
- "backbone.blocks.4.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
371
- "backbone.blocks.4.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
372
- "backbone.blocks.4.post_norm.scale": "pytorch_model-00001-of-00003.bin",
373
- "backbone.blocks.4.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
374
- "backbone.blocks.4.projections.bias": "pytorch_model-00001-of-00003.bin",
375
- "backbone.blocks.4.projections.weight": "pytorch_model-00001-of-00003.bin",
376
- "backbone.blocks.5.filter.D": "pytorch_model-00001-of-00003.bin",
377
- "backbone.blocks.5.filter.poles": "pytorch_model-00001-of-00003.bin",
378
- "backbone.blocks.5.filter.residues": "pytorch_model-00001-of-00003.bin",
379
- "backbone.blocks.5.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
380
- "backbone.blocks.5.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
381
- "backbone.blocks.5.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
382
- "backbone.blocks.5.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
383
- "backbone.blocks.5.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
384
- "backbone.blocks.5.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
385
- "backbone.blocks.5.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
386
- "backbone.blocks.5.post_norm.scale": "pytorch_model-00001-of-00003.bin",
387
- "backbone.blocks.5.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
388
- "backbone.blocks.5.projections.bias": "pytorch_model-00001-of-00003.bin",
389
- "backbone.blocks.5.projections.weight": "pytorch_model-00001-of-00003.bin",
390
- "backbone.blocks.6.filter.D": "pytorch_model-00001-of-00003.bin",
391
- "backbone.blocks.6.filter.poles": "pytorch_model-00001-of-00003.bin",
392
- "backbone.blocks.6.filter.residues": "pytorch_model-00001-of-00003.bin",
393
- "backbone.blocks.6.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
394
- "backbone.blocks.6.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
395
- "backbone.blocks.6.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
396
- "backbone.blocks.6.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
397
- "backbone.blocks.6.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
398
- "backbone.blocks.6.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
399
- "backbone.blocks.6.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
400
- "backbone.blocks.6.post_norm.scale": "pytorch_model-00001-of-00003.bin",
401
- "backbone.blocks.6.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
402
- "backbone.blocks.6.projections.bias": "pytorch_model-00001-of-00003.bin",
403
- "backbone.blocks.6.projections.weight": "pytorch_model-00001-of-00003.bin",
404
- "backbone.blocks.7.filter.D": "pytorch_model-00001-of-00003.bin",
405
- "backbone.blocks.7.filter.poles": "pytorch_model-00001-of-00003.bin",
406
- "backbone.blocks.7.filter.residues": "pytorch_model-00001-of-00003.bin",
407
- "backbone.blocks.7.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
408
- "backbone.blocks.7.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
409
- "backbone.blocks.7.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
410
- "backbone.blocks.7.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
411
- "backbone.blocks.7.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
412
- "backbone.blocks.7.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
413
- "backbone.blocks.7.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
414
- "backbone.blocks.7.post_norm.scale": "pytorch_model-00001-of-00003.bin",
415
- "backbone.blocks.7.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
416
- "backbone.blocks.7.projections.bias": "pytorch_model-00001-of-00003.bin",
417
- "backbone.blocks.7.projections.weight": "pytorch_model-00001-of-00003.bin",
418
- "backbone.blocks.8.inner_mha_cls.Wqkv.bias": "pytorch_model-00001-of-00003.bin",
419
- "backbone.blocks.8.inner_mha_cls.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
420
- "backbone.blocks.8.inner_mha_cls.out_proj.bias": "pytorch_model-00001-of-00003.bin",
421
- "backbone.blocks.8.inner_mha_cls.out_proj.weight": "pytorch_model-00001-of-00003.bin",
422
- "backbone.blocks.8.inner_mha_cls.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
423
- "backbone.blocks.8.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
424
- "backbone.blocks.8.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
425
- "backbone.blocks.8.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
426
- "backbone.blocks.8.post_norm.scale": "pytorch_model-00001-of-00003.bin",
427
- "backbone.blocks.8.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
428
- "backbone.blocks.9.filter.D": "pytorch_model-00001-of-00003.bin",
429
- "backbone.blocks.9.filter.poles": "pytorch_model-00001-of-00003.bin",
430
- "backbone.blocks.9.filter.residues": "pytorch_model-00001-of-00003.bin",
431
- "backbone.blocks.9.filter.short_filter_bias": "pytorch_model-00001-of-00003.bin",
432
- "backbone.blocks.9.filter.short_filter_weight": "pytorch_model-00001-of-00003.bin",
433
- "backbone.blocks.9.mlp.l1.weight": "pytorch_model-00001-of-00003.bin",
434
- "backbone.blocks.9.mlp.l2.weight": "pytorch_model-00001-of-00003.bin",
435
- "backbone.blocks.9.mlp.l3.weight": "pytorch_model-00001-of-00003.bin",
436
- "backbone.blocks.9.out_filter_dense.bias": "pytorch_model-00001-of-00003.bin",
437
- "backbone.blocks.9.out_filter_dense.weight": "pytorch_model-00001-of-00003.bin",
438
- "backbone.blocks.9.post_norm.scale": "pytorch_model-00001-of-00003.bin",
439
- "backbone.blocks.9.pre_norm.scale": "pytorch_model-00001-of-00003.bin",
440
- "backbone.blocks.9.projections.bias": "pytorch_model-00001-of-00003.bin",
441
- "backbone.blocks.9.projections.weight": "pytorch_model-00001-of-00003.bin",
442
- "backbone.embedding_layer.weight": "pytorch_model-00001-of-00003.bin",
443
- "backbone.norm.scale": "pytorch_model-00001-of-00003.bin",
444
- "backbone.unembed.weight": "pytorch_model-00001-of-00003.bin"
445
- }
446
- }