voidful committed on
Commit
c98ab35
1 Parent(s): 1d700f6

Upload PhiForCausalLM

Browse files
config.json CHANGED
@@ -33,5 +33,5 @@
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.39.1",
35
  "use_cache": true,
36
- "vocab_size": 169000
37
  }
 
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.39.1",
35
  "use_cache": true,
36
+ "vocab_size": 80980
37
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb71e37302519ddd910cfc9cf2c9299c811582179a060474b1c943dbc650df56
3
- size 4942842592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4e5a557a334966f1dcb31fd3ed7f07668c2c4a2b47087492f8d1ca6bb23485
3
+ size 4960313864
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54aeec3d5146a62a9b21542e2d5b300531e0b11d94b0679ad40f4ee309861c13
3
- size 2660784152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef82279c47ded7134bef495a0b6bb4295b60886c85d619a8f968119e37e4fc0f
3
+ size 1200841024
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 7603589280
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "model-00002-of-00002.safetensors",
@@ -134,12 +134,12 @@
134
  "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
135
  "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
136
  "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
137
- "model.layers.17.input_layernorm.bias": "model-00002-of-00002.safetensors",
138
- "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
139
  "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
140
  "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
141
- "model.layers.17.mlp.fc2.bias": "model-00002-of-00002.safetensors",
142
- "model.layers.17.mlp.fc2.weight": "model-00002-of-00002.safetensors",
143
  "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
144
  "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
145
  "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
@@ -148,34 +148,34 @@
148
  "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
  "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
  "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
- "model.layers.18.input_layernorm.bias": "model-00002-of-00002.safetensors",
152
- "model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
153
- "model.layers.18.mlp.fc1.bias": "model-00002-of-00002.safetensors",
154
- "model.layers.18.mlp.fc1.weight": "model-00002-of-00002.safetensors",
155
- "model.layers.18.mlp.fc2.bias": "model-00002-of-00002.safetensors",
156
- "model.layers.18.mlp.fc2.weight": "model-00002-of-00002.safetensors",
157
- "model.layers.18.self_attn.dense.bias": "model-00002-of-00002.safetensors",
158
- "model.layers.18.self_attn.dense.weight": "model-00002-of-00002.safetensors",
159
- "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
160
- "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
161
- "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
162
- "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
163
- "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
164
- "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
165
- "model.layers.19.input_layernorm.bias": "model-00002-of-00002.safetensors",
166
- "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
167
- "model.layers.19.mlp.fc1.bias": "model-00002-of-00002.safetensors",
168
- "model.layers.19.mlp.fc1.weight": "model-00002-of-00002.safetensors",
169
- "model.layers.19.mlp.fc2.bias": "model-00002-of-00002.safetensors",
170
- "model.layers.19.mlp.fc2.weight": "model-00002-of-00002.safetensors",
171
- "model.layers.19.self_attn.dense.bias": "model-00002-of-00002.safetensors",
172
- "model.layers.19.self_attn.dense.weight": "model-00002-of-00002.safetensors",
173
- "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
174
- "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
175
- "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
176
- "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
177
- "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
178
- "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
179
  "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
180
  "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
181
  "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
@@ -190,34 +190,34 @@
190
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
191
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
192
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
193
- "model.layers.20.input_layernorm.bias": "model-00002-of-00002.safetensors",
194
- "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
195
- "model.layers.20.mlp.fc1.bias": "model-00002-of-00002.safetensors",
196
- "model.layers.20.mlp.fc1.weight": "model-00002-of-00002.safetensors",
197
- "model.layers.20.mlp.fc2.bias": "model-00002-of-00002.safetensors",
198
- "model.layers.20.mlp.fc2.weight": "model-00002-of-00002.safetensors",
199
- "model.layers.20.self_attn.dense.bias": "model-00002-of-00002.safetensors",
200
- "model.layers.20.self_attn.dense.weight": "model-00002-of-00002.safetensors",
201
- "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
202
- "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
203
- "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
204
- "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
- "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
206
- "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
207
  "model.layers.21.input_layernorm.bias": "model-00002-of-00002.safetensors",
208
  "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
209
  "model.layers.21.mlp.fc1.bias": "model-00002-of-00002.safetensors",
210
  "model.layers.21.mlp.fc1.weight": "model-00002-of-00002.safetensors",
211
  "model.layers.21.mlp.fc2.bias": "model-00002-of-00002.safetensors",
212
  "model.layers.21.mlp.fc2.weight": "model-00002-of-00002.safetensors",
213
- "model.layers.21.self_attn.dense.bias": "model-00002-of-00002.safetensors",
214
- "model.layers.21.self_attn.dense.weight": "model-00002-of-00002.safetensors",
215
- "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
216
- "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
217
- "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
218
- "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
219
- "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
220
- "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
221
  "model.layers.22.input_layernorm.bias": "model-00002-of-00002.safetensors",
222
  "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
223
  "model.layers.22.mlp.fc1.bias": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6161117520
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "model-00002-of-00002.safetensors",
 
134
  "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
135
  "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
136
  "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
138
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
139
  "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
140
  "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
141
+ "model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
142
+ "model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
143
  "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
144
  "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
145
  "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
 
148
  "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
  "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
  "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
152
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
154
+ "model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
156
+ "model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
157
+ "model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
158
+ "model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
160
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
162
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
164
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
165
+ "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
166
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
167
+ "model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
168
+ "model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
169
+ "model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
170
+ "model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
171
+ "model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
172
+ "model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
173
+ "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
174
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
175
+ "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
176
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
177
+ "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
178
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
179
  "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
180
  "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
181
  "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
 
190
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
191
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
192
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
193
+ "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
194
+ "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
195
+ "model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
196
+ "model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
197
+ "model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
198
+ "model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
199
+ "model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
200
+ "model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
201
+ "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
202
+ "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
203
+ "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
204
+ "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
205
+ "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
206
+ "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
207
  "model.layers.21.input_layernorm.bias": "model-00002-of-00002.safetensors",
208
  "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
209
  "model.layers.21.mlp.fc1.bias": "model-00002-of-00002.safetensors",
210
  "model.layers.21.mlp.fc1.weight": "model-00002-of-00002.safetensors",
211
  "model.layers.21.mlp.fc2.bias": "model-00002-of-00002.safetensors",
212
  "model.layers.21.mlp.fc2.weight": "model-00002-of-00002.safetensors",
213
+ "model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
214
+ "model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
215
+ "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
216
+ "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
217
+ "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
218
+ "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
219
+ "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
220
+ "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
221
  "model.layers.22.input_layernorm.bias": "model-00002-of-00002.safetensors",
222
  "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
223
  "model.layers.22.mlp.fc1.bias": "model-00002-of-00002.safetensors",