voidful committed on
Commit
65b6c56
1 Parent(s): 0f212e5

Upload PhiForCausalLM

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/phi-1_5",
3
  "architectures": [
4
  "PhiForCausalLM"
5
  ],
@@ -33,5 +33,5 @@
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.39.1",
35
  "use_cache": true,
36
- "vocab_size": 70980
37
  }
 
1
  {
2
+ "_name_or_path": "voidful/phi-1_5_base",
3
  "architectures": [
4
  "PhiForCausalLM"
5
  ],
 
33
  "torch_dtype": "float32",
34
  "transformers_version": "4.39.1",
35
  "use_cache": true,
36
+ "vocab_size": 169000
37
  }
generation_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "_from_model_config": true,
3
- "transformers_version": "4.39.1",
4
- "eos_token_id": [70976, 50256, 70977]
 
 
 
 
5
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 70976,
5
+ 50256,
6
+ 70977
7
+ ],
8
+ "transformers_version": "4.39.1"
9
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd2bb7a9913c03e0f6ed2fc4cc1fc1f619aafc3d6ed14ee2caaca695a6894ead
3
- size 4945535680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb71e37302519ddd910cfc9cf2c9299c811582179a060474b1c943dbc650df56
3
+ size 4942842592
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88962970cf7be23aceefae5ad9e4ecd214785bfe07f955c0db5b4c56d3a8123b
3
- size 1051739176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54aeec3d5146a62a9b21542e2d5b300531e0b11d94b0679ad40f4ee309861c13
3
+ size 2660784152
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 5997237520
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "model-00002-of-00002.safetensors",
@@ -134,12 +134,12 @@
134
  "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
135
  "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
136
  "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
137
- "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
138
- "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
139
  "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
140
  "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
141
- "model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
142
- "model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
143
  "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
144
  "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
145
  "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
@@ -148,34 +148,34 @@
148
  "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
  "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
  "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
- "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
152
- "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
- "model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
154
- "model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
155
- "model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
156
- "model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
157
- "model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
158
- "model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
159
- "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
160
- "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
161
- "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
162
- "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
163
- "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
164
- "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
165
- "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
166
- "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
167
- "model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
168
- "model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
169
- "model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
170
- "model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
171
- "model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
172
- "model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
173
- "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
174
- "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
175
- "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
176
- "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
177
- "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
178
- "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
179
  "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
180
  "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
181
  "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
@@ -190,34 +190,34 @@
190
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
191
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
192
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
193
- "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
194
- "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
195
- "model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
196
- "model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
197
- "model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
198
- "model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
199
- "model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
200
- "model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
201
- "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
202
- "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
203
- "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
204
- "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
205
- "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
206
- "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
207
  "model.layers.21.input_layernorm.bias": "model-00002-of-00002.safetensors",
208
  "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
209
- "model.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
210
- "model.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
211
  "model.layers.21.mlp.fc2.bias": "model-00002-of-00002.safetensors",
212
  "model.layers.21.mlp.fc2.weight": "model-00002-of-00002.safetensors",
213
- "model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
214
- "model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
215
- "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
216
- "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
217
- "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
218
- "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
219
- "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
220
- "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
221
  "model.layers.22.input_layernorm.bias": "model-00002-of-00002.safetensors",
222
  "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
223
  "model.layers.22.mlp.fc1.bias": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 7603589280
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "model-00002-of-00002.safetensors",
 
134
  "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
135
  "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
136
  "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.17.input_layernorm.bias": "model-00002-of-00002.safetensors",
138
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
139
  "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
140
  "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
141
+ "model.layers.17.mlp.fc2.bias": "model-00002-of-00002.safetensors",
142
+ "model.layers.17.mlp.fc2.weight": "model-00002-of-00002.safetensors",
143
  "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
144
  "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
145
  "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
 
148
  "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
  "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
  "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.18.input_layernorm.bias": "model-00002-of-00002.safetensors",
152
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
153
+ "model.layers.18.mlp.fc1.bias": "model-00002-of-00002.safetensors",
154
+ "model.layers.18.mlp.fc1.weight": "model-00002-of-00002.safetensors",
155
+ "model.layers.18.mlp.fc2.bias": "model-00002-of-00002.safetensors",
156
+ "model.layers.18.mlp.fc2.weight": "model-00002-of-00002.safetensors",
157
+ "model.layers.18.self_attn.dense.bias": "model-00002-of-00002.safetensors",
158
+ "model.layers.18.self_attn.dense.weight": "model-00002-of-00002.safetensors",
159
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
160
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
162
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
163
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
164
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.layers.19.input_layernorm.bias": "model-00002-of-00002.safetensors",
166
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.layers.19.mlp.fc1.bias": "model-00002-of-00002.safetensors",
168
+ "model.layers.19.mlp.fc1.weight": "model-00002-of-00002.safetensors",
169
+ "model.layers.19.mlp.fc2.bias": "model-00002-of-00002.safetensors",
170
+ "model.layers.19.mlp.fc2.weight": "model-00002-of-00002.safetensors",
171
+ "model.layers.19.self_attn.dense.bias": "model-00002-of-00002.safetensors",
172
+ "model.layers.19.self_attn.dense.weight": "model-00002-of-00002.safetensors",
173
+ "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
174
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
176
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
178
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
179
  "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
180
  "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
181
  "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
 
190
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
191
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
192
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
193
+ "model.layers.20.input_layernorm.bias": "model-00002-of-00002.safetensors",
194
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
195
+ "model.layers.20.mlp.fc1.bias": "model-00002-of-00002.safetensors",
196
+ "model.layers.20.mlp.fc1.weight": "model-00002-of-00002.safetensors",
197
+ "model.layers.20.mlp.fc2.bias": "model-00002-of-00002.safetensors",
198
+ "model.layers.20.mlp.fc2.weight": "model-00002-of-00002.safetensors",
199
+ "model.layers.20.self_attn.dense.bias": "model-00002-of-00002.safetensors",
200
+ "model.layers.20.self_attn.dense.weight": "model-00002-of-00002.safetensors",
201
+ "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
202
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
204
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
206
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
207
  "model.layers.21.input_layernorm.bias": "model-00002-of-00002.safetensors",
208
  "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
209
+ "model.layers.21.mlp.fc1.bias": "model-00002-of-00002.safetensors",
210
+ "model.layers.21.mlp.fc1.weight": "model-00002-of-00002.safetensors",
211
  "model.layers.21.mlp.fc2.bias": "model-00002-of-00002.safetensors",
212
  "model.layers.21.mlp.fc2.weight": "model-00002-of-00002.safetensors",
213
+ "model.layers.21.self_attn.dense.bias": "model-00002-of-00002.safetensors",
214
+ "model.layers.21.self_attn.dense.weight": "model-00002-of-00002.safetensors",
215
+ "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
216
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
218
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
220
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
221
  "model.layers.22.input_layernorm.bias": "model-00002-of-00002.safetensors",
222
  "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
223
  "model.layers.22.mlp.fc1.bias": "model-00002-of-00002.safetensors",