perlthoughts committed
Commit: fd6c5ab
Parent: 9cf77c4

Upload MistralForCausalLM

config.json CHANGED
@@ -12,7 +12,7 @@
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 31,
+  "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-05,
   "rope_theta": 100000,
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06f2c06100d22009d139c764c10f73d67762fc198df9e8a6eedb76eb9397dc12
+oid sha256:645d3a085a80e1d69795fb247a166f72002ac60b8727068a679239490a4fe050
 size 4943178624
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c7707ddd6bf13370930f6fbebcb8a1b93d923171e549bbbe1b7360f3de94e58
+oid sha256:613844c6264bcafb6d79ba297b99c7eed91267c86c374b36ef802dc2d2e46b26
 size 4999819232
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ccb9ea1f858e6e3bef22f3d80529a9be46c70b23d1ee7925f2dd9b70257c6fe
-size 4104307600
+oid sha256:e12da8c2069a6bb3deb3ed98e0de071a880babc3267e7ca8b14c7f4aafae4a23
+size 4540532640
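
Each *.safetensors entry above is a Git LFS pointer (version / oid / size), so a downloaded shard can be checked against the new pointers by hashing the file. A minimal sketch, assuming the shards have been pulled with git lfs into the working directory:

import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the sha256 oid and size from its LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# Values taken from the updated pointer for shard 3 in this commit.
print(verify_lfs_pointer(
    "model-00003-of-00003.safetensors",
    "e12da8c2069a6bb3deb3ed98e0de071a880babc3267e7ca8b14c7f4aafae4a23",
    4540532640,
))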
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14047272960
+    "total_size": 14483496960
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -230,6 +230,15 @@
     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",