Commit 952810d by rwitz (1 parent: 60c13cf)

Upload MistralForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "merged",
+  "_name_or_path": "bruins-rp",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -12,7 +12,7 @@
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 31,
+  "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000.0,
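
The only semantic changes in config.json are the checkpoint name and the layer count (31 → 32). A minimal sketch, not part of this commit, for confirming the updated config with transformers; the repo id below is hypothetical, assembled from the committer name and "_name_or_path" — substitute the model's actual Hub path.

```python
from transformers import AutoConfig

# Hypothetical repo id; replace with the real one for this model.
config = AutoConfig.from_pretrained("rwitz/bruins-rp")
print(config.model_type)           # "mistral"
print(config.num_hidden_layers)    # 32 after this commit (was 31)
print(config.num_key_value_heads)  # 8 (grouped-query attention)
```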
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff8453e4d4d7d334aa937f9d484c12e4cb2aca752e1d2ccdf99eb2076a5f31e8
+oid sha256:856cbe3f4ec9f41a8523bf64c7f9cdec4b188e615136fe96f7a3822c4d77c9e6
 size 4943162240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acde25b8a0e1133a62e8c01d85cf393577d2ee06d163b7d48a875c9f71839406
+oid sha256:ec97b4b65e5b76303b33215211d5ab87fbd1b031351cfccef906b0ff2b97c356
 size 4999819232
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0a5ab65f562e56c0eeda4066a17b9b74d8ba6dee11a1b4d4ff5b42f59df2bb5
-size 4104291216
+oid sha256:12ac8df8e0ccf4f23628aaf88ae4e685ca7ea587e45b6b18cd48c9d7994fdb12
+size 4540516256
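
The .safetensors entries above are Git LFS pointer files; only the oid changes (and, for shard 3, the size). A minimal verification sketch, assuming the shards have already been fetched locally with `git lfs pull`:

```python
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream a file and return its hex SHA-256, as recorded in the LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

# Expected oid taken from the new pointer for shard 3 in this commit.
expected = "12ac8df8e0ccf4f23628aaf88ae4e685ca7ea587e45b6b18cd48c9d7994fdb12"
print(sha256_of("model-00003-of-00003.safetensors") == expected)  # True if the download is intact
```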
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14047240192
+    "total_size": 14483464192
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -230,6 +230,15 @@
     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",