Commit 952810d by rwitz (1 parent: 60c13cf)

Upload MistralForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "merged",
+  "_name_or_path": "bruins-rp",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -12,7 +12,7 @@
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 31,
+  "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000.0,
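
The only semantic changes in config.json are the checkpoint name and the layer count (31 → 32). A minimal sketch, not part of this commit, for confirming the updated config with transformers; the repo id below is hypothetical, assembled from the committer name and "_name_or_path" — substitute the model's actual Hub path.

```python
from transformers import AutoConfig

# Hypothetical repo id; replace with the real one for this model.
config = AutoConfig.from_pretrained("rwitz/bruins-rp")
print(config.model_type)           # "mistral"
print(config.num_hidden_layers)    # 32 after this commit (was 31)
print(config.num_key_value_heads)  # 8 (grouped-query attention)
```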
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff8453e4d4d7d334aa937f9d484c12e4cb2aca752e1d2ccdf99eb2076a5f31e8
+oid sha256:856cbe3f4ec9f41a8523bf64c7f9cdec4b188e615136fe96f7a3822c4d77c9e6
 size 4943162240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acde25b8a0e1133a62e8c01d85cf393577d2ee06d163b7d48a875c9f71839406
+oid sha256:ec97b4b65e5b76303b33215211d5ab87fbd1b031351cfccef906b0ff2b97c356
 size 4999819232
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0a5ab65f562e56c0eeda4066a17b9b74d8ba6dee11a1b4d4ff5b42f59df2bb5
-size 4104291216
+oid sha256:12ac8df8e0ccf4f23628aaf88ae4e685ca7ea587e45b6b18cd48c9d7994fdb12
+size 4540516256
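
The .safetensors entries above are Git LFS pointer files; only the oid changes (and, for shard 3, the size). A minimal verification sketch, assuming the shards have already been fetched locally with `git lfs pull`:

```python
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream a file and return its hex SHA-256, as recorded in the LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

# Expected oid taken from the new pointer for shard 3 in this commit.
expected = "12ac8df8e0ccf4f23628aaf88ae4e685ca7ea587e45b6b18cd48c9d7994fdb12"
print(sha256_of("model-00003-of-00003.safetensors") == expected)  # True if the download is intact
```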
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14047240192
+    "total_size": 14483464192
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -230,6 +230,15 @@
     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",