Upload LlamaForCausalLM
- config.json +2 -2
- generation_config.json +0 -7
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +3 -3
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "Tamnemtf/llama-2-7b-vi-oscar_mini",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -24,5 +24,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.35.2",
   "use_cache": true,
-  "vocab_size":
+  "vocab_size": 32000
 }
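For reference, a minimal sketch (not part of this commit) of loading the updated config through the transformers AutoConfig API and checking the two fields this diff fills in; the repo id is taken from the _name_or_path value above:

    from transformers import AutoConfig

    # Load config.json from the hub repo named in _name_or_path above.
    config = AutoConfig.from_pretrained("Tamnemtf/llama-2-7b-vi-oscar_mini")
    assert config.architectures == ["LlamaForCausalLM"]
    assert config.vocab_size == 32000  # value filled in by this commit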
generation_config.json
CHANGED
@@ -3,14 +3,7 @@
   "do_sample": true,
   "eos_token_id": 2,
   "max_length": 4096,
-  "max_memory": {
-    "cpu": 329543315456
-  },
-  "no_split_module_classes": [
-    "LlamaDecoderLayer"
-  ],
   "pad_token_id": 0,
-  "special_dtypes": {},
   "temperature": 0.6,
   "top_p": 0.9,
   "transformers_version": "4.35.2"
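The removed entries (max_memory, no_split_module_classes, special_dtypes) appear to be accelerate device-dispatch arguments that leaked into the file at save time; they are not generation parameters, so the cleaned file keeps only the sampling settings. A sketch of the equivalent transformers GenerationConfig, assuming nothing beyond the values shown above:

    from transformers import GenerationConfig

    # Only the generation-relevant fields from the cleaned file remain.
    gen_config = GenerationConfig(
        do_sample=True,
        eos_token_id=2,
        max_length=4096,
        pad_token_id=0,
        temperature=0.6,
        top_p=0.9,
    )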
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df
+size 4938985248
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c409824a45d1fdc6d397f7b697d6696986cfcb8983633780a97b0d630c7fb2b4
+size 4947390768
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b5df3308c9f46779d3bb641da31b2143e9fdd423c439119f6528bf96ca6a30db
+size 3590488736
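Each .safetensors entry above is a Git LFS pointer file, not the weights themselves: the diff replaces the previous pointers (old oid and size truncated in this view) with the new sha256 oid and byte size of each shard. A minimal sketch, with verify_shard as an assumed helper name, of checking a downloaded shard against its pointer:

    import hashlib

    def verify_shard(path: str, expected_oid: str, expected_size: int) -> bool:
        # Stream the file in 1 MiB chunks so multi-GB shards fit in memory.
        sha, size = hashlib.sha256(), 0
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                sha.update(chunk)
                size += len(chunk)
        return sha.hexdigest() == expected_oid and size == expected_size

    # Values copied from the pointer for model-00001-of-00003.safetensors:
    verify_shard(
        "model-00001-of-00003.safetensors",
        "64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df",
        4938985248,
    )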
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 13476831232
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -34,7 +34,7 @@
     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.11.mlp.gate_proj.weight": "model-
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
@@ -152,7 +152,7 @@
     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.23.mlp.up_proj.weight": "model-
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
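The index's weight_map is what lets a loader fetch only the shard that holds a given tensor; total_size (13,476,831,232 bytes, consistent with roughly 6.7B float16 parameters) counts tensor bytes only, which is why it is slightly below the sum of the three shard file sizes, each of which also carries a safetensors header. A sketch of how the index is consumed, mirroring (not reproducing) what transformers does when loading a sharded checkpoint:

    import json
    from collections import defaultdict

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    # Group tensor names by the shard file that stores them.
    shards = defaultdict(list)
    for tensor_name, shard_file in index["weight_map"].items():
        shards[shard_file].append(tensor_name)

    # Layer 11's gate_proj resolves to shard 1, as the updated index records:
    assert (index["weight_map"]["model.layers.11.mlp.gate_proj.weight"]
            == "model-00001-of-00003.safetensors")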