Tamnemtf committed
Commit b6e64a7
Parent: d531092

Upload LlamaForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "iambestfeed/vietnamese-llama2-7b-sharded-checkpoints",
+  "_name_or_path": "Tamnemtf/llama-2-7b-vi-oscar_mini",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -24,5 +24,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.35.2",
   "use_cache": true,
-  "vocab_size": 46303
+  "vocab_size": 32000
 }
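The updated config can be sanity-checked with transformers. A minimal sketch, assuming network access to the Hub and that the repo id shown in the new "_name_or_path" above is the repo this commit was pushed to:

    from transformers import AutoConfig

    # Repo id taken from the new "_name_or_path" in the diff above (assumption:
    # it is the repository this commit targets).
    cfg = AutoConfig.from_pretrained("Tamnemtf/llama-2-7b-vi-oscar_mini")

    print(cfg.architectures)  # ["LlamaForCausalLM"]
    print(cfg.vocab_size)     # expected: 32000 after this commit (was 46303)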
generation_config.json CHANGED
@@ -3,14 +3,7 @@
   "do_sample": true,
   "eos_token_id": 2,
   "max_length": 4096,
-  "max_memory": {
-    "cpu": 329543315456
-  },
-  "no_split_module_classes": [
-    "LlamaDecoderLayer"
-  ],
   "pad_token_id": 0,
-  "special_dtypes": {},
   "temperature": 0.6,
   "top_p": 0.9,
   "transformers_version": "4.35.2"
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1b56f5f11468cabaa84ecf412f1273e9616334a4f044b7805ab301754f0d6ae
-size 4965977776
+oid sha256:64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df
+size 4938985248
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a95236cea3c1878dcf2f63227b16e8966dd07b57f8e4574996f8ffb3eb33ed1
-size 4947390776
+oid sha256:c409824a45d1fdc6d397f7b697d6696986cfcb8983633780a97b0d630c7fb2b4
+size 4947390768
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc0102071a0d43db6b587b1e5e524b9b2ed070668849665a5e7e051dcd6809cb
-size 3797836568
+oid sha256:b5df3308c9f46779d3bb641da31b2143e9fdd423c439119f6528bf96ca6a30db
+size 3590488736
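Each of the three entries above is a Git LFS pointer (spec v1): the shard itself lives in LFS and is identified by its SHA-256 and byte size. A minimal sketch for verifying a downloaded shard against its pointer; the local path is hypothetical and the values are copied from the new pointer for model-00001-of-00003.safetensors:

    import hashlib
    import os

    def verify_lfs_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
        """Check a downloaded file against the oid/size recorded in its LFS pointer."""
        if os.path.getsize(path) != expected_size:
            return False
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest() == expected_sha256

    # Assumes the shard has already been downloaded next to this script.
    print(verify_lfs_pointer(
        "model-00001-of-00003.safetensors",
        "64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df",
        4938985248,
    ))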
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 13711171584
+    "total_size": 13476831232
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -34,7 +34,7 @@
     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
@@ -152,7 +152,7 @@
     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",