Upload LlamaForCausalLM
- config.json +2 -2
- generation_config.json +0 -7
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +3 -3
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "Tamnemtf/llama-2-7b-vi-oscar_mini",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -24,5 +24,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.35.2",
   "use_cache": true,
-  "vocab_size":
+  "vocab_size": 32000
 }
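For reference, a minimal sketch (not part of this commit) of loading the updated config through the transformers AutoConfig API and checking the two fields this diff fills in; the repo id is taken from the _name_or_path value above:

    from transformers import AutoConfig

    # Load config.json from the hub repo named in _name_or_path above.
    config = AutoConfig.from_pretrained("Tamnemtf/llama-2-7b-vi-oscar_mini")
    assert config.architectures == ["LlamaForCausalLM"]
    assert config.vocab_size == 32000  # value filled in by this commit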
generation_config.json
CHANGED
@@ -3,14 +3,7 @@
   "do_sample": true,
   "eos_token_id": 2,
   "max_length": 4096,
-  "max_memory": {
-    "cpu": 329543315456
-  },
-  "no_split_module_classes": [
-    "LlamaDecoderLayer"
-  ],
   "pad_token_id": 0,
-  "special_dtypes": {},
   "temperature": 0.6,
   "top_p": 0.9,
   "transformers_version": "4.35.2"
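The removed entries (max_memory, no_split_module_classes, special_dtypes) appear to be accelerate device-dispatch arguments that leaked into the file at save time; they are not generation parameters, so the cleaned file keeps only the sampling settings. A sketch of the equivalent transformers GenerationConfig, assuming nothing beyond the values shown above:

    from transformers import GenerationConfig

    # Only the generation-relevant fields from the cleaned file remain.
    gen_config = GenerationConfig(
        do_sample=True,
        eos_token_id=2,
        max_length=4096,
        pad_token_id=0,
        temperature=0.6,
        top_p=0.9,
    )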
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df
+size 4938985248
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c409824a45d1fdc6d397f7b697d6696986cfcb8983633780a97b0d630c7fb2b4
+size 4947390768
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b5df3308c9f46779d3bb641da31b2143e9fdd423c439119f6528bf96ca6a30db
+size 3590488736
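Each .safetensors entry above is a Git LFS pointer file, not the weights themselves: the diff replaces the previous pointers (old oid and size truncated in this view) with the new sha256 oid and byte size of each shard. A minimal sketch, with verify_shard as an assumed helper name, of checking a downloaded shard against its pointer:

    import hashlib

    def verify_shard(path: str, expected_oid: str, expected_size: int) -> bool:
        # Stream the file in 1 MiB chunks so multi-GB shards fit in memory.
        sha, size = hashlib.sha256(), 0
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                sha.update(chunk)
                size += len(chunk)
        return sha.hexdigest() == expected_oid and size == expected_size

    # Values copied from the pointer for model-00001-of-00003.safetensors:
    verify_shard(
        "model-00001-of-00003.safetensors",
        "64bdc44b6e9c4bd8690c0515d207fb2fa02ecd2437571d97b30e81f23f34a7df",
        4938985248,
    )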
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 13476831232
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -34,7 +34,7 @@
     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.11.mlp.gate_proj.weight": "model-
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
@@ -152,7 +152,7 @@
     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.23.mlp.up_proj.weight": "model-
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
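The index's weight_map is what lets a loader fetch only the shard that holds a given tensor; total_size (13,476,831,232 bytes, consistent with roughly 6.7B float16 parameters) counts tensor bytes only, which is why it is slightly below the sum of the three shard file sizes, each of which also carries a safetensors header. A sketch of how the index is consumed, mirroring (not reproducing) what transformers does when loading a sharded checkpoint:

    import json
    from collections import defaultdict

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    # Group tensor names by the shard file that stores them.
    shards = defaultdict(list)
    for tensor_name, shard_file in index["weight_map"].items():
        shards[shard_file].append(tensor_name)

    # Layer 11's gate_proj resolves to shard 1, as the updated index records:
    assert (index["weight_map"]["model.layers.11.mlp.gate_proj.weight"]
            == "model-00001-of-00003.safetensors")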