Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete file list.
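The commit title indicates the folder was pushed with the huggingface_hub library. A minimal sketch of how such an upload could be issued (the repo id and token handling are placeholders, not taken from this commit):

```python
# Hypothetical sketch: upload a local checkpoint directory with huggingface_hub.
# repo_id is a placeholder; the actual target repo is not shown in this diff.
from huggingface_hub import HfApi

api = HfApi()  # uses a token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_folder(
    folder_path="quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64",
    repo_id="your-org/quantized-llama-3-70b",  # placeholder
    repo_type="model",
    path_in_repo="quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64",
    commit_message="Upload folder using huggingface_hub",
)
```

Large `.safetensors` files are stored through Git LFS, which is why each `rank*.safetensors` entry in the diff below is a three-line LFS pointer rather than raw tensor data.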
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors +3 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json +37 -0
- quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
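Each directory name encodes the serving configuration: pipeline-parallel size (`pp`), tensor-parallel size (`tp`), AWQ W4A16 weight quantization, KV-cache precision (`kvfp16`/`kvfp8`/`kvint8`), and quantization group size (`gs64`), matching the `mapping` and `quantization` blocks of the per-directory `config.json` shown below. A single variant could be fetched with `snapshot_download` and a pattern filter; the repo id here is a placeholder:

```python
# Hypothetical sketch: download only one checkpoint variant from the repo.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="your-org/quantized-llama-3-70b",  # placeholder
    allow_patterns=["quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/*"],
)
print(local_dir)
```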
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fdc97e6f754f66320050462c9afe6e33580d32a68d14530598d9fccaef84aab
+size 42509327144
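The `config.json` above appears to be a TensorRT-LLM-style checkpoint config emitted by the `ammo` quantizer (version 0.7.4); `mapping.world_size` matches the number of `rank*.safetensors` shards in the same directory (1 here, up to 8 for the `tp8` variants). A small sanity-check sketch, assuming one variant has already been downloaded locally:

```python
# Sketch: load a converted checkpoint config and check the shard count against
# mapping.world_size. The directory name is whichever variant was downloaded.
import glob
import json
import os

ckpt_dir = "quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64"
with open(os.path.join(ckpt_dir, "config.json")) as f:
    cfg = json.load(f)

shards = sorted(glob.glob(os.path.join(ckpt_dir, "rank*.safetensors")))
assert len(shards) == cfg["mapping"]["world_size"], (len(shards), cfg["mapping"])
print(cfg["quantization"]["quant_algo"], cfg["quantization"]["kv_cache_quant_algo"])
```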
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb5d3a3152f2b5c22a8b1e9647ce63a80ca36da0ec96d654eef68de9dee9a9d
+size 42509337408
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:843b555c6bb27499443877081125823d45e7280c44e708d0735c7de1630cc1af
+size 42509337408
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66bc432e82f9ea3be434394ad7a987990fbb584de521d43ab67e99694af5e3e0
+size 21258033216
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a9c9274a415af0b2959184cb066fffb8a2be8af844b46e1699bf2bffc564c6
+size 21258033216
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8c132954436bc70f3145ba27dfb4838d82d59131f5e00d922fffdcdd10962f2
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785de253dc345c39a95ea8c0ea730018279005b1f5dd5a808d855a1413799c42
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1192c889ec8919209170f9664a062100288f824a03401a6137d30ff3743fb814
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e32f059c20a426f413612f3be65957409f18a68c58820b96c0757bec27f2edd9
+size 21258043432
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56c4cd25334ed1a20ff0675d5be013ce92caf65d9a38ced253c339cbb8a03d50
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:289cded54c120a18bdf80da0cd51c1ba58d2e215f11effd59bd247f6a9530f77
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe48de02b4ea7be8ee656daa586676430ba71ae73f5edd3c429359885de47c0
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:530350069f003f10612d3d4dd74a714ec259ac53b331242b0f59b9ee120c3cbf
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5da92dad5af08d4ade29d176125a9fee002b2359678ef13f1064ec0fca6caf6
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e495b18ad031dea3d3a2ec7088183a2710e87b7802c9b44c2b494531556d38b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9200198c179162826cb4e01ef9e81d19e4cae05978d24406dcc6fc74eed42615
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9228b82b365ebc3d265d8d07bf5652a5604047aa5ec5d6b86e11bbcfa22ec51b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5254f000f42f6401a2b2c9d79e99273916f1b6e13181c9a64b320309c39e7c86
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96e2ae6a146b35b3845839dfbb8f2986a3d173894d5ba95d9fcc741b46ae8bd
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e8d5fbf8f7f420a2135fa043b0fa501be9c8174abcb21bf00630e155731e68
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13463830303c2457cc079a208108f656d07bb765ea2a9132be1c296a9faf8395
+size 10632395520
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766d5dc44e59c74742cfee3395b9a7411ea57fa9aa75b2951f2d8c5118a972b5
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc98ef3959b18de0bbf8853e12dc4674adf9d17687db0bac0e7044580dae2da
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49913f9045db6afbccc44c2fab70ae23c282575e7eb42ab71355ff995cf3e918
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad114813392fdbe70c990a741774dca9a649f3b9ac6c39d682397bc818e72733
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7b37a695078e44600c263db4997107a3bfd7f00ab101fce51981132bf4f6083
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c33c424877ab3b3ba668f8d726064d528827d46762bba4f359cf690a818fd78
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b33025dfdbc309d2842f8b64b9d68decfc9f4e0805ff8c2cad83d423dd6c443a
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7557d12af05437c696a17c2d581fd335ae88e1cc988f9de0c89e64bdc5e5a9e3
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01c6cf1fd0494dba1c0e8fce856e9478f16a9a57cb86b6370dbea43e7cbd6a1
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09477dc6468deafed5bdcb417d95eb490f6efd4b9ccf32170a8067c8dc3c257d
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f6a595e810767e6ebaeb1f6bce335c9534ea1cc1c8fbbe72807dbfca3efd85b
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be62bdfb0bbb5710bdd21cc85e0658a6e929519cb5d7bc896f6cd7f939d3f37c
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3f4e9b8517c9274c10c59e147d612e35112aa14e9b332020698ba948ea35fb
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0e206918644e6ef4977e28e7d82e5d1a1667e6cabcf8f4b403781c9d86a6102
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81b7083b8015a6958ff1ee51b323035e1ceb718708ebda2faab10158bfefde93
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a69d76984b4726955cf4f2d9c6e61067af41f2d1a023bc67c65b90928d98405
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json
ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0765763096ca71daf268eaf362722b45c1f27e9c34bf270919a556b57a6bdcd6
+size 5319570736