matichon committed
Commit 494657e
1 parent: d33deca

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.

Files changed (50):
  1. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json +37 -0
  2. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  3. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json +37 -0
  4. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  5. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json +37 -0
  6. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  7. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json +37 -0
  8. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  9. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  10. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json +37 -0
  11. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  12. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  13. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json +37 -0
  14. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  15. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
  16. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json +37 -0
  17. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  18. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  19. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
  20. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
  21. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json +37 -0
  22. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  23. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  24. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
  25. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
  26. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json +37 -0
  27. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  28. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
  29. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors +3 -0
  30. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors +3 -0
  31. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json +37 -0
  32. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  33. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  34. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
  35. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
  36. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors +3 -0
  37. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors +3 -0
  38. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors +3 -0
  39. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors +3 -0
  40. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json +37 -0
  41. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  42. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  43. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
  44. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
  45. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors +3 -0
  46. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors +3 -0
  47. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors +3 -0
  48. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors +3 -0
  49. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json +37 -0
  50. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
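Each quantization variant lives in its own top-level folder, with the parallelism layout (pp/tp), weight scheme (awq-w4a16), KV-cache precision (kvfp16/kvfp8/kvint8), and group size (gs64) encoded in the folder name, so a consumer only needs the folder matching their deployment. A minimal sketch of pulling a single variant with huggingface_hub's snapshot_download; the repo id below is a placeholder, since the commit view does not name the repository:

from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the repository this commit belongs to.
local_dir = snapshot_download(
    repo_id="your-org/quantized-llama-3-70b",
    # revision can pin a branch, tag, or commit hash (e.g. the full hash behind 494657e)
    allow_patterns=["quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/*"],
)
print("downloaded to", local_dir)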
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
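All twelve config.json files share this schema; only quantization.kv_cache_quant_algo and the mapping block (world_size/tp_size) change between folders. Here kv_cache_quant_algo is null, which matches the kvfp16 suffix (the KV cache stays in FP16). A minimal sketch, assuming the folder has been downloaded into the working directory, for reading the config and working out how many rank shards to expect:

import json
from pathlib import Path

cfg = json.loads(Path(
    "quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json"
).read_text())

quant, mapping = cfg["quantization"], cfg["mapping"]
print("weights:", quant["quant_algo"], "group_size", quant["group_size"])
print("kv cache:", quant["kv_cache_quant_algo"] or "FP16 (unquantized)")
# One rank<i>.safetensors shard per rank is expected in the folder:
print("expected shards:", [f"rank{i}.safetensors" for i in range(mapping["world_size"])])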
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fdc97e6f754f66320050462c9afe6e33580d32a68d14530598d9fccaef84aab
+size 42509327144
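The rank0.safetensors entry is a Git LFS pointer, not the weights themselves: it records only the object's SHA-256 and byte size (here about 42.5 GB), and the real shard is fetched from LFS on download. A minimal sketch, assuming the actual shard has been downloaded to the same path, for checking it against the pointer values above:

import hashlib
from pathlib import Path

shard = Path("quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors")
expected_oid = "5fdc97e6f754f66320050462c9afe6e33580d32a68d14530598d9fccaef84aab"
expected_size = 42509327144

# Stream the file so the ~42 GB shard is never held in memory at once.
h = hashlib.sha256()
with shard.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert shard.stat().st_size == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("rank0.safetensors matches its LFS pointer")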
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb5d3a3152f2b5c22a8b1e9647ce63a80ca36da0ec96d654eef68de9dee9a9d
+size 42509337408
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:843b555c6bb27499443877081125823d45e7280c44e708d0735c7de1630cc1af
+size 42509337408
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66bc432e82f9ea3be434394ad7a987990fbb584de521d43ab67e99694af5e3e0
+size 21258033216
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a9c9274a415af0b2959184cb066fffb8a2be8af844b46e1699bf2bffc564c6
+size 21258033216
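With tp_size 2 the checkpoint is split into one shard per tensor-parallel rank (rank0 and rank1, each about 21.3 GB, roughly half of the single tp1 shard). A minimal sketch, assuming both shards have been fetched locally and are ordinary safetensors files, for listing what each rank holds without loading the tensors:

from safetensors import safe_open

folder = "quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64"
for rank in range(2):  # mapping.world_size for this variant
    with safe_open(f"{folder}/rank{rank}.safetensors", framework="np") as f:
        names = list(f.keys())
        print(f"rank{rank}: {len(names)} tensors, first few: {names[:3]}")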
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8c132954436bc70f3145ba27dfb4838d82d59131f5e00d922fffdcdd10962f2
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785de253dc345c39a95ea8c0ea730018279005b1f5dd5a808d855a1413799c42
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1192c889ec8919209170f9664a062100288f824a03401a6137d30ff3743fb814
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e32f059c20a426f413612f3be65957409f18a68c58820b96c0757bec27f2edd9
+size 21258043432
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56c4cd25334ed1a20ff0675d5be013ce92caf65d9a38ced253c339cbb8a03d50
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:289cded54c120a18bdf80da0cd51c1ba58d2e215f11effd59bd247f6a9530f77
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe48de02b4ea7be8ee656daa586676430ba71ae73f5edd3c429359885de47c0
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:530350069f003f10612d3d4dd74a714ec259ac53b331242b0f59b9ee120c3cbf
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5da92dad5af08d4ade29d176125a9fee002b2359678ef13f1064ec0fca6caf6
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e495b18ad031dea3d3a2ec7088183a2710e87b7802c9b44c2b494531556d38b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9200198c179162826cb4e01ef9e81d19e4cae05978d24406dcc6fc74eed42615
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9228b82b365ebc3d265d8d07bf5652a5604047aa5ec5d6b86e11bbcfa22ec51b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5254f000f42f6401a2b2c9d79e99273916f1b6e13181c9a64b320309c39e7c86
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96e2ae6a146b35b3845839dfbb8f2986a3d173894d5ba95d9fcc741b46ae8bd
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e8d5fbf8f7f420a2135fa043b0fa501be9c8174abcb21bf00630e155731e68
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13463830303c2457cc079a208108f656d07bb765ea2a9132be1c296a9faf8395
+size 10632395520
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766d5dc44e59c74742cfee3395b9a7411ea57fa9aa75b2951f2d8c5118a972b5
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc98ef3959b18de0bbf8853e12dc4674adf9d17687db0bac0e7044580dae2da
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49913f9045db6afbccc44c2fab70ae23c282575e7eb42ab71355ff995cf3e918
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad114813392fdbe70c990a741774dca9a649f3b9ac6c39d682397bc818e72733
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7b37a695078e44600c263db4997107a3bfd7f00ab101fce51981132bf4f6083
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c33c424877ab3b3ba668f8d726064d528827d46762bba4f359cf690a818fd78
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b33025dfdbc309d2842f8b64b9d68decfc9f4e0805ff8c2cad83d423dd6c443a
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7557d12af05437c696a17c2d581fd335ae88e1cc988f9de0c89e64bdc5e5a9e3
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01c6cf1fd0494dba1c0e8fce856e9478f16a9a57cb86b6370dbea43e7cbd6a1
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09477dc6468deafed5bdcb417d95eb490f6efd4b9ccf32170a8067c8dc3c257d
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f6a595e810767e6ebaeb1f6bce335c9534ea1cc1c8fbbe72807dbfca3efd85b
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be62bdfb0bbb5710bdd21cc85e0658a6e929519cb5d7bc896f6cd7f939d3f37c
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3f4e9b8517c9274c10c59e147d612e35112aa14e9b332020698ba948ea35fb
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0e206918644e6ef4977e28e7d82e5d1a1667e6cabcf8f4b403781c9d86a6102
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81b7083b8015a6958ff1ee51b323035e1ceb718708ebda2faab10158bfefde93
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a69d76984b4726955cf4f2d9c6e61067af41f2d1a023bc67c65b90928d98405
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0765763096ca71daf268eaf362722b45c1f27e9c34bf270919a556b57a6bdcd6
+size 5319570736