matichon committed
Commit 494657e
1 parent: d33deca

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.

Files changed (50):
  1. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json +37 -0
  2. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  3. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json +37 -0
  4. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  5. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json +37 -0
  6. quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  7. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json +37 -0
  8. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  9. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  10. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json +37 -0
  11. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  12. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  13. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json +37 -0
  14. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  15. quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
  16. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json +37 -0
  17. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  18. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  19. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
  20. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
  21. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json +37 -0
  22. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  23. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  24. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
  25. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
  26. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json +37 -0
  27. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
  28. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors +3 -0
  29. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors +3 -0
  30. quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors +3 -0
  31. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json +37 -0
  32. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors +3 -0
  33. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors +3 -0
  34. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors +3 -0
  35. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors +3 -0
  36. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors +3 -0
  37. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors +3 -0
  38. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors +3 -0
  39. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors +3 -0
  40. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json +37 -0
  41. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors +3 -0
  42. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors +3 -0
  43. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors +3 -0
  44. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors +3 -0
  45. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors +3 -0
  46. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors +3 -0
  47. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors +3 -0
  48. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors +3 -0
  49. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json +37 -0
  50. quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors +3 -0
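Each quantization variant lives in its own top-level folder, with the parallelism layout (pp/tp), weight scheme (awq-w4a16), KV-cache precision (kvfp16/kvfp8/kvint8), and group size (gs64) encoded in the folder name, so a consumer only needs the folder matching their deployment. A minimal sketch of pulling a single variant with huggingface_hub's snapshot_download; the repo id below is a placeholder, since the commit view does not name the repository:

from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the repository this commit belongs to.
local_dir = snapshot_download(
    repo_id="your-org/quantized-llama-3-70b",
    # revision can pin a branch, tag, or commit hash (e.g. the full hash behind 494657e)
    allow_patterns=["quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/*"],
)
print("downloaded to", local_dir)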
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
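All twelve config.json files share this schema; only quantization.kv_cache_quant_algo and the mapping block (world_size/tp_size) change between folders. Here kv_cache_quant_algo is null, which matches the kvfp16 suffix (the KV cache stays in FP16). A minimal sketch, assuming the folder has been downloaded into the working directory, for reading the config and working out how many rank shards to expect:

import json
from pathlib import Path

cfg = json.loads(Path(
    "quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/config.json"
).read_text())

quant, mapping = cfg["quantization"], cfg["mapping"]
print("weights:", quant["quant_algo"], "group_size", quant["group_size"])
print("kv cache:", quant["kv_cache_quant_algo"] or "FP16 (unquantized)")
# One rank<i>.safetensors shard per rank is expected in the folder:
print("expected shards:", [f"rank{i}.safetensors" for i in range(mapping["world_size"])])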
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fdc97e6f754f66320050462c9afe6e33580d32a68d14530598d9fccaef84aab
+size 42509327144
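The rank0.safetensors entry is a Git LFS pointer, not the weights themselves: it records only the object's SHA-256 and byte size (here about 42.5 GB), and the real shard is fetched from LFS on download. A minimal sketch, assuming the actual shard has been downloaded to the same path, for checking it against the pointer values above:

import hashlib
from pathlib import Path

shard = Path("quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp16-gs64/rank0.safetensors")
expected_oid = "5fdc97e6f754f66320050462c9afe6e33580d32a68d14530598d9fccaef84aab"
expected_size = 42509327144

# Stream the file so the ~42 GB shard is never held in memory at once.
h = hashlib.sha256()
with shard.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert shard.stat().st_size == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("rank0.safetensors matches its LFS pointer")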
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb5d3a3152f2b5c22a8b1e9647ce63a80ca36da0ec96d654eef68de9dee9a9d
+size 42509337408
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 1,
+        "tp_size": 1,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp1-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:843b555c6bb27499443877081125823d45e7280c44e708d0735c7de1630cc1af
+size 42509337408
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66bc432e82f9ea3be434394ad7a987990fbb584de521d43ab67e99694af5e3e0
+size 21258033216
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a9c9274a415af0b2959184cb066fffb8a2be8af844b46e1699bf2bffc564c6
+size 21258033216
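With tp_size 2 the checkpoint is split into one shard per tensor-parallel rank (rank0 and rank1, each about 21.3 GB, roughly half of the single tp1 shard). A minimal sketch, assuming both shards have been fetched locally and are ordinary safetensors files, for listing what each rank holds without loading the tensors:

from safetensors import safe_open

folder = "quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp16-gs64"
for rank in range(2):  # mapping.world_size for this variant
    with safe_open(f"{folder}/rank{rank}.safetensors", framework="np") as f:
        names = list(f.keys())
        print(f"rank{rank}: {len(names)} tensors, first few: {names[:3]}")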
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8c132954436bc70f3145ba27dfb4838d82d59131f5e00d922fffdcdd10962f2
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785de253dc345c39a95ea8c0ea730018279005b1f5dd5a808d855a1413799c42
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 2,
+        "tp_size": 2,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1192c889ec8919209170f9664a062100288f824a03401a6137d30ff3743fb814
+size 21258043432
quantized-llama-3-70b-pp1-tp2-awq-w4a16-kvint8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e32f059c20a426f413612f3be65957409f18a68c58820b96c0757bec27f2edd9
+size 21258043432
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56c4cd25334ed1a20ff0675d5be013ce92caf65d9a38ced253c339cbb8a03d50
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:289cded54c120a18bdf80da0cd51c1ba58d2e215f11effd59bd247f6a9530f77
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe48de02b4ea7be8ee656daa586676430ba71ae73f5edd3c429359885de47c0
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp16-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:530350069f003f10612d3d4dd74a714ec259ac53b331242b0f59b9ee120c3cbf
+size 10632385392
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5da92dad5af08d4ade29d176125a9fee002b2359678ef13f1064ec0fca6caf6
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e495b18ad031dea3d3a2ec7088183a2710e87b7802c9b44c2b494531556d38b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9200198c179162826cb4e01ef9e81d19e4cae05978d24406dcc6fc74eed42615
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvfp8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9228b82b365ebc3d265d8d07bf5652a5604047aa5ec5d6b86e11bbcfa22ec51b
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 4,
+        "tp_size": 4,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5254f000f42f6401a2b2c9d79e99273916f1b6e13181c9a64b320309c39e7c86
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96e2ae6a146b35b3845839dfbb8f2986a3d173894d5ba95d9fcc741b46ae8bd
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e8d5fbf8f7f420a2135fa043b0fa501be9c8174abcb21bf00630e155731e68
+size 10632395520
quantized-llama-3-70b-pp1-tp4-awq-w4a16-kvint8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13463830303c2457cc079a208108f656d07bb765ea2a9132be1c296a9faf8395
+size 10632395520
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": null,
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766d5dc44e59c74742cfee3395b9a7411ea57fa9aa75b2951f2d8c5118a972b5
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc98ef3959b18de0bbf8853e12dc4674adf9d17687db0bac0e7044580dae2da
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49913f9045db6afbccc44c2fab70ae23c282575e7eb42ab71355ff995cf3e918
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad114813392fdbe70c990a741774dca9a649f3b9ac6c39d682397bc818e72733
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank4.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7b37a695078e44600c263db4997107a3bfd7f00ab101fce51981132bf4f6083
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank5.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c33c424877ab3b3ba668f8d726064d528827d46762bba4f359cf690a818fd78
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank6.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b33025dfdbc309d2842f8b64b9d68decfc9f4e0805ff8c2cad83d423dd6c443a
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp16-gs64/rank7.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7557d12af05437c696a17c2d581fd335ae88e1cc988f9de0c89e64bdc5e5a9e3
+size 5319560624
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "FP8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01c6cf1fd0494dba1c0e8fce856e9478f16a9a57cb86b6370dbea43e7cbd6a1
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank1.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09477dc6468deafed5bdcb417d95eb490f6efd4b9ccf32170a8067c8dc3c257d
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f6a595e810767e6ebaeb1f6bce335c9534ea1cc1c8fbbe72807dbfca3efd85b
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank3.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be62bdfb0bbb5710bdd21cc85e0658a6e929519cb5d7bc896f6cd7f939d3f37c
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank4.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3f4e9b8517c9274c10c59e147d612e35112aa14e9b332020698ba948ea35fb
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank5.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0e206918644e6ef4977e28e7d82e5d1a1667e6cabcf8f4b403781c9d86a6102
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank6.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81b7083b8015a6958ff1ee51b323035e1ceb718708ebda2faab10158bfefde93
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvfp8-gs64/rank7.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a69d76984b4726955cf4f2d9c6e61067af41f2d1a023bc67c65b90928d98405
+size 5319570736
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/config.json ADDED
@@ -0,0 +1,37 @@
+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    },
+    "mapping": {
+        "world_size": 8,
+        "tp_size": 8,
+        "pp_size": 1
+    },
+    "head_size": 128,
+    "intermediate_size": 28672,
+    "position_embedding_type": "rope_gpt_neox",
+    "rotary_base": 500000.0
+}
quantized-llama-3-70b-pp1-tp8-awq-w4a16-kvint8-gs64/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0765763096ca71daf268eaf362722b45c1f27e9c34bf270919a556b57a6bdcd6
+size 5319570736