Upload folder using huggingface_hub
- config.json +15 -2
- generation_config.json +1 -1
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +0 -0
- recipe.yaml +1 -1
config.json
CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "nm-testing/Sparse-Llama-3.1-8B-tldr-2of4-quantized.w8a8",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -58,7 +59,19 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {
+      "format": "sparse-24-bitmask",
+      "global_sparsity": 0.43743278973295324,
+      "ignore": [
+        "lm_head"
+      ],
+      "registry_requires_subclass": false,
+      "sparsity_structure": "2:4",
+      "targets": [
+        "Linear"
+      ]
+    }
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -71,7 +84,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.49.0",
   "use_cache": false,
   "vocab_size": 128256
 }
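For context, a compressed-tensors checkpoint carrying the quantization and sparsity config added above can typically be loaded straight through transformers. A minimal sketch, assuming transformers >= 4.49 (the version pinned in this commit) plus the compressed-tensors and accelerate packages are installed; the repo id is the _name_or_path field from the diff:

# Minimal loading sketch (assumes transformers + compressed-tensors + accelerate).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "nm-testing/Sparse-Llama-3.1-8B-tldr-2of4-quantized.w8a8"

# from_pretrained picks up quant_method/sparsity_config from config.json and
# decompresses the weights; device_map="auto" requires accelerate.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)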
generation_config.json
CHANGED
@@ -5,5 +5,5 @@
   "eos_token_id": 128001,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.
+  "transformers_version": "4.49.0"
 }
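The sampling defaults pinned in this file (temperature 0.6, top_p 0.9, EOS id 128001) are what generate() picks up automatically. A small sketch making them explicit, reusing the model and tokenizer from the previous snippet; the prompt is a placeholder:

inputs = tokenizer("Summarize the following post: ...", return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    do_sample=True,       # temperature/top_p only take effect when sampling
    temperature=0.6,
    top_p=0.9,
    eos_token_id=128001,
    max_new_tokens=128,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))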
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d3e250365825bde4118ebba7faf71f6e0bd6b190f5ab1a8b989ab3f768447a5a
+size 4999518512
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ff7884b9ae599e8ad4dea50aed71928ca49a40fe5e346d8760694e72d3dbfc11
+size 1467300592
|
model.safetensors.index.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
recipe.yaml
CHANGED
|
@@ -24,4 +24,4 @@ quant_stage:
|
|
| 24 |
targets: [Linear]
|
| 25 |
weights: {num_bits: 8, type: int, strategy: channel, symmetric: true, observer: mse}
|
| 26 |
input_activations: {num_bits: 8, type: int, symmetric: true, strategy: token, dynamic: true,
|
| 27 |
-
observer: memoryless}
|
|
|
|
| 24 |
targets: [Linear]
|
| 25 |
weights: {num_bits: 8, type: int, strategy: channel, symmetric: true, observer: mse}
|
| 26 |
input_activations: {num_bits: 8, type: int, symmetric: true, strategy: token, dynamic: true,
|
| 27 |
+
observer: memoryless}
|
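This recipe is in the llm-compressor format: channel-wise symmetric int8 weights with an MSE observer, and dynamic per-token int8 input activations. As a rough sketch of how such a recipe is applied in a one-shot run, hedged because the oneshot entry point and its keyword names vary across llm-compressor versions, and the base model and calibration dataset named here are illustrative placeholders, not taken from this commit:

from llmcompressor import oneshot  # assumption: recent llm-compressor layout

oneshot(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical base checkpoint
    recipe="recipe.yaml",                      # the recipe diffed above
    dataset="open_platypus",                   # hypothetical calibration data
    max_seq_length=2048,
    num_calibration_samples=512,
)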