RelaxingSnorlax committed on
Commit
3b47101
·
verified ·
1 Parent(s): c5cceb5

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "LlamaForCausalLM"
4
  ],
@@ -58,7 +59,19 @@
58
  ],
59
  "kv_cache_scheme": null,
60
  "quant_method": "compressed-tensors",
61
- "quantization_status": "compressed"
 
 
 
 
 
 
 
 
 
 
 
 
62
  },
63
  "rms_norm_eps": 1e-05,
64
  "rope_scaling": {
@@ -71,7 +84,7 @@
71
  "rope_theta": 500000.0,
72
  "tie_word_embeddings": false,
73
  "torch_dtype": "bfloat16",
74
- "transformers_version": "4.51.3",
75
  "use_cache": false,
76
  "vocab_size": 128256
77
  }
 
1
  {
2
+ "_name_or_path": "nm-testing/Sparse-Llama-3.1-8B-tldr-2of4-quantized.w8a8",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
59
  ],
60
  "kv_cache_scheme": null,
61
  "quant_method": "compressed-tensors",
62
+ "quantization_status": "compressed",
63
+ "sparsity_config": {
64
+ "format": "sparse-24-bitmask",
65
+ "global_sparsity": 0.43743278973295324,
66
+ "ignore": [
67
+ "lm_head"
68
+ ],
69
+ "registry_requires_subclass": false,
70
+ "sparsity_structure": "2:4",
71
+ "targets": [
72
+ "Linear"
73
+ ]
74
+ }
75
  },
76
  "rms_norm_eps": 1e-05,
77
  "rope_scaling": {
 
84
  "rope_theta": 500000.0,
85
  "tie_word_embeddings": false,
86
  "torch_dtype": "bfloat16",
87
+ "transformers_version": "4.49.0",
88
  "use_cache": false,
89
  "vocab_size": 128256
90
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
- "transformers_version": "4.51.3"
9
  }
 
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
+ "transformers_version": "4.49.0"
9
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd88323ff99c1f1b86a6bdefa90c19fb4eed40d878fa41914f0f9874e4c93776
3
- size 4999400864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e250365825bde4118ebba7faf71f6e0bd6b190f5ab1a8b989ab3f768447a5a
3
+ size 4999518512
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e9894717df590a759ea0c05f55817ab5f387a31b15f4f2623b72e053e2c95d6
3
- size 4084612496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff7884b9ae599e8ad4dea50aed71928ca49a40fe5e346d8760694e72d3dbfc11
3
+ size 1467300592
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -24,4 +24,4 @@ quant_stage:
24
  targets: [Linear]
25
  weights: {num_bits: 8, type: int, strategy: channel, symmetric: true, observer: mse}
26
  input_activations: {num_bits: 8, type: int, symmetric: true, strategy: token, dynamic: true,
27
- observer: memoryless}
 
24
  targets: [Linear]
25
  weights: {num_bits: 8, type: int, strategy: channel, symmetric: true, observer: mse}
26
  input_activations: {num_bits: 8, type: int, symmetric: true, strategy: token, dynamic: true,
27
+ observer: memoryless}