Commit 87e14e9 (verified) · committed by mgoin · Parent(s): 6c31251

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,11 +1,61 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
+  "_name_or_path": "/home/mgoin/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
+  "compression_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.459016372092587,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": {
+      "block_structure": null,
+      "dynamic": false,
+      "group_size": null,
+      "num_bits": 8,
+      "observer": "minmax",
+      "observer_kwargs": {},
+      "strategy": "tensor",
+      "symmetric": true,
+      "type": "float"
+    },
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen"
+  },
   "eos_token_id": 128009,
   "hidden_act": "silu",
   "hidden_size": 4096,
@@ -18,20 +68,12 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
-  "quantization_config": {
-    "activation_scheme": "static",
-    "ignored_layers": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": "static",
-    "quant_method": "fp8"
-  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.4",
+  "transformers_version": "4.43.1",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
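
The net effect of this change is to drop the older "quantization_config" block (quant_method "fp8" with static activation and KV-cache scales) in favor of a compressed-tensors "compression_config" that spells out the FP8 weight, input-activation, and KV-cache schemes explicitly. A minimal sketch, assuming the checkpoint has been uploaded to a Hugging Face repository, of reading that metadata back; the repo id below is a placeholder, not taken from this commit:

# Sketch: inspect the new compressed-tensors metadata in config.json.
# The repo_id is a placeholder (this commit does not name the repository).
import json

from huggingface_hub import hf_hub_download

config_path = hf_hub_download(
    repo_id="your-org/Meta-Llama-3-8B-Instruct-FP8",  # placeholder repo id
    filename="config.json",
)
with open(config_path) as f:
    config = json.load(f)

compression = config["compression_config"]
print(compression["quant_method"])   # compressed-tensors
print(compression["format"])         # float-quantized
group_0 = compression["config_groups"]["group_0"]
print(group_0["weights"]["num_bits"], group_0["weights"]["type"])  # 8 float
print(compression["kv_cache_scheme"]["type"])  # float, i.e. FP8 KV-cache scales
print(compression["ignore"])          # ['lm_head'] is left unquantized

Because quant_method is now "compressed-tensors", loaders that understand this format (recent vLLM releases, for example) can detect the FP8 scheme from this block rather than from the removed "quantization_config".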
generation_config.json CHANGED
@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.42.4"
+  "transformers_version": "4.43.1"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3984136d7c07adb442b2c081a7e78b683f53d5738417a9ec8f5dc9e140751434
-size 4997857192
+oid sha256:552b13fa34c1fe48969abf0f41ef7ef263d33ac90500652d3da65200dee40650
+size 4997861162
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61a4a49b2165f2ce2dad57f842c92f467036f37e1ce083d9e22c40f4583814b6
-size 4083429400
+oid sha256:082eaa4b1c2fbd0007b9813f5ce2c6d29b24ee5aefcb265c0e6d91aa26e53d3d
+size 4083432614
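
Both safetensors shards are tracked through Git LFS, so the diffs above only touch the pointer files: each records the SHA-256 digest (oid) and byte size of the real shard, and both change because the shards themselves were regenerated. A small sketch, assuming the updated first shard has been downloaded locally, that re-derives those two values and compares them to the new pointer:

# Sketch: verify a downloaded shard against its Git LFS pointer (oid + size).
# The expected values are the ones introduced by this commit for
# model-00001-of-00002.safetensors; the local path is assumed.
import hashlib
import os

EXPECTED_OID = "552b13fa34c1fe48969abf0f41ef7ef263d33ac90500652d3da65200dee40650"
EXPECTED_SIZE = 4_997_861_162

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

shard = "model-00001-of-00002.safetensors"
assert os.path.getsize(shard) == EXPECTED_SIZE
assert sha256_of(shard) == EXPECTED_OID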
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 9081202688
+    "total_size": 9081201664
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
@@ -279,7 +279,7 @@
   "model.layers.18.self_attn.v_proj.input_scale": "model-00002-of-00002.safetensors",
   "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.18.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
-  "model.layers.18.self_attn.v_scale": "model-00001-of-00002.safetensors",
+  "model.layers.18.self_attn.v_scale": "model-00002-of-00002.safetensors",
   "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
   "model.layers.19.mlp.down_proj.input_scale": "model-00002-of-00002.safetensors",
   "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",