Commit 2252808 (1 parent: 90c15db), committed by wenhuach

update to autoround format

Signed-off-by: wenhuach <wenhuach87@gmail.com>
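
This commit repackages the checkpoint from GPTQ-style metadata to AutoRound's own format, which is loaded through the auto-round integration rather than a GPTQ loader. A minimal loading sketch, assuming auto-round and a recent transformers are installed; "OPEA/<this-repo>" is a placeholder, since the full repo id is not shown on this page:

# Minimal sketch: load an AutoRound-format checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRoundConfig  # importing this registers the AutoRound format with transformers

model_id = "OPEA/<this-repo>"  # placeholder, not the real repo id
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))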

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/data5/models/Meta-Llama-3.1-70B-Instruct",
+  "_name_or_path": ".",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -26,22 +26,22 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.1",
-    "batch_size": 1,
+    "backend": "auto_round:gptq:exllamav2",
+    "batch_size": 8,
     "bits": 4,
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
     "gradient_accumulate_steps": 1,
     "group_size": 128,
-    "iters": 1,
+    "iters": 1000,
     "low_gpu_mem_usage": true,
-    "lr": 1.0,
-    "minmax_lr": 1.0,
-    "nsamples": 1,
-    "quant_method": "gptq",
+    "lr": 0.001,
+    "minmax_lr": 0.001,
+    "nsamples": 512,
+    "quant_method": "intel/auto-round",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
@@ -128,8 +128,7 @@
         "model.layers.78",
         "model.layers.79"
       ]
-    ],
-    "true_sequential": false
+    ]
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -141,7 +140,7 @@
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.46.1",
   "use_cache": true,
   "vocab_size": 128256
quantize_config.json → quantization_config.json RENAMED
@@ -6,14 +6,14 @@
   "enable_quanted_input": true,
   "enable_minmax_tuning": true,
   "seqlen": 2048,
-  "batch_size": 1,
+  "batch_size": 8,
   "scale_dtype": "torch.float16",
-  "lr": 1.0,
-  "minmax_lr": 1.0,
+  "lr": 0.001,
+  "minmax_lr": 0.001,
   "gradient_accumulate_steps": 1,
-  "iters": 1,
+  "iters": 1000,
   "amp": true,
-  "nsamples": 1,
+  "nsamples": 512,
   "low_gpu_mem_usage": true,
   "to_quant_block_names": [
     [
@@ -100,9 +100,8 @@
     ]
   ],
   "enable_norm_bias_tuning": false,
+  "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.1",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
+  "quant_method": "intel/auto-round",
+  "backend": "auto_round:gptq:exllamav2"
 }
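
The rename from quantize_config.json (the legacy GPTQ-style name) to quantization_config.json goes along with the same key changes as in config.json. The new backend key ("auto_round:gptq:exllamav2") appears to record the default inference kernel; auto-round also lets a backend be selected explicitly at load time via AutoRoundConfig. A hedged sketch, again with a placeholder repo id:

# Sketch: explicitly select the inference backend recorded in the config.
from transformers import AutoModelForCausalLM
from auto_round import AutoRoundConfig

quantization_config = AutoRoundConfig(backend="auto_round:gptq:exllamav2")
model = AutoModelForCausalLM.from_pretrained(
    "OPEA/<this-repo>",  # placeholder, not the real repo id
    device_map="auto",
    quantization_config=quantization_config,
)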