OPEA
/

Safetensors
llama
4-bit precision
intel/auto-round
wenhuach committed on
Commit
996a178
·
1 Parent(s): 77e8fae

autoround format

Browse files

Signed-off-by: wenhuach <wenhuach87@gmail.com>

Files changed (2) hide show
  1. config.json +9 -10
  2. quantize_config.json +0 -108
config.json CHANGED
@@ -26,22 +26,22 @@
26
  "quantization_config": {
27
  "amp": true,
28
  "autoround_version": "0.4.2.dev",
 
29
  "batch_size": 8,
30
  "bits": 4,
31
- "damp_percent": 0.01,
32
  "data_type": "int",
33
- "desc_act": false,
34
  "enable_minmax_tuning": true,
35
  "enable_norm_bias_tuning": false,
36
  "enable_quanted_input": true,
37
  "gradient_accumulate_steps": 1,
38
  "group_size": 128,
39
- "iters": 1000,
40
- "low_gpu_mem_usage": true,
41
- "lr": 0.001,
42
- "minmax_lr": 0.001,
43
- "nsamples": 512,
44
- "quant_method": "gptq",
45
  "scale_dtype": "torch.float16",
46
  "seqlen": 2048,
47
  "sym": true,
@@ -128,8 +128,7 @@
128
  "model.layers.78",
129
  "model.layers.79"
130
  ]
131
- ],
132
- "true_sequential": false
133
  },
134
  "rms_norm_eps": 1e-05,
135
  "rope_scaling": {
 
26
  "quantization_config": {
27
  "amp": true,
28
  "autoround_version": "0.4.2.dev",
29
+ "backend": "auto_round:gptq:exllamav2",
30
  "batch_size": 8,
31
  "bits": 4,
 
32
  "data_type": "int",
33
+ "dataset": "NeelNanda/pile-10k",
34
  "enable_minmax_tuning": true,
35
  "enable_norm_bias_tuning": false,
36
  "enable_quanted_input": true,
37
  "gradient_accumulate_steps": 1,
38
  "group_size": 128,
39
+ "iters": 200,
40
+ "low_gpu_mem_usage": false,
41
+ "lr": 0.005,
42
+ "minmax_lr": 0.005,
43
+ "nsamples": 128,
44
+ "quant_method": "intel/auto-round",
45
  "scale_dtype": "torch.float16",
46
  "seqlen": 2048,
47
  "sym": true,
 
128
  "model.layers.78",
129
  "model.layers.79"
130
  ]
131
+ ]
 
132
  },
133
  "rms_norm_eps": 1e-05,
134
  "rope_scaling": {
quantize_config.json DELETED
@@ -1,108 +0,0 @@
1
- {
2
- "bits": 4,
3
- "group_size": 128,
4
- "sym": true,
5
- "data_type": "int",
6
- "enable_quanted_input": true,
7
- "enable_minmax_tuning": true,
8
- "seqlen": 2048,
9
- "batch_size": 8,
10
- "scale_dtype": "torch.float16",
11
- "lr": 0.001,
12
- "minmax_lr": 0.001,
13
- "gradient_accumulate_steps": 1,
14
- "iters": 1000,
15
- "amp": true,
16
- "nsamples": 512,
17
- "low_gpu_mem_usage": true,
18
- "to_quant_block_names": [
19
- [
20
- "model.layers.0",
21
- "model.layers.1",
22
- "model.layers.2",
23
- "model.layers.3",
24
- "model.layers.4",
25
- "model.layers.5",
26
- "model.layers.6",
27
- "model.layers.7",
28
- "model.layers.8",
29
- "model.layers.9",
30
- "model.layers.10",
31
- "model.layers.11",
32
- "model.layers.12",
33
- "model.layers.13",
34
- "model.layers.14",
35
- "model.layers.15",
36
- "model.layers.16",
37
- "model.layers.17",
38
- "model.layers.18",
39
- "model.layers.19",
40
- "model.layers.20",
41
- "model.layers.21",
42
- "model.layers.22",
43
- "model.layers.23",
44
- "model.layers.24",
45
- "model.layers.25",
46
- "model.layers.26",
47
- "model.layers.27",
48
- "model.layers.28",
49
- "model.layers.29",
50
- "model.layers.30",
51
- "model.layers.31",
52
- "model.layers.32",
53
- "model.layers.33",
54
- "model.layers.34",
55
- "model.layers.35",
56
- "model.layers.36",
57
- "model.layers.37",
58
- "model.layers.38",
59
- "model.layers.39",
60
- "model.layers.40",
61
- "model.layers.41",
62
- "model.layers.42",
63
- "model.layers.43",
64
- "model.layers.44",
65
- "model.layers.45",
66
- "model.layers.46",
67
- "model.layers.47",
68
- "model.layers.48",
69
- "model.layers.49",
70
- "model.layers.50",
71
- "model.layers.51",
72
- "model.layers.52",
73
- "model.layers.53",
74
- "model.layers.54",
75
- "model.layers.55",
76
- "model.layers.56",
77
- "model.layers.57",
78
- "model.layers.58",
79
- "model.layers.59",
80
- "model.layers.60",
81
- "model.layers.61",
82
- "model.layers.62",
83
- "model.layers.63",
84
- "model.layers.64",
85
- "model.layers.65",
86
- "model.layers.66",
87
- "model.layers.67",
88
- "model.layers.68",
89
- "model.layers.69",
90
- "model.layers.70",
91
- "model.layers.71",
92
- "model.layers.72",
93
- "model.layers.73",
94
- "model.layers.74",
95
- "model.layers.75",
96
- "model.layers.76",
97
- "model.layers.77",
98
- "model.layers.78",
99
- "model.layers.79"
100
- ]
101
- ],
102
- "enable_norm_bias_tuning": false,
103
- "autoround_version": "0.4.2.dev",
104
- "quant_method": "gptq",
105
- "desc_act": false,
106
- "true_sequential": false,
107
- "damp_percent": 0.01
108
- }