TheBloke committed on
Commit
6801dba
1 Parent(s): 4225be4

GPTQ model commit

Browse files
Files changed (2) hide show
  1. config.json +10 -40
  2. quantize_config.json +6 -6
config.json CHANGED
@@ -18,45 +18,6 @@
18
  "num_key_value_heads": 8,
19
  "pad_token_id": 0,
20
  "pretraining_tp": 1,
21
- "quantization_config": {
22
- "batch_size": 1,
23
- "bits": 4,
24
- "block_name_to_quantize": null,
25
- "cache_block_outputs": true,
26
- "damp_percent": 0.1,
27
- "desc_act": true,
28
- "exllama_config": {
29
- "version": 1
30
- },
31
- "group_size": -1,
32
- "max_input_length": null,
33
- "model_seqlen": null,
34
- "module_name_preceding_first_block": null,
35
- "modules_in_block_to_quantize": [
36
- [
37
- "self_attn.k_proj",
38
- "self_attn.v_proj",
39
- "self_attn.q_proj"
40
- ],
41
- [
42
- "self_attn.o_proj"
43
- ],
44
- [
45
- "mlp.up_proj",
46
- "mlp.gate_proj"
47
- ],
48
- [
49
- "mlp.down_proj"
50
- ]
51
- ],
52
- "pad_token_id": null,
53
- "quant_method": "gptq",
54
- "sym": true,
55
- "tokenizer": null,
56
- "true_sequential": true,
57
- "use_cuda_fp16": false,
58
- "use_exllama": true
59
- },
60
  "rms_norm_eps": 1e-05,
61
  "rope_scaling": null,
62
  "rope_theta": 10000.0,
@@ -64,5 +25,14 @@
64
  "torch_dtype": "float16",
65
  "transformers_version": "4.37.0.dev0",
66
  "use_cache": true,
67
- "vocab_size": 32000
 
 
 
 
 
 
 
 
 
68
  }
 
18
  "num_key_value_heads": 8,
19
  "pad_token_id": 0,
20
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "rms_norm_eps": 1e-05,
22
  "rope_scaling": null,
23
  "rope_theta": 10000.0,
 
25
  "torch_dtype": "float16",
26
  "transformers_version": "4.37.0.dev0",
27
  "use_cache": true,
28
+ "vocab_size": 32000,
29
+ "quantization_config": {
30
+ "bits": 4,
31
+ "group_size": -1,
32
+ "damp_percent": 0.1,
33
+ "desc_act": true,
34
+ "sym": true,
35
+ "true_sequential": true,
36
+ "quant_method": "gptq"
37
+ }
38
  }
quantize_config.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "bits": 4,
3
- "group_size": -1,
4
- "damp_percent": 0.1,
5
- "desc_act": true,
6
- "sym": true,
7
- "true_sequential": true
8
  }
 
1
  {
2
+ "bits": 4,
3
+ "group_size": -1,
4
+ "damp_percent": 0.1,
5
+ "desc_act": true,
6
+ "sym": true,
7
+ "true_sequential": true
8
  }