Change to 4-bit quantization config
Browse files
- config.json +5 -5
config.json
CHANGED
@@ -106,15 +106,15 @@
|
|
106 |
"pad2square": false,
|
107 |
"ps_version": "v2",
|
108 |
"quantization_config": {
|
109 |
-
"bnb_4bit_compute_dtype": "
|
110 |
-
"bnb_4bit_quant_type": "
|
111 |
-
"bnb_4bit_use_double_quant":
|
112 |
"llm_int8_enable_fp32_cpu_offload": true,
|
113 |
"llm_int8_has_fp16_weight": false,
|
114 |
"llm_int8_skip_modules": null,
|
115 |
"llm_int8_threshold": 6.0,
|
116 |
-
"load_in_4bit":
|
117 |
-
"load_in_8bit":
|
118 |
"quant_method": "bitsandbytes"
|
119 |
},
|
120 |
"select_layer": -1,
|
|
|
106 |
"pad2square": false,
|
107 |
"ps_version": "v2",
|
108 |
"quantization_config": {
|
109 |
+
"bnb_4bit_compute_dtype": "bfloat16",
|
110 |
+
"bnb_4bit_quant_type": "nf4",
|
111 |
+
"bnb_4bit_use_double_quant": true,
|
112 |
"llm_int8_enable_fp32_cpu_offload": true,
|
113 |
"llm_int8_has_fp16_weight": false,
|
114 |
"llm_int8_skip_modules": null,
|
115 |
"llm_int8_threshold": 6.0,
|
116 |
+
"load_in_4bit": true,
|
117 |
+
"load_in_8bit": false,
|
118 |
"quant_method": "bitsandbytes"
|
119 |
},
|
120 |
"select_layer": -1,
|