Update config.json
This PR adds transformers support for AWQ models. If you have AWQ kernels installed through the `autoawq` package or `llm-awq`, you can load this model directly through `AutoModelForCausalLM.from_pretrained` out of the box.
You can read more about the integration in the documentation: https://huggingface.co/docs/transformers/main_classes/quantization#awq-integration or in this Google Colab demo: https://colab.research.google.com/drive/1HzZH89yAXJaZgwJDhQj9LqSBux932BvY (we'll announce it early next week)
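As a minimal sketch of what "out of the box" means here (the repo id below is a placeholder, not this repository's actual id), loading the quantized checkpoint only requires `transformers>=4.35.0` plus `autoawq` or `llm-awq` installed:

```python
# Minimal sketch: load an AWQ-quantized checkpoint straight from the Hub.
# "your-username/your-model-awq" is a placeholder repo id.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-username/your-model-awq"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The quantization_config added to config.json below is picked up automatically,
# so no extra quantization arguments are needed; just place the model on a GPU.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0")

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```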
config.json (changed, +8 -1):

@@ -24,5 +24,12 @@
   "torch_dtype": "float16",
   "transformers_version": "4.35.0",
   "use_cache": true,
-  "vocab_size": 32000
+  "vocab_size": 32000,
+  "quantization_config": {
+    "quant_method": "awq",
+    "zero_point": true,
+    "group_size": 128,
+    "bits": 4,
+    "version": "gemm"
+  }
 }
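For reference, this `quantization_config` block corresponds to the following `AwqConfig` in transformers (a sketch assuming `transformers>=4.35.0`; `quant_method` is filled in automatically):

```python
from transformers import AwqConfig

# Equivalent of the quantization_config block added above.
quantization_config = AwqConfig(
    bits=4,          # 4-bit weights
    group_size=128,  # quantization group size
    zero_point=True, # asymmetric quantization with zero points
    version="gemm",  # GEMM kernels
)
print(quantization_config.to_dict())
```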