Update README.md
README.md (changed):

````diff
@@ -17,7 +17,7 @@ Then load the model from the hub:
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
 
-model_name = "smpanaro/gpt2-AutoGPTQ-4bit-128g"
+model_name = "smpanaro/gpt2-xl-AutoGPTQ-4bit-128g"
 model = AutoGPTQForCausalLM.from_quantized(model_name, use_triton=True)
 # Note: despite this model being quantized only using groups and desc_act=False, Triton still seems to be required.
 ```
````
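For context, a minimal sketch of loading the renamed checkpoint and generating text with it. The tokenizer is loaded from the base `gpt2-xl` repo on the assumption that the quantized checkpoint shares it; the prompt and generation settings are illustrative, not from the commit.

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_name = "smpanaro/gpt2-xl-AutoGPTQ-4bit-128g"

# Assumption: the quantized repo reuses the base gpt2-xl tokenizer,
# since GPTQ quantization only changes the weights.
tokenizer = AutoTokenizer.from_pretrained("gpt2-xl")

# Per the README, Triton is still required even with desc_act=False.
model = AutoGPTQForCausalLM.from_quantized(model_name, use_triton=True)

# Tokenize an illustrative prompt and generate a short continuation.
inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```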