# gptq-w4-gs32-sparse-compressed-oc14336-ic4096/internal/donttouch_unpacking_autogptq/autogpt_sample.py
"""Sample: quantize ``facebook/opt-125m`` to 4-bit with GPTQ via transformers.

Loads the tokenizer, builds a :class:`GPTQConfig` (symmetric 4-bit weights,
``wikitext2`` calibration dataset, group size 128, activation-order
reordering disabled, exllama kernels disabled), then loads the model with
on-the-fly GPTQ quantization.

NOTE(review): this triggers a model download and a full calibration pass —
it is slow and needs network access.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "facebook/opt-125m"

# Tokenizer is needed by GPTQConfig to tokenize the calibration dataset.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# sym=True        -> symmetric quantization
# desc_act=False  -> no activation-order (desc_act) column reordering
# use_exllama=False -> fall back to the non-exllama kernels
quantization_config = GPTQConfig(
    bits=4,
    sym=True,
    dataset="wikitext2",
    tokenizer=tokenizer,
    group_size=128,
    desc_act=False,
    use_exllama=False,
)

# device_map="auto" lets accelerate place layers across available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)

print("joto")  # debug marker left by the author — signals the script finished