Vui Seng Chua
Add content
cfb9114
raw
history blame contribute delete
No virus
438 Bytes
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
# Load a small OPT checkpoint and have transformers apply GPTQ quantization
# as part of `from_pretrained`.
model_id = "facebook/opt-125m"

# The tokenizer is created first because it is passed into the GPTQ config
# below alongside the dataset name.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit GPTQ settings, one keyword per line so each knob is easy to tweak.
# NOTE(review): per the GPTQConfig docs, `dataset` names the calibration set
# and `use_exllama=False` opts out of the exllama kernels — confirm against
# the installed transformers version.
quantization_config = GPTQConfig(
    bits=4,
    group_size=128,
    sym=True,
    desc_act=False,
    dataset="wikitext2",
    tokenizer=tokenizer,
    use_exllama=False,
)

# Passing `quantization_config` makes the load call quantize the model;
# `device_map="auto"` leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)

# Marker emitted once the load/quantize call above has returned.
print("joto")