SushiTokyo's picture
init
ccc6e89 verified
raw
history blame contribute delete
464 Bytes
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
# Quantize a causal-LM checkpoint with GPTQ and write the resulting model and
# its tokenizer side by side into one local directory.
model_id = "elyza/ELYZA-japanese-Llama-2-13b-fast-instruct"
output_dir = "./quantized"

# The tokenizer is loaded first because GPTQConfig takes it as an argument.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit GPTQ settings with a group size of 64; "c4" is passed as the dataset.
gptq_config = GPTQConfig(
    bits=4,
    dataset="c4",
    tokenizer=tokenizer,
    group_size=64,
)

# Load the checkpoint onto the CUDA device with the GPTQ config attached.
# NOTE(review): per the Transformers GPTQ guide, quantization runs during this
# call -- confirm against the installed transformers version.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    quantization_config=gptq_config,
)

# Persist the tokenizer first, then the model, into the same directory.
for artifact in (tokenizer, model):
    artifact.save_pretrained(output_dir)