from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "elyza/ELYZA-japanese-Llama-2-13b-fast-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit GPTQ quantization, calibrated on the "c4" dataset with a group size of 64.
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer, group_size=64)

# Passing a quantization_config makes from_pretrained quantize the weights on load.
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="cuda", quantization_config=gptq_config
)

# Save the quantized weights and the tokenizer to the same directory.
tokenizer.save_pretrained("./quantized")
model.save_pretrained("./quantized")
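
# A minimal sketch of reloading the saved checkpoint for inference. This assumes
# the GPTQ backend used for quantization above is still installed and a CUDA
# device is available; the prompt string is only an illustrative placeholder.
quantized_model = AutoModelForCausalLM.from_pretrained("./quantized", device_map="cuda")
quantized_tokenizer = AutoTokenizer.from_pretrained("./quantized")

inputs = quantized_tokenizer("Hello", return_tensors="pt").to(quantized_model.device)
outputs = quantized_model.generate(**inputs, max_new_tokens=20)
print(quantized_tokenizer.decode(outputs[0], skip_special_tokens=True))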