import torch
from hqq.core.quantize import *
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.models.hf.base import AutoHQQHFModel
# Load an HQQ 4-bit quantized Llama-3-8B-Instruct model from the Hugging Face Hub
# along with its tokenizer. Note: this triggers a network download of the weights.
# NOTE(review): compute_dtype is defined but not used in this visible snippet —
# presumably intended to be passed into an HQQ quant/backend config; confirm
# against the rest of the file (or remove if truly dead).
compute_dtype = torch.float16
# Hub repository id of the pre-quantized model (4-bit HQQ).
model_id="uisikdag/Meta-Llama-3-8B-Instruct-4bit-hqq"
# from_quantized restores the already-quantized weights directly (no on-the-fly
# quantization step is performed here).
model = HQQModelForCausalLM.from_quantized(model_id)
# Tokenizer is fetched from the same repo so vocab/config match the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)
- Downloads last month: 9