import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hugging Face Hub repository id of the checkpoint to load (per its name, an
# 8-bit build of Llama-2-7b-chat).
HF_MODEL_REPO = "anslin-raj/Llama-2-7b-chat-hf-8-bit"

# The snippet used `device` without defining it, which raises NameError when
# run as shown. Use the GPU when one is visible, otherwise fall back to CPU.
# NOTE(review): bitsandbytes 8-bit inference generally expects a CUDA GPU --
# confirm that the CPU fallback is acceptable for your environment.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizers are not placed on a device, so no `device` argument is passed;
# move the encoded tensors to `device` at call time instead.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)

# Passing `load_in_8bit=True` directly to `from_pretrained` is deprecated;
# the 8-bit request is expressed through a BitsAndBytesConfig instead.
model = AutoModelForCausalLM.from_pretrained(
    HF_MODEL_REPO,
    device_map=device,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
- Downloads last month: 3