LLM-As-Chatbot / models /replit.py
chansung's picture
update
88f55d9
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from optimum.bettertransformer import BetterTransformer
def load_model(
base,
finetuned,
mode_cpu,
mode_mps,
mode_full_gpu,
mode_8bit,
mode_4bit,
force_download_ckpt
):
tokenizer = AutoTokenizer.from_pretrained(
base, trust_remote_code=True)
tokenizer.padding_side = "left"
model = AutoModelForCausalLM.from_pretrained(
base,
load_in_8bit=mode_8bit,
load_in_4bit=mode_4bit,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
)
if finetuned is not None and \
finetuned != "" and \
finetuned != "N/A":
model = PeftModel.from_pretrained(
model,
finetuned,
# force_download=force_download_ckpt,
trust_remote_code=True
)
model = model.merge_and_unload()
# model = BetterTransformer.transform(model)
model.to('cuda')
return model, tokenizer