# nyc-savvy-llama2-7b / peftmerger.py  (Nick Doiron)
# Merge a fine-tuned PEFT (LoRA) adapter into the base Llama 2 model and save the result.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "./llama2-hf"
adapters_name = './checkpoint-760/adapter_model'
print(f"Starting to load the model {model_name} into memory")
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    # load_in_4bit=True,   # optional quantized loading, left disabled here
    torch_dtype=torch.bfloat16,
    # device_map={"": 0},  # optional: place the whole model on GPU 0
)
# Attach the LoRA adapter to the base model, then fold the adapter weights
# into the base weights so the result is a plain, standalone model.
m = PeftModel.from_pretrained(m, adapters_name)
m = m.merge_and_unload()
print(f"Successfully loaded the model {model_name} into memory")
m.save_pretrained("nyc-savvy")  # write the merged model to ./nyc-savvy
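
# --- Optional follow-up: a minimal sketch, not part of the original script ---
# save_pretrained() above writes only the model weights and config. To load
# everything back from ./nyc-savvy, the tokenizer files usually need to be saved
# alongside them. This assumes the base checkpoint at ./llama2-hf includes the
# tokenizer and reuses the AutoTokenizer import above; the prompt is illustrative.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained("nyc-savvy")

# Quick smoke test: reload the merged model from disk and generate a few tokens.
merged = AutoModelForCausalLM.from_pretrained("nyc-savvy", torch_dtype=torch.bfloat16)
inputs = tokenizer("The five boroughs of New York City are", return_tensors="pt")
outputs = merged.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))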