# agent-os-training-guide / scripts/merge_7b_cloud.py
# Uploaded by devsomosahub with huggingface_hub (commit 04a4a4c, 994 bytes).
# (Hugging Face file-viewer page chrome converted to this comment so the
# file parses as Python.)
"""Merge 7B LoRA adapter with FP16 base (NOT quantized) and push."""
import os, torch, gc
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
HF_TOKEN = os.environ["HF_TOKEN"]
login(token=HF_TOKEN)
print("Loading Qwen 7B FP16 on CPU...")
base = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen2.5-7B-Instruct",
torch_dtype=torch.float16,
device_map="cpu",
trust_remote_code=True,
)
print("Loading adapter...")
model = PeftModel.from_pretrained(base, "devsomosahub/agent-os-adapter-7b")
print("Merging...")
merged = model.merge_and_unload()
tok = AutoTokenizer.from_pretrained("devsomosahub/agent-os-adapter-7b", trust_remote_code=True)
print("Pushing merged 7B to Hub...")
merged.push_to_hub("devsomosahub/agent-os-7b-merged", token=HF_TOKEN, max_shard_size="2GB")
tok.push_to_hub("devsomosahub/agent-os-7b-merged", token=HF_TOKEN)
print("DONE! https://huggingface.co/devsomosahub/agent-os-7b-merged")