# training_script/use_model.py
from peft import PeftModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL = "bigcode/starcoderbase-1b" # Model checkpoint on the Hugging Face Hub
# Load the tokenizer and the original base model first.
print("Load Tokenizer")
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # StarCoder has no dedicated pad token
tokenizer.padding_side = "left"  # left-pad so prompts sit flush against generated text
print("Load Model")
base_model = AutoModelForCausalLM.from_pretrained(
MODEL,
quantization_config=None,
device_map=None,
trust_remote_code=True,
torch_dtype=torch.float32,
).cuda()
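# A lower-memory alternative (sketch, not part of the original flow): load the
# base model in 4-bit with bitsandbytes and keep the adapter unmerged, since
# merging into quantized weights depends on PEFT-version support.
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
# base_model = AutoModelForCausalLM.from_pretrained(
#     MODEL,
#     quantization_config=bnb_config,
#     device_map="auto",
#     trust_remote_code=True,
# )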
# Merge the fine-tuned LoRA adapter weights into the base model.
peft_model_id = "limernyou/starcoder-peft-conti"
model = PeftModel.from_pretrained(base_model, peft_model_id, adapter_name="personal_copilot")
# model.add_weighted_adapter(["personal_copilot"], [0.8], "best_personal_copilot")
# model.set_adapter("best_personal_copilot")
model = model.merge_and_unload()  # merge_and_unload() returns the merged model; keep the result
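# Note: add_weighted_adapter (commented out above) would create a new adapter as
# a weighted combination of existing ones (here, "personal_copilot" scaled by
# 0.8), and set_adapter would activate it before merging.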
def get_code_completion(prefix, suffix):
    # StarCoder fill-in-the-middle (FIM) prompt: the model generates the code
    # that belongs between the prefix and the suffix.
    prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
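# Note: with skip_special_tokens=True the FIM sentinel tokens are stripped, so
# get_code_completion returns prefix + suffix + generated middle run together.
# To isolate just the middle, a sketch: keep the sentinels when decoding and
# split on <fim_middle>.
# raw = tokenizer.decode(outputs[0], skip_special_tokens=False)
# middle = raw.split("<fim_middle>")[-1].replace("<|endoftext|>", "")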
def get_code_completion1(prefix, suffix):
    # Plain causal completion: no FIM sentinels, the model simply continues
    # the concatenated prompt.
    prompt = prefix + suffix
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens; the original split on
    # "<fim_middle>" could never match, since the prompt has no FIM sentinels.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
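# Example call for the plain-completion path (illustrative prompt, not from the
# original script):
# print(get_code_completion1("def fibonacci(n):\n    ", ""))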
prefix = """from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM
peft_config = LoraConfig(
"""
suffix = """"""
print("Starcoder generating response")
#print(tokenizer.special_tokens_map)
print(get_code_completion(prefix, suffix))
print("Successful")