import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

MODEL = "bigcode/starcoderbase-1b"
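
# Seed sampled generation for reproducibility; the seed value is an arbitrary choice.
set_seed(42)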

print("Load Tokenizer")
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
# StarCoder ships without a pad token; reuse EOS and pad on the left so
# generated tokens follow the prompt directly.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
print("Load Model") |
|
|
base_model = AutoModelForCausalLM.from_pretrained( |
|
|
MODEL, |
|
|
quantization_config=None, |
|
|
device_map=None, |
|
|
trust_remote_code=True, |
|
|
torch_dtype=torch.float32, |
|
|
).cuda() |
|

peft_model_id = "limernyou/starcoder-peft-conti"
model = PeftModel.from_pretrained(base_model, peft_model_id, adapter_name="personal_copilot")

# merge_and_unload() folds the LoRA weights into the base model and returns
# the merged model, so keep the return value.
model = model.merge_and_unload()
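
# Optional sanity check: after merging, `model` should be the plain
# transformers model class rather than a PeftModel wrapper.
print(type(model).__name__)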


def get_code_completion(prefix, suffix):
    # StarCoder fill-in-the-middle prompt: the model generates the span that
    # belongs between the prefix and suffix at the <fim_middle> marker.
    prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]


def get_code_completion1(prefix, suffix):
    # Plain left-to-right completion: no FIM tokens, the model simply
    # continues the concatenated prefix + suffix text.
    prompt = prefix + suffix
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() echoes the prompt before the continuation; slice it off
    # (assumes the decode round-trips the prompt text, typical for code).
    completion = output_text[len(prompt):].strip()
    return completion

prefix = """from peft import LoraConfig, TaskType, get_peft_model |
|
|
from transformers import AutoModelForCausalLM |
|
|
peft_config = LoraConfig( |
|
|
""" |
|
|
suffix = """""" |

print("Starcoder generating response")
print(get_code_completion(prefix, suffix))
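
# For comparison, run the same prompt through the plain-continuation path.
print("Starcoder generating response (plain continuation)")
print(get_code_completion1(prefix, suffix))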

print("Successful")