# test-space / app.py
# Web file-view residue, kept as comments so the module parses:
# last commit 67f0ddf ("Update app.py") by sachit-sankhe; 984 bytes; no virus detected.
import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hub id of the LoRA adapter; the base model name is read from its PEFT config.
_PEFT_MODEL_ID = "sachit-sankhe/openllama7b-lora-adapter2"

# Lazily populated (model, tokenizer) pair, shared by every call to `greet`.
_MODEL_AND_TOKENIZER = None


def _load_model_and_tokenizer():
    """Return the adapter-wrapped model and its tokenizer, loading them once.

    The first call reads the adapter's PEFT config, loads the base model in
    8-bit with ``device_map='auto'``, loads the matching tokenizer and wraps
    the base model with the LoRA adapter. Later calls reuse the cached pair
    instead of reloading the weights for every request.
    """
    global _MODEL_AND_TOKENIZER
    if _MODEL_AND_TOKENIZER is None:
        config = PeftConfig.from_pretrained(_PEFT_MODEL_ID)
        base_model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            return_dict=True,
            load_in_8bit=True,
            device_map='auto',
        )
        tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        # Load the LoRA adapter on top of the base model.
        model = PeftModel.from_pretrained(base_model, _PEFT_MODEL_ID)
        _MODEL_AND_TOKENIZER = (model, tokenizer)
    return _MODEL_AND_TOKENIZER


def greet(name):
    """Generate the model's reply to *name* and return it as text.

    Args:
        name: The user's message; it is inserted into the
            ``###Human: ...### Assistant: `` prompt template.

    Returns:
        The decoded output sequence (special tokens skipped) as a ``str``.
    """
    model, tokenizer = _load_model_and_tokenizer()

    input_prompt = name
    batch = tokenizer(f"###Human: {input_prompt}### Assistant: ", return_tensors='pt')
    # The model is placed by device_map='auto'; the inputs must live on the
    # same device as the model for generate() to run.
    batch = batch.to(model.device)

    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=300)

    # The original wrapped this in str('\n\n', text), which raises
    # "TypeError: decoding str is not supported" (the two-argument form of
    # str() decodes bytes). Return the decoded text itself.
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
# Expose `greet` through a single text-in / text-out Gradio interface.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)

# Start the web UI; share=True is passed through to Gradio's launch()
# (per Gradio's docs this requests a public share link).
iface.launch(share=True)