import gradio as gr
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel
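# Load the fine-tuned tokenizer, then the Llama-2 base weights, and attach
# the Persian LoRA adapter with PEFT.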
tokenizer = LlamaTokenizer.from_pretrained("mostafaamiri/persian_llama_7b")
base_model = LlamaForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf",
load_in_8bit=False,
)
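# The fine-tuned tokenizer extends the base vocabulary (presumably with
# Persian tokens), so the embedding matrix must be resized to match it
# before the adapter weights are loaded.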
base_model.resize_token_embeddings(len(tokenizer))
model = PeftModel.from_pretrained(
    base_model,
    "mostafaamiri/persian_llama_7b",
)
prompt_input = (
"Below is an instruction that describes a task. "
"Write a response that appropriately completes the request.\n\n"
"### Instruction:\n\n{instruction}\n\n### Response:\n\n"
)
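# Build the full prompt; optional extra input is appended to the instruction.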
def generate_prompt(instruction, input=None):
if input:
instruction = instruction + '\n' + input
return prompt_input.format_map({'instruction': instruction})
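# Sampling settings forwarded to model.generate().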
config = dict(
temperature=0.2,
top_k=40,
top_p=0.9,
do_sample=True,
num_beams=1,
repetition_penalty=1.2,
max_new_tokens=300
)
def launch_model(text):
    prompt = generate_prompt(text)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, **config)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt, so keep only what follows the response marker.
    return output.split("### Response:")[-1].strip()
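# Minimal Gradio text-in / text-out interface.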
iface = gr.Interface(fn=launch_model, inputs="text", outputs="text")
iface.launch()