import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
from peft import PeftModel
import gradio as gr
# Load the tokenizer shipped with the Persian LLaMA adapter
tokenizer = LlamaTokenizer.from_pretrained("mostafaamiri/persian_llama_7b")

# Load the Llama-2 base model in full precision
base_model = LlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    load_in_8bit=False,
)

# Resize the embedding matrix to cover tokens added by the Persian tokenizer
base_model.resize_token_embeddings(len(tokenizer))

# Apply the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(
    base_model,
    "mostafaamiri/persian_llama_7b",
)
# Alpaca-style prompt template used at instruction-tuning time
prompt_input = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n\n{instruction}\n\n### Response:\n\n"
)
def generate_prompt(instruction, input_text=None):
    # Append optional context to the instruction before filling the template
    if input_text:
        instruction = instruction + "\n" + input_text
    return prompt_input.format_map({"instruction": instruction})
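# For illustration (hypothetical inputs): generate_prompt("Summarize", "some text")
# fills the template with the instruction "Summarize\nsome text" and leaves an
# empty "### Response:" section for the model to complete.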
# Sampling configuration passed to model.generate()
config = dict(
    temperature=0.2,
    top_k=40,
    top_p=0.9,
    do_sample=True,
    num_beams=1,
    repetition_penalty=1.2,
    max_new_tokens=300,
)
def launch_model(text):
    # Tokenize the templated prompt and keep the tensors on the model's device
    input_tokens = tokenizer(generate_prompt(text), return_tensors="pt").to(model.device)
    outputs = model.generate(**input_tokens, **config)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() echoes the prompt, so keep only the text after the response marker
    return output.split("### Response:")[-1].strip()
# Expose the generator through a simple Gradio text-in/text-out interface
iface = gr.Interface(fn=launch_model, inputs="text", outputs="text")
iface.launch()
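
# A minimal smoke test with a hypothetical prompt (Persian for "introduce
# yourself"), bypassing the Gradio UI:
# print(launch_model("خودت را معرفی کن"))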