Spaces:
Running
Running
File size: 2,020 Bytes
bce5fbb 59ce05d bce5fbb dd3757f bce5fbb 8b62354 59ce05d bce5fbb 8b62354 5b7e8e1 c7d280c bce5fbb 556d27c bce5fbb 556d27c bce5fbb 5b7e8e1 bce5fbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import torch
import re
import os
from accelerate import disk_offload

# Hub repository ids: the base checkpoint and the PEFT adapter loaded on top of it.
model_name = "google/gemma-2b"
peft_model = "kazuma313/gemma-dokter-ft"
device_map = "auto"
# Directory passed to accelerate's disk_offload as the offload location.
save_dir = "gemma-dokter-ft"

# os.getenv() only accepts (key, default); `add_to_git_credential` is an
# option of huggingface_hub.login(), so it is passed there instead.
hf_token = os.getenv("hftoken")
if hf_token:
    # Only log in when a token is configured: login() without a token falls
    # back to an interactive prompt, which cannot be answered on a headless host.
    login(token=hf_token, add_to_git_credential=True)

# config = PeftConfig.from_pretrained(peft_model)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Apply the adapter, then fold its weights into the base model so that
# inference runs on a plain transformers model.
model = PeftModel.from_pretrained(base_model, peft_model)
model = model.merge_and_unload()

# Offloading has to happen after `model` exists; calling it before the model
# was built raised NameError.
disk_offload(model=model, offload_dir=save_dir)

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_token,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
def echo(message, history, tokens=80):
    """Generate the bot's reply for a single chat message.

    Args:
        message: The latest user message; this is the only text sent to
            the model.
        history: Earlier chat turns supplied by the chat UI. Not used.
        tokens: Forwarded to ``model.generate`` as ``max_length``. Defaults
            to 80 so the function can also be called without the extra
            slider input.

    Returns:
        The decoded generation, reduced to the text after the last
        ``'Answer:'`` marker, with ``Step i/n`` markers and a leading
        ``N.`` numbering removed.
    """
    # Without re.MULTILINE the `^\d+\.` alternative only matches at the very
    # start of the text; the `Step i/n` alternative matches anywhere.
    pattern = r'Step \d+/\d+|^\d+\.\s*'

    encoded = tokenizer(message, return_tensors="pt")
    # The UI value may arrive as a float; coerce it so generate() always
    # receives an integer length. max_length is kept from the original call.
    outputs = model.generate(**encoded, max_length=int(tokens))

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only what follows the last 'Answer:' marker; if the marker is
    # absent, split() returns the whole text as its single element.
    answer = decoded.split('Answer:')[-1]
    return re.sub(pattern, '', answer)
# Chat UI around `echo`. The slider is passed to `echo` as its third
# argument (`tokens`) after the message and the chat history.
demo = gr.ChatInterface(
    echo,
    examples=[
        ["what is the negative effect of alcohol?"],
        ["i have lack of sleep, what happend if continously do this?"],
    ],
    title="dokter Bot",
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Slider(64, 256, value=80),
    ],
)

# The stray trailing "|" after launch() was a syntax error and is removed.
demo.launch()