import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
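
# trust_remote_code=True runs the custom modeling code shipped with the Hub
# repo; .half() casts the 13B weights to fp16 (on the order of 26 GB of VRAM).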
tokenizer = AutoTokenizer.from_pretrained(
    "AlpachinoNLP/Baichuan-13B-Instruction",
    trust_remote_code=True,
    use_fast=False,
)
model = AutoModelForCausalLM.from_pretrained(
    "AlpachinoNLP/Baichuan-13B-Instruction",
    trust_remote_code=True,
).half()
model.cuda()


def generate(histories, max_new_tokens=2048, do_sample=True, top_p=0.95,
             temperature=0.35, repetition_penalty=1.1):
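    """Rebuild the chat history as a Human/Assistant prompt and generate a reply."""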
    # Replay every turn; the latest turn's assistant slot is still empty, so
    # the prompt ends with "Assistant:" for the model to continue.
    prompt = ""
    for history in histories:
        prompt += "\nHuman:" + history[0] + "\n\nAssistant:" + history[1]

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )

    # Decode only the newly generated tokens; str.replace(prompt, "") is
    # brittle because decoding does not always reproduce the prompt exactly.
    generate_text = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return generate_text


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("clear")
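
    # user() and bot() are chained below: user() appends the new message with
    # an empty reply slot and clears the textbox, then bot() fills the slot.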
    def user(user_message, history):
        return "", history + [[user_message, ""]]

    def bot(history):
        # Fill in the reply slot of the most recent turn.
        history[-1][1] = generate(history)
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
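

# server_name="0.0.0.0" makes the demo reachable from other machines on the
# network; Gradio's default (127.0.0.1) keeps it local-only.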
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")