import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_REPO = "MediaTek-Research/Breeze-7B-Instruct-v1_0"

# Load the tokenizer and model from the same repository.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    device_map="auto",  # place layers on the available GPU(s)/CPU automatically
    torch_dtype=torch.bfloat16,
)


def generate(text: str) -> str:
    text = text.strip()
    if not text:
        return ""

    # Wrap the input as a single user turn and render it with the model's
    # chat template (the original put it in the "system" role, which leaves
    # the conversation without a user message to answer).
    chat_data = [{"role": "user", "content": text}]
    input_ids = tokenizer.apply_chat_template(
        chat_data,
        add_generation_prompt=True,  # append the assistant prefix so the model replies
        return_tensors="pt",
    ).to(model.device)

    # Near-greedy decoding: the very low temperature/top_p make sampling
    # effectively deterministic; do_sample=True is required for these
    # parameters to take effect at all.
    outputs = model.generate(
        input_ids,
        max_new_tokens=128,
        do_sample=True,
        top_p=0.01,
        top_k=85,
        repetition_penalty=1.1,
        temperature=0.01,
    )

    # Decode only the newly generated tokens, slicing off the echoed prompt.
    # (The original assembled a cleaned-up string but then returned the raw
    # full decode, discarding that work.)
    reply = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return reply.strip()


gradio_app = gr.Interface(
    generate,
    inputs=gr.Text(),
    outputs=gr.Text(),
    title="Breeze-7B-Instruct demo",
)

if __name__ == "__main__":
    # Serves on http://127.0.0.1:7860 by default.
    gradio_app.launch()
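# A minimal sketch of exercising generate() without the web UI, e.g. as a
# quick smoke test that the model loads and responds before serving the
# interface. The prompt string is only an illustrative assumption; replace
# the __main__ block above with a direct call like:
#
#     if __name__ == "__main__":
#         print(generate("Briefly introduce yourself."))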