# coding=utf-8
# author: xusong
# time: 2022/9/05 14:12

"""Gradio demo: generative customer-service dialogue with a KPLUG model.

TODO: the bot should also be able to decide whether a reply is needed at all.
"""

import torch
import gradio as gr
from info import article
from kplug import modeling_kplug_s2s_patch
from transformers import BertTokenizer, BartForConditionalGeneration

model = BartForConditionalGeneration.from_pretrained("eson/kplug-base-jddc")
tokenizer = BertTokenizer.from_pretrained("eson/kplug-base-jddc")

# Only the trailing characters of the concatenated dialogue are fed to the
# encoder, so long conversations keep their most recent context.
MAX_CONTEXT_CHARS = 500
# Upper bound passed to ``model.generate`` as ``max_length``.
MAX_GENERATION_LENGTH = 1000


def predict(input, history=None):
    """Generate the next bot reply for a running conversation.

    Concatenation scheme: all previous turns are joined directly into one
    input string without distinguishing speaker roles. This is crude, but an
    encoder-decoder architecture does not confuse its input with its output
    (a GPT-style architecture would need explicit role markers).

    Args:
        input: The new user utterance.
        history: Flat list of earlier utterances, alternating user / bot.
            ``None`` (or an empty list) starts a new conversation.

    Returns:
        A ``(pairs, history)`` tuple: ``pairs`` is a list of
        ``(user_utterance, bot_reply)`` tuples for the chatbot widget, and
        ``history`` is the updated flat utterance list to be stored as state.
    """
    # Build a new list instead of mutating the caller's (or a shared default)
    # list. NOTE: if history contains a wrong response, the error may
    # propagate to later turns.
    history = (history or []) + [input]

    # Tokenize the tail of the concatenated dialogue.
    context = "".join(history)[-MAX_CONTEXT_CHARS:]
    bot_input_ids = tokenizer.encode(context, return_tensors='pt')

    # Generate a response.
    # NOTE(review): pad_token_id is taken from tokenizer.eos_token_id; confirm
    # this tokenizer actually defines an EOS token.
    generated = model.generate(
        bot_input_ids,
        max_length=MAX_GENERATION_LENGTH,
        pad_token_id=tokenizer.eos_token_id,
    ).tolist()

    # Decode the tokens to text and drop the whitespace the tokenizer inserts
    # between tokens.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    reply = "".join(decoded.split())
    history = history + [reply]

    # Pair consecutive utterances as (user, bot) tuples for the chatbot view.
    pairs = [
        (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
    ]
    return pairs, history


jddc_examples = [
    # price protection
    "昨天刚买的怎么就降了几十块,应该补给我差价吧",
    "请问这个猕猴桃是有货的吗?",
    # delivery time
    "我下的这个单怎么还没到",
    # courier
    "发什么快递",
    "能发邮政吗",
]

jddc_iface = gr.Interface(
    fn=predict,
    # inputs=["text", "state"],
    inputs=[
        gr.Textbox(
            label="输入文本",
            value="发什么快递"),
        # gr.State() raises an error here, so the string shortcut is used
        "state"
    ],
    outputs=["chatbot", "state"],
    examples=jddc_examples,
    title="电商客服-生成式对话(Response Generation)",
    article=article,
)

if __name__ == "__main__":
    jddc_iface.launch()