"""Gradio callbacks for a chat demo in which the model can play both roles.

``history`` is a list of ``{"role", "content"[, "tokens"]}`` dicts that is
passed to the model; ``chatbot`` is the list of ``(user, assistant)`` pairs
shown by the Gradio Chatbot component.
"""
import json

import gradio as gr

from utils.logging_util import logger
from models.cpp_qwen2 import bot

# from models.hf_qwen2 import bot

#
# def postprocess(self, y):
#     if y is None:
#         return []
#     for i, (message, response) in enumerate(y):
#         y[i] = (
#             None if message is None else mdtex2html.convert((message)),
#             None if response is None else mdtex2html.convert(response),
#         )
#     return y
#
# gr.Chatbot.postprocess = postprocess

# System prompts handed to ``bot.pre_cache_system`` at import time.
system_list = [
    "You are a helpful assistant.",
    "你是一个导游。",
    "你是一个英语老师。",
    "你是一个程序员。",
    "你是一个心理咨询师。",
]

bot.pre_cache_system(system_list)


def generate_user_message(chatbot, history):
    """Stream a model-generated *user* turn.

    Yields ``(chatbot, history)`` after every streamed chunk so the UI can
    refresh, then once more after the finished turn has been appended to
    ``history``. Both lists are mutated in place.

    If the last history entry is already a user turn, a warning is shown and
    the state is yielded unchanged.
    """
    if history and history[-1]["role"] == "user":
        gr.Warning('You should generate assistant-response.')
        yield chatbot, history
        return

    # Placeholder row for the turn being generated. A (None, None) pair keeps
    # the row indexable by the other callbacks even if generation is aborted.
    chatbot.append((None, None))
    produced = False
    for user_content, user_tokens in bot.generate(history, stream=True):
        produced = True
        chatbot[-1] = (user_content, None)
        yield chatbot, history

    if not produced:
        # The stream was empty: drop the placeholder instead of failing on
        # the unbound loop variables below.
        chatbot.pop()
        yield chatbot, history
        return

    user_tokens = bot.strip_stoptokens(user_tokens)
    history.append({"role": "user", "content": user_content, "tokens": user_tokens})
    yield chatbot, history


def generate_assistant_message(chatbot, history):
    """Stream a model-generated *assistant* reply to the last user turn.

    Yields ``(chatbot, history)`` after every streamed chunk, then once more
    after the finished reply has been appended to ``history``. Both lists are
    mutated in place.

    If the last history entry is not a user turn, a warning is shown and the
    state is yielded unchanged.
    """
    logger.info(f"generating {json.dumps(history, ensure_ascii=False)}")
    user_content = history[-1]["content"]
    if history[-1]["role"] != "user":
        gr.Warning('You should generate or type user-input first.')
        yield chatbot, history
        return

    produced = False
    for assistant_content, assistant_tokens in bot.generate(history, stream=True):
        produced = True
        # Fill in the assistant half of the row that holds the user turn.
        chatbot[-1] = (user_content, assistant_content)
        yield chatbot, history

    if not produced:
        # The stream was empty: nothing to record, and the loop variables
        # used below would be unbound.
        yield chatbot, history
        return

    assistant_tokens = bot.strip_stoptokens(assistant_tokens)
    history.append({"role": "assistant", "content": assistant_content, "tokens": assistant_tokens})
    logger.info(f"chatbot is {chatbot}")
    logger.info(f"history is {history}")
    yield chatbot, history


def generate(chatbot, history):
    """Generate whichever turn comes next, based on the last role in history.

    After an assistant or system entry a user turn is generated; after a user
    entry an assistant reply is generated. For an empty history or an
    unrecognised role a warning is shown and the state is yielded unchanged.
    """
    logger.info(f"chatbot: {chatbot}; history: {history}")
    last_role = history[-1]["role"] if history else None
    if last_role in ("assistant", "system"):
        streamer = generate_user_message(chatbot, history)
    elif last_role == "user":
        streamer = generate_assistant_message(chatbot, history)
    else:
        # No generator to delegate to; still yield once so the caller gets
        # a value back rather than an exception.
        gr.Warning("bug")
        yield chatbot, history
        return
    yield from streamer


def append_user(input_content, chatbot, history):
    """Append a manually typed user turn and return ``(chatbot, history)``.

    Warns and returns the state unchanged if the last entry is already a
    user turn.
    """
    if history[-1]["role"] == "user":
        gr.Warning('You should generate assistant-response.')
        return chatbot, history
    chatbot.append((input_content, None))
    history.append({"role": "user", "content": input_content})
    return chatbot, history


def append_assistant(input_content, chatbot, history):
    """Append a manually typed assistant reply and return ``(chatbot, history)``.

    Warns and returns the state unchanged if the last entry is not a user
    turn.
    """
    if history[-1]["role"] != "user":
        gr.Warning('You should generate or type user-input first.')
        return chatbot, history
    chatbot[-1] = (chatbot[-1][0], input_content)
    history.append({"role": "assistant", "content": input_content})
    return chatbot, history


def undo_generate(chatbot, history):
    """Remove the most recent user or assistant turn.

    A trailing user turn is dropped from both lists. A trailing assistant
    turn is dropped from ``history`` and blanked in the last chatbot row.
    Any other trailing role is left untouched.
    """
    last_role = history[-1]["role"]
    if last_role == "user":
        history = history[:-1]
        chatbot = chatbot[:-1]
    elif last_role == "assistant":
        history = history[:-1]
        chatbot[-1] = (chatbot[-1][0], None)
    logger.info(f"after undo, {json.dumps(chatbot, ensure_ascii=False)}, {json.dumps(history, ensure_ascii=False)}")
    return chatbot, history


def reset_user_input():
    """Return a Gradio update that clears the input box."""
    return gr.update(value='')


def reset_state(system):
    """Return an empty chatbot and a history holding only the system prompt."""
    return [], [{"role": "system", "content": system}]


def set_max_new_tokens(max_new_tokens):
    """Store ``max_new_tokens`` under ``"max_tokens"`` in the bot's generation kwargs."""
    bot.generation_kwargs["max_tokens"] = max_new_tokens


def set_temperature(temperature):
    """Store ``temperature`` in the bot's generation kwargs."""
    bot.generation_kwargs["temperature"] = temperature


def set_top_p(top_p):
    """Store ``top_p`` in the bot's generation kwargs."""
    bot.generation_kwargs["top_p"] = top_p


def set_top_k(top_k):
    """Store ``top_k`` in the bot's generation kwargs."""
    bot.generation_kwargs["top_k"] = top_k