"""Gradio streaming chat demo for the theodotus/llama-uk GGML model.

The script loads a llama-family model via ctransformers, builds a minimal
dialogue prompt from the chat history, and streams generated tokens back
into a Gradio Chatbot widget.
"""

from ctransformers import AutoModelForCausalLM
import gradio as gr


def generate_prompt(history):
    """Build the model prompt string from the chat history.

    Parameters
    ----------
    history : list[list[str]]
        Gradio chatbot history: pairs of [user_message, bot_reply].
        The last pair holds the new user message (reply still empty).

    Returns
    -------
    str
        Prompt containing at most one prior exchange plus the new message.
    """
    prompt = " "
    # history[-2:-1] deliberately yields at most ONE previous exchange
    # (empty when this is the first turn), keeping the context window small.
    for chain in history[-2:-1]:
        # NOTE(review): the role prefixes before each ':' appear to have been
        # stripped (likely non-ASCII, e.g. Ukrainian role names) — confirm
        # against the original repo before relying on this template.
        prompt += f": {chain[0]}\n: {chain[1]}{end_token}\n"
    prompt += f": {history[-1][0]}\n:"
    return prompt


def generate(history):
    """Run the model on the prompt built from *history*.

    Returns a token iterator (streaming generation, greedy decoding via
    temperature=0, with a mild repetition penalty).
    """
    prompt = generate_prompt(history)
    streamer = llm(prompt, stream=True, temperature=0, repetition_penalty=1.2)
    return streamer


# Module-level model load: downloads/opens the GGML weights once at startup.
llm = AutoModelForCausalLM.from_pretrained(
    "theodotus/llama-uk",
    model_file="model.bin",
    model_type='llama',
)
# NOTE(review): empty end-of-turn token looks stripped (possibly "</s>") —
# verify against the model card before changing.
end_token = ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the submitted message with an empty bot slot; clear the box."""
        return "", history + [[user_message, ""]]

    def bot(history):
        """Stream model tokens into the last history entry, yielding updates."""
        streamer = generate(history)
        for token in streamer:
            history[-1][1] += token
            # Yield inside the loop so the UI updates token-by-token.
            yield history

    # Submit: record the user turn synchronously, then stream the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()

if __name__ == "__main__":
    demo.launch()