"""Gradio chat demo: a Friends-style chatbot that answers as Joey.

Loads a quantized Mistral-7B GGUF fine-tune on CPU via ctransformers, wraps it
in a transformers text-generation pipeline, and serves a gr.Blocks chat UI.
"""
import random
import time

import gradio as gr
from ctransformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline

# CPU-only load of the GGUF weights (gpu_layers=0); hf=True exposes a
# transformers-compatible model object so it can be used in a pipeline.
model = AutoModelForCausalLM.from_pretrained(
    "manan05/mistral-7b-friends-v0.1.gguf",
    model_file="mistralfriends-7b-v0.1.gguf",
    model_type="mistral",
    gpu_layers=0,
    hf=True,
)

# Tokenizer for the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained("manan05/mistral-7b-friends")

# Generation pipeline: short replies, mild repetition penalty, and only the
# newly generated text (return_full_text=False) so the prompt isn't echoed.
generator = pipeline(
    model=model,
    tokenizer=tokenizer,
    task='text-generation',
    max_new_tokens=50,
    repetition_penalty=1.1,
    return_full_text=False,
)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Generate Joey's next line given the conversation so far.

        Args:
            message: The user's latest message from the textbox.
            chat_history: List of (user, bot) string pairs from gr.Chatbot.

        Returns:
            A ("", updated_history) tuple — clears the textbox and shows the
            new exchange in the chat widget.
        """
        # Flatten each (user, bot) pair into a newline-joined transcript line.
        context_lines = ['\n'.join(pair) for pair in chat_history]
        print('\n'.join(context_lines))

        # NOTE(review): the "upcomming" typo is kept byte-for-byte — the
        # fine-tune may have been trained on this exact prompt template;
        # confirm before correcting it.
        prompt = (
            "Given the following conversation context, generate the upcomming "
            "dialogue of Joey in his style. \n CONTEXT: Me: "
            + '\n'.join(context_lines)
            + '\n'
            + message
        )

        generated_lines = generator(prompt)[0]["generated_text"].split('\n')
        # Bug fix: previously bot_message was left unbound (NameError) when no
        # generated line contained "Joey:". Fall back to the first generated
        # line so the UI never crashes.
        bot_message = next(
            (line for line in generated_lines if "Joey:" in line),
            generated_lines[0],
        )

        chat_history.append(("Me: " + message, bot_message))
        time.sleep(2)  # brief pause so the UI update feels conversational
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()