import gradio as gr
from threading import Thread
import random
import time
import llama_cpp
import os
import spaces

randtxt = ""

# Load the quantized Llama 3 8B Instruct model from the local GGUF file.
print("downloading!")
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")


def randomize():
    """Generate turns in an endless loop (intended to run on a background thread)."""
    global randtxt
    while True:
        print("generating")
        genTurn()


def reversechat(chat):
    """Return the chat with user/assistant roles swapped and the system prompt removed,
    so the model can generate the other character's reply."""
    nchat = [chat[0]]
    for msg in chat:
        if msg["role"] == "system":
            continue
        nmsg = dict(msg)  # copy so the original chat is not mutated
        nmsg["role"] = "assistant" if msg["role"] == "user" else "user"
        nchat.append(nmsg)
    return nchat[1:]


chara = "a"

SYSTEM_PROMPT = (
    "The following is a never-ending chat between Berry and Llama. "
    "Berry is the personification of a raspberry. Llama is Berry's best friend. "
    "They already know each other well. The chat will not end but may cut to a "
    "later date after a chat. They try to use relatively short responses no "
    "longer than 5 sentences."
)

chat = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "berry: Good afternoon!"},
]


def genTurn():
    """Generate one turn of the conversation and append it to the global chat."""
    global chara
    global chat
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            # Generate the other character's turn from the role-swapped chat.
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception:
        # If generation fails (e.g. the context is full), reset the conversation.
        print("this chat is over now :(")
        chara = "a"
        chat = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": "berry: Good afternoon!"},
        ]


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0

    def stp():
        global stop
        stop = 1

    # The Stop button simply reloads the page on the client side.
    stopbtn.click(None, js="window.location.reload()")

    @spaces.GPU
    def watch(prompt):
        """Rebuild the chat from the Chatbot history, generate the next turn,
        and return the updated history as (user, assistant) pairs."""
        global chara
        global chat
        c1 = "berry"
        c2 = "llama"
        msg1 = "Good Morning!"
        chat = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": "berry: Good Morning!"},
        ]
        for pair in prompt:
            if pair[0] is not None:
                chat.append({"role": "user", "content": pair[0]})
            if pair[1] is not None:
                chat.append({"role": "assistant", "content": pair[1]})

        # Generate the next message.
        try:
            if chara == "a":
                msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                # Generate the other character's turn from the role-swapped chat.
                msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception:
            # If generation fails, restart the conversation.
            print("this chat is over now :(")
            chara = "a"
            chat = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": c2 + ": " + msg1},
            ]

        # Convert the chat back into Chatbot (user, assistant) tuples: messages that
        # start with "llama:" go in the assistant column, everything else in the user column.
        msgsview = []
        for msg in chat:
            if msg["role"] == "system":
                continue
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
        return msgsview

    btn.click(watch, [chatbot], [chatbot])
    # Each Chatbot update re-triggers watch, so the two characters keep talking on their own.
    chatbot.change(watch, [chatbot], [chatbot])


if __name__ == "__main__":
    demo.launch()