import gradio as gr
from threading import Thread
import random
import llama_cpp
import os
import spaces
randtxt = ""
print("downloading!")
#os.system("wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_L.gguf")
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")
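# Note: the commented-out wget above fetches the Q3_K_L quant, while the
# loader expects Meta-Llama-3-8B-Instruct.Q4_0.gguf to already exist in the
# working directory (e.g. committed to the Space); the two filenames differ.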

def randomize():
    # Endless background self-play loop (unused by default; see the
    # commented-out Thread(...) call at the bottom of the file).
    global randtxt
    while True:
        print("generating")
        genTurn()


#chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences. "},
#         {"role": "user", "content": "berry: Good morning"}] # POV: llama is "assistant" 
#print(chat[len(chat)-1]["content"])


#llama

def reversechat(chat):
    # Swap user/assistant roles so the model answers from the other
    # character's point of view. Each message is copied so the shared chat
    # history is not mutated in place (flipping roles on the live dicts
    # would corrupt the history on every call). The system prompt is kept.
    nchat = []
    for msg in chat:
        nmsg = dict(msg)
        if nmsg["role"] == "user":
            nmsg["role"] = "assistant"
        elif nmsg["role"] == "assistant":
            nmsg["role"] = "user"
        nchat.append(nmsg)
    return nchat
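# Example (sketch of the intended behaviour):
#   reversechat([{"role": "system", "content": "..."},
#                {"role": "user", "content": "berry: Good morning"}])
#   -> [{"role": "system", "content": "..."},
#       {"role": "assistant", "content": "berry: Good morning"}]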

chara = "a"  # whose turn it is; only ever set to "a" in this file,
             # so the else branch below is effectively unused
def genTurn():
    global chara
    global chat
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            #Arteex
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception:
        # On any failure (commonly a context-window overflow) restart the chat.
        print("this chat is over now :(")
        chara = "a"
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good afternoon!"}]





#demo = gr.Interface(watch,inputs=None, outputs=gr.Chatbot(), live=True, description="click generate to show latest chat!", title="LlamaLive, watch an llm conversation!")

#randomize()

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button("Generate")
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0
    def stp():
        global stop
        stop = 1
    # "Stop" simply reloads the page client-side; the stp()/stop flag above
    # is not wired to anything.
    stopbtn.click(None, js="window.location.reload()")

    @spaces.GPU
    def watch(prompt):
        global chara
        global chat
        c1 = "berry"
        c2 = "llama"
        msg1 = "Good Morning!"
        system_prompt = "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."

        # Rebuild the message history from the Chatbot's (user, assistant) pairs.
        chat = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": c1 + ": " + msg1}]
        for i in prompt:
            if i[0] is not None:
                chat.append({"role": "user", "content": i[0]})
            if i[1] is not None:
                chat.append({"role": "assistant", "content": i[1]})

        # Generate the next message.
        try:
            if chara == "a":
                msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                #Arteex
                msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception:
            # On failure (e.g. context overflow), restart with the system prompt.
            print("this chat is over now :(")
            chara = "a"
            chat = [{"role": "system", "content": system_prompt},
                    {"role": "user", "content": c2 + ": " + msg1}]

        # Messages starting with "llama:" render on the assistant side,
        # everything else on the user side.
        msgsview = []
        for msg in chat:
            if msg["role"] == "system":
                continue
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
        return msgsview
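    # One click generates one turn; because watch() is also bound to
    # chatbot.change below, each update re-triggers generation, so the two
    # characters keep talking on their own until "Stop" reloads the page.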
    btn.click(watch, [chatbot], [chatbot])
    chatbot.change(watch, [chatbot], [chatbot])

if __name__ == "__main__":
    demo.launch()

exit()  # everything below this point is an earlier version, kept for reference and never executed







print(chat)

if __name__ == "__main__":
    
    #Thread(target=randomize).start() bad idea running llm 24/7 for no reason
    with gr.Blocks() as demo:
        gr.Markdown("# LlamaLive\nwatch a live interaction between 2 chatbots!")
        cb = gr.Chatbot()
        cb.value = [(None, "testing")]
        btn = gr.Button()
        btn.click(watch, inputs=[cb], outputs=[cb])
    demo.launch()