# LlamaLive / app.py
# Author: Seth Pittman
# (header reconstructed from HuggingFace page residue: commit 560c065, 6.88 kB)
import gradio as gr
from threading import Thread
import random
import llama_cpp
import os
import spaces
randtxt = ""  # leftover: randomize() declares it global but never writes or reads it
print("downloading!")
#os.system("wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_L.gguf")
# Load the local GGUF model with the llama-3 chat template applied.
# NOTE(review): the commented wget fetches a Q3_K_L file but this loads Q4_0 —
# confirm which quantization artifact is actually present on disk.
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")
def randomize():
    """Drive the two-bot conversation forever, one generated turn per pass."""
    global randtxt  # declared for parity with the rest of the file; never assigned here
    while True:
        print("generating")
        genTurn()
#chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences. "},
# {"role": "user", "content": "berry: Good morning"}] # POV: llama is "assistant"
#print(chat[len(chat)-1]["content"])
#llama
def reversechat(chat):
    """Return a copy of *chat* with the user/assistant roles swapped.

    This lets the same model answer "from the other side" of the
    conversation: what was said by the user is re-labelled assistant and
    vice versa, while the system message is kept first and unchanged.

    Fixes two defects in the original:
      * it aliased the message dicts (``nmsg = msg``) and flipped roles
        in place, mutating the caller's chat history;
      * the system message's role was flipped to "user" as well, so the
        scenario prompt leaked into the turn list.

    :param chat: list of ``{"role": ..., "content": ...}`` dicts.
    :return: new list of new dicts; the input is not mutated.
    """
    nchat = []
    for msg in chat:
        if msg["role"] == "system":
            # Keep the scenario prompt as-is for the swapped point of view.
            nchat.append(msg)
        elif msg["role"] == "user":
            nchat.append({"role": "assistant", "content": msg["content"]})
        else:
            nchat.append({"role": "user", "content": msg["content"]})
    return nchat
# Which side the model speaks for next: "a" = Llama (normal "assistant"
# orientation), anything else = Berry via reversechat().  It is only ever
# reset to "a" in error handlers and never toggled, so in practice the
# "a" branch always runs.
chara = "a"
def genTurn():
    """Generate one conversation turn and append it to the global ``chat``.

    When ``chara == "a"`` the model replies as Llama ("assistant"); otherwise
    the history is passed through reversechat() so the model replies as Berry.
    On any generation failure (e.g. context overflow) the conversation is
    reset to the opening scenario.

    NOTE(review): nothing ever toggles ``chara``, so only the "a" branch runs
    in practice — confirm whether alternation was intended.
    """
    global chara
    global chat
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            # Berry's turn: role-swapped view so the model answers as Berry.
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception as e:
        # Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt);
        # log the cause instead of hiding it, then restart the scenario.
        print("this chat is over now :(", e)
        chara = "a"
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good afternoon!"}]
# NOTE(review): dead code — this `watch` is shadowed by the later definition
# created inside the gr.Blocks context below, which is the one wired to the UI.
def watch(prompt):
    global chara
    global chat
    prompt.append(("hi", "yo"))
    return prompt
    # --- Everything below is unreachable: the `return` above exits first.
    # Because a `yield` appears later in the body, this function is actually a
    # generator: calling it runs nothing until iterated, and the first
    # iteration then performs the append and raises StopIteration(prompt).
    # `c2` and `msg1` appear undefined at this point — unreachable anyway.
    chat[0] = {"role": "system", "content": prompt}
    chat[1] = {"role": "user", "content": c2 + ": " + msg1}
    #Generate message
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            #Arteex
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except:
        print("this chat is over now :(")
        chara ="a"
        chat = [{"role": "system", "content": prompt},
                {"role": "user", "content": c2 + ": " + msg1}]
    # Convert the role-based chat to Chatbot (left, right) tuples.
    msgsview = []
    for msg in chat:
        if msg["role"] == "system":
            pass
        else:
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
    yield msgsview
#demo = gr.Interface(watch,inputs=None, outputs=gr.Chatbot(), live=True, description="click generate to show latest chat!", title="LlamaLive, watch an llm conversation!")
#randomize()
import time
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0

    # Opening scenario, shared by the initial history and the error reset.
    SYSTEM_PROMPT = "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."

    def stp():
        """Set the server-side stop flag (currently unused: the Stop button
        reloads the page client-side instead)."""
        global stop
        stop = 1

    # Client-side stop: a full page reload drops the pending generation loop.
    stopbtn.click(None, js="window.location.reload()")

    @spaces.GPU
    def watch(prompt):
        """Advance the bot-vs-bot conversation by one generated turn.

        :param prompt: Chatbot history as (left, right) message tuples;
            Berry's lines render on the left, Llama's on the right.
        :return: the rebuilt history, including the new turn, in the same
            tuple format.
        """
        global chara
        global chat
        c2 = "llama"
        msg1 = "Good Morning!"
        # Rebuild the model-facing chat from the visible history each call.
        chat = [{"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": "berry: Good Morning!"}]
        for left, right in prompt:
            if left is not None:
                chat.append({"role": "user", "content": left})
            if right is not None:
                chat.append({"role": "assistant", "content": right})
        try:
            if chara == "a":
                msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                # Berry's turn: role-swapped view so the model answers as Berry.
                msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception as e:
            # Generation failed (e.g. context overflow): restart the scenario.
            # Fixes two defects: the bare `except:` hid the cause, and the
            # reset stuffed `prompt` (a list of tuples) into the system-prompt
            # string instead of the actual scenario text.
            print("this chat is over now :(", e)
            chara = "a"
            chat = [{"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": c2 + ": " + msg1}]
        # Convert back to Chatbot tuples; lines starting with "llama:" render
        # on the assistant (right) side, everything else on the left.
        msgsview = []
        for m in chat:
            if m["role"] == "system":
                continue
            if m["content"].lower().startswith("llama:"):
                msgsview.append((None, m["content"]))
            else:
                msgsview.append((m["content"], None))
        return msgsview

    btn.click(watch, [chatbot], [chatbot])
    # Self-retriggering loop: every chatbot update fires `change`, which
    # generates the next turn — a continuous conversation until the page is
    # reloaded via the Stop button.
    chatbot.change(watch, [chatbot], [chatbot])

if __name__ == "__main__":
    demo.launch()

# Halt module execution here: everything below is an older, dead draft.
exit()
# NOTE(review): unreachable — the `exit()` above terminates the process before
# any of the following runs. This is an older draft of the app kept in place.
print(chat)
if __name__ == "__main__":
    #Thread(target=randomize).start() bad idea running llm 24/7 for no reason
    with gr.Blocks() as demo:
        gr.Markdown("# LlamaLive\nwatch a live interaction between 2 chatbots!")
        cb = gr.Chatbot()
        cb.value=([(None, "testing")])
        btn = gr.Button()
        btn.click(watch, inputs=[cb], outputs=[cb])
        demo.launch()