# LlamaLive / app.py
# Author: Seth Pittman
# (header reconstructed from HuggingFace page residue: commit 560c065, 6.88 kB)
import gradio as gr
from threading import Thread
import random
import llama_cpp
import os
import spaces
randtxt = ""  # leftover: randomize() declares it global but never writes or reads it
print("downloading!")
#os.system("wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_L.gguf")
# Load the local GGUF model with the llama-3 chat template applied.
# NOTE(review): the commented wget fetches a Q3_K_L file but this loads Q4_0 —
# confirm which quantization artifact is actually present on disk.
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")
def randomize():
    """Drive the two-bot conversation forever, one generated turn per pass."""
    global randtxt  # declared for parity with the rest of the file; never assigned here
    while True:
        print("generating")
        genTurn()
#chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences. "},
# {"role": "user", "content": "berry: Good morning"}] # POV: llama is "assistant"
#print(chat[len(chat)-1]["content"])
#llama
def reversechat(chat):
    """Return a copy of *chat* with the user/assistant roles swapped.

    This lets the same model answer "from the other side" of the
    conversation: what was said by the user is re-labelled assistant and
    vice versa, while the system message is kept first and unchanged.

    Fixes two defects in the original:
      * it aliased the message dicts (``nmsg = msg``) and flipped roles
        in place, mutating the caller's chat history;
      * the system message's role was flipped to "user" as well, so the
        scenario prompt leaked into the turn list.

    :param chat: list of ``{"role": ..., "content": ...}`` dicts.
    :return: new list of new dicts; the input is not mutated.
    """
    nchat = []
    for msg in chat:
        if msg["role"] == "system":
            # Keep the scenario prompt as-is for the swapped point of view.
            nchat.append(msg)
        elif msg["role"] == "user":
            nchat.append({"role": "assistant", "content": msg["content"]})
        else:
            nchat.append({"role": "user", "content": msg["content"]})
    return nchat
# Which side the model speaks for next: "a" = Llama (normal "assistant"
# orientation), anything else = Berry via reversechat().  It is only ever
# reset to "a" in error handlers and never toggled, so in practice the
# "a" branch always runs.
chara = "a"
def genTurn():
    """Generate one conversation turn and append it to the global ``chat``.

    When ``chara == "a"`` the model replies as Llama ("assistant"); otherwise
    the history is passed through reversechat() so the model replies as Berry.
    On any generation failure (e.g. context overflow) the conversation is
    reset to the opening scenario.

    NOTE(review): nothing ever toggles ``chara``, so only the "a" branch runs
    in practice — confirm whether alternation was intended.
    """
    global chara
    global chat
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            # Berry's turn: role-swapped view so the model answers as Berry.
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception as e:
        # Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt);
        # log the cause instead of hiding it, then restart the scenario.
        print("this chat is over now :(", e)
        chara = "a"
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good afternoon!"}]
# NOTE(review): dead code — this `watch` is shadowed by the later definition
# created inside the gr.Blocks context below, which is the one wired to the UI.
def watch(prompt):
    global chara
    global chat
    prompt.append(("hi", "yo"))
    return prompt
    # --- Everything below is unreachable: the `return` above exits first.
    # Because a `yield` appears later in the body, this function is actually a
    # generator: calling it runs nothing until iterated, and the first
    # iteration then performs the append and raises StopIteration(prompt).
    # `c2` and `msg1` appear undefined at this point — unreachable anyway.
    chat[0] = {"role": "system", "content": prompt}
    chat[1] = {"role": "user", "content": c2 + ": " + msg1}
    #Generate message
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            #Arteex
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except:
        print("this chat is over now :(")
        chara ="a"
        chat = [{"role": "system", "content": prompt},
                {"role": "user", "content": c2 + ": " + msg1}]
    # Convert the role-based chat to Chatbot (left, right) tuples.
    msgsview = []
    for msg in chat:
        if msg["role"] == "system":
            pass
        else:
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
    yield msgsview
#demo = gr.Interface(watch,inputs=None, outputs=gr.Chatbot(), live=True, description="click generate to show latest chat!", title="LlamaLive, watch an llm conversation!")
#randomize()
import time
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0

    # Opening scenario, shared by the initial history and the error reset.
    SYSTEM_PROMPT = "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."

    def stp():
        """Set the server-side stop flag (currently unused: the Stop button
        reloads the page client-side instead)."""
        global stop
        stop = 1

    # Client-side stop: a full page reload drops the pending generation loop.
    stopbtn.click(None, js="window.location.reload()")

    @spaces.GPU
    def watch(prompt):
        """Advance the bot-vs-bot conversation by one generated turn.

        :param prompt: Chatbot history as (left, right) message tuples;
            Berry's lines render on the left, Llama's on the right.
        :return: the rebuilt history, including the new turn, in the same
            tuple format.
        """
        global chara
        global chat
        c2 = "llama"
        msg1 = "Good Morning!"
        # Rebuild the model-facing chat from the visible history each call.
        chat = [{"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": "berry: Good Morning!"}]
        for left, right in prompt:
            if left is not None:
                chat.append({"role": "user", "content": left})
            if right is not None:
                chat.append({"role": "assistant", "content": right})
        try:
            if chara == "a":
                msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                # Berry's turn: role-swapped view so the model answers as Berry.
                msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception as e:
            # Generation failed (e.g. context overflow): restart the scenario.
            # Fixes two defects: the bare `except:` hid the cause, and the
            # reset stuffed `prompt` (a list of tuples) into the system-prompt
            # string instead of the actual scenario text.
            print("this chat is over now :(", e)
            chara = "a"
            chat = [{"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": c2 + ": " + msg1}]
        # Convert back to Chatbot tuples; lines starting with "llama:" render
        # on the assistant (right) side, everything else on the left.
        msgsview = []
        for m in chat:
            if m["role"] == "system":
                continue
            if m["content"].lower().startswith("llama:"):
                msgsview.append((None, m["content"]))
            else:
                msgsview.append((m["content"], None))
        return msgsview

    btn.click(watch, [chatbot], [chatbot])
    # Self-retriggering loop: every chatbot update fires `change`, which
    # generates the next turn — a continuous conversation until the page is
    # reloaded via the Stop button.
    chatbot.change(watch, [chatbot], [chatbot])

if __name__ == "__main__":
    demo.launch()

# Halt module execution here: everything below is an older, dead draft.
exit()
# NOTE(review): unreachable — the `exit()` above terminates the process before
# any of the following runs. This is an older draft of the app kept in place.
print(chat)
if __name__ == "__main__":
    #Thread(target=randomize).start() bad idea running llm 24/7 for no reason
    with gr.Blocks() as demo:
        gr.Markdown("# LlamaLive\nwatch a live interaction between 2 chatbots!")
        cb = gr.Chatbot()
        cb.value=([(None, "testing")])
        btn = gr.Button()
        btn.click(watch, inputs=[cb], outputs=[cb])
        demo.launch()