LlamaLive / appgguf.py
Sethblocks's picture
add delay for fast gpus
a21fcbf
import gradio as gr
from threading import Thread
import random
import llama_cpp
import os
from TTS.api import TTS
import librosa
#import spaces
randtxt = ""
print("downloading!")
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")
tts = TTS("tts_models/en/vctk/vits").to('cuda')
def randomize():
global randtxt
while True:
print("generating")
genTurn()
#llama
def reversechat(chat):
nchat = []
nchat.append(chat[0])
for msg in chat:
nmsg = msg
if nmsg["role"] == "user":
nmsg["role"] = "assistant"
else:
nmsg["role"] = "user"
if msg["role"] != "system":
nchat.append(nmsg)
return nchat[1:]
chara = "a"
def genTurn():
global chara
global chat
try:
if chara == "a":
msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
else:
#Arteex
msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
print(msg)
except:
print("this chat is over now :(")
chara ="a"
chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
{"role": "user", "content": "berry: Good afternoon!"}]
import time
with gr.Blocks() as demo:
chatbot = gr.Chatbot()
clear = gr.ClearButton([chatbot])
btn = gr.Button()
stopbtn = gr.Button("Stop")
iprompt=""
stop = 0
def stp():
global stop
stop=1
stopbtn.click(None, js="window.location.reload()")
ttsout = gr.Audio(autoplay=True)
ttstimer = time.time()
autime = 0
#@spaces.GPU
def watch(prompt):
global chara
global chat
global autime
c1= "berry"
c2= "llama"
msg1="Good Morning!"
nmsg = ""
strt = time.time()
chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."}, {"role": "user", "content": "berry: Good Morning!"}]
for i in prompt:
if i[0] != None:
chat.append({"role": "user", "content": i[0]})
if i[1] != None:
chat.append({"role": "assistant", "content": i[1]})
#Generate message
canContinue = True
try:
if chara == "a":
msg = llama.create_chat_completion(chat, max_tokens=20)["choices"][0]["message"]["content"]
chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
nmsg = msg
else:
#Arteex
msg = llama.create_chat_completion(reversechat(chat), max_tokens=20)["choices"][0]["message"]["content"]
chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
nmsg = msg
print(msg)
except Exception as err:
print("this chat is over now :( | ", err)
chara ="a"
chat = [{"role": "system", "content": prompt},
{"role": "user", "content": c2 + ": " + msg1}]
canContinue = False
msgsview = []
for msg in chat:
if msg["role"] == "system":
pass
else:
if not msg["content"].lower().startswith("llama:"):
msgsview.append((msg["content"], None))
else:
msgsview.append((None, msg["content"]))
"".removeprefix
if canContinue == True:
tts.tts_to_file(nmsg.removeprefix("llama: ").removeprefix("berry: ") , speaker="p241")#f243 m241
while time.time() < strt - autime:
pass
try:
autime = librosa.get_duration("output.wav")
except:
autime = 0
return msgsview, "output.wav"
btn.click(watch, [chatbot], [chatbot,ttsout])
chatbot.change(watch, [chatbot], [chatbot, ttsout])
if __name__ == "__main__":
demo.launch()