# LlamaLive / app.py
import gradio as gr
from threading import Thread
import random
import transformers
import torch
import os
import time
import spaces
import librosa
from TTS.api import TTS
randtxt = ""
print("downloading!")
tts = TTS("tts_models/en/vctk/vits").to('cuda')
modID = "meta-llama/Meta-Llama-3-8B-Instruct"
home=True
model = transformers.AutoModelForCausalLM.from_pretrained(modID, device_map="cuda")
tok = transformers.AutoTokenizer.from_pretrained(modID)
llama = transformers.pipeline(
    "text-generation",
    model=model,
    device_map="auto",
    tokenizer=tok
)
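
# llm() runs one generation step on the ZeroGPU-allocated GPU (up to 90 s per call)
# and returns only the newly generated text, with the prompt prefix stripped.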
@spaces.GPU(duration=90)
def llm(msgs):
    prompt = llama.tokenizer.apply_chat_template(
        msgs,
        tokenize=False,
        add_generation_prompt=True
    )
    terminators = [
        llama.tokenizer.eos_token_id,
        llama.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    outputs = llama(
        prompt,
        max_new_tokens=100,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # for the 801100128909120989534879th time remember to transfer changes between test and app.py -_-
    return outputs[0]["generated_text"][len(prompt):]
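
# randomize() would loop forever generating turns; it is not started anywhere in this file.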
def randomize():
    global randtxt
    while True:
        print("generating")
        genTurn()
#llama
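# reversechat() flips user/assistant roles (and leaves the system message out) so the
# same model can be prompted to speak as the other character.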
def reversechat(chat):
    nchat = []
    nchat.append(chat[0])
    for msg in chat:
        nmsg = dict(msg)  # copy so the caller's chat history is not mutated
        if nmsg["role"] == "user":
            nmsg["role"] = "assistant"
        else:
            nmsg["role"] = "user"
        if msg["role"] != "system":
            nchat.append(nmsg)
    return nchat[1:]
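
# chara selects which side generates next: "a" produces an assistant (Llama) turn,
# anything else a user (Berry) turn via reversechat().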
chara = "a"
def genTurn():
    global chara
    global chat
    try:
        if chara == "a":
            msg = llm(chat)
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            #Arteex
            msg = llm(reversechat(chat))
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception:
        print("this chat is over now :(")
        chara = "a"
chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
{"role": "user", "content": "berry: Good afternoon!"}]
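
# Gradio UI: a Chatbot pane, a clear button, a start button, a Stop button, and an
# auto-playing Audio output for the synthesized speech.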
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0

    def stp():
        global stop
        stop = 1

    stopbtn.click(None, js="window.location.reload()")
    ttsout = gr.Audio(autoplay=True)
    autime = 0

    #@spaces.GPU
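    # watch() takes the current chatbot history, generates the next turn with llm(),
    # synthesizes it with the VITS voice, and returns the updated history plus the audio file.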
    def watch(prompt):
        global chara
        global chat
        global autime
        c1 = "berry"
        c2 = "llama"
        msg1 = "Good Morning!"
        nmsg = ""
        strt = time.time()
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good Morning!"}]
        # Rebuild the chat history from the chatbot pairs (left column = user/Berry, right column = assistant/Llama)
        for i in prompt:
            if i[0] is not None:
                chat.append({"role": "user", "content": i[0]})
            if i[1] is not None:
                chat.append({"role": "assistant", "content": i[1]})
        # Generate message
        canContinue = True
        try:
            if chara == "a":
                msg = llm(chat)
                nmsg = msg
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                #Arteex
                msg = llm(reversechat(chat))
                nmsg = msg
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception as err:
            print("this chat is over now :( | ", err)
            chara = "a"
            chat = [{"role": "system", "content": prompt},
                    {"role": "user", "content": c2 + ": " + msg1}]
            canContinue = False
        # Convert the chat back into chatbot pairs; lines starting with "llama:" go in the assistant column
        msgsview = []
        for msg in chat:
            if msg["role"] == "system":
                continue
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
        if canContinue:
            # tts_to_file defaults to writing output.wav, which is what gets returned below
            tts.tts_to_file(nmsg.removeprefix("llama: ").removeprefix("berry: "), speaker="p243")  #f243 m241
            # crude wait so the new clip is not returned before the previous one has had time to play
            while time.time() < strt + autime:
                pass
            try:
                # librosa >= 0.10 takes path=; older releases used filename=
                autime = librosa.get_duration(path="output.wav")
                print(autime)
            except:
                autime = 0
        return msgsview, "output.wav"
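
    # btn starts the loop; every chatbot update re-triggers watch(), so the conversation keeps going.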
    btn.click(watch, [chatbot], [chatbot, ttsout])
    chatbot.change(watch, [chatbot], [chatbot, ttsout])
if __name__ == "__main__":
    demo.launch()