File size: 5,638 Bytes
23b91c1
 
 
e1ca0ac
 
85e4c9e
9fda673
bd0a208
23b91c1
85e4c9e
eea8986
bd0a208
 
e1ca0ac
b868f9d
 
f747aa0
e1ca0ac
 
 
 
 
 
 
 
9fda673
e1ca0ac
 
 
 
 
 
 
 
 
 
 
 
 
f019c26
e1ca0ac
 
 
 
 
f019c26
23b91c1
 
 
85e4c9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b868f9d
85e4c9e
 
 
e1ca0ac
85e4c9e
 
 
 
 
aef15aa
 
85e4c9e
 
 
23b91c1
a21fcbf
560c065
 
 
 
 
 
 
 
 
 
 
 
3a1fb82
a21fcbf
b868f9d
560c065
 
 
a21fcbf
560c065
 
 
249e68c
a21fcbf
560c065
 
 
 
 
 
 
 
 
a21fcbf
 
560c065
 
b868f9d
249e68c
b868f9d
560c065
 
 
b868f9d
249e68c
560c065
 
69ae2b3
 
560c065
 
 
a21fcbf
560c065
 
 
 
 
 
 
 
 
 
 
 
a21fcbf
 
84267b2
a21fcbf
 
 
aac7f28
a21fcbf
 
bd0a208
50d16f9
3a1fb82
560c065
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import gradio as gr
from threading import Thread
import random
import transformers
import torch
import os
import spaces
from TTS.api import TTS
randtxt = ""
print("downloading!")


# Text-to-speech: multi-speaker VITS trained on VCTK, moved to the GPU.
tts = TTS("tts_models/en/vctk/vits").to('cuda')
modID = "meta-llama/Meta-Llama-3-8B-Instruct"
home = True  # NOTE(review): unused flag — confirm before removing
# Load the LLM once at module import; the tokenizer reuses the same model id
# (the original repeated the literal and assigned modID twice).
model = transformers.AutoModelForCausalLM.from_pretrained(modID, device_map="cuda")
tok = transformers.AutoTokenizer.from_pretrained(modID)
llama = transformers.pipeline(
    "text-generation",
    model=model,
    device_map="auto",  # NOTE(review): model already placed with device_map="cuda" above — confirm the mix is intended
    tokenizer=tok,
)

@spaces.GPU(duration=90)
def llm(msgs):
    """Generate the next reply for *msgs*, a chat-format message list.

    Renders the messages through the model's chat template, samples up to
    100 new tokens, and returns only the text produced after the prompt.
    """
    tokenizer = llama.tokenizer
    prompt = tokenizer.apply_chat_template(
        msgs,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Stop on either the model's EOS token or Llama-3's end-of-turn marker.
    stop_ids = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    completion = llama(
        prompt,
        max_new_tokens=100,
        eos_token_id=stop_ids,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    generated = completion[0]["generated_text"]
    return generated[len(prompt):]

def randomize():
    """Drive the chat forever from the console, one generated turn per loop.

    Runs until interrupted; genTurn() handles its own failures by resetting
    the conversation. (The dead ``global randtxt`` declaration was removed —
    the function never assigned it.)
    """
    while True:
        print("generating")
        genTurn()


#llama

def reversechat(chat):
    """Return a mirrored copy of *chat* with user/assistant roles swapped.

    This lets the same LLM speak as the "other" character: user turns become
    assistant turns and vice versa. Any other role (i.e. the system message)
    is presented as a user turn, matching the output shape of the original
    implementation.

    Bug fix: the original aliased the message dicts (``nmsg = msg``) and
    swapped roles in place, permanently corrupting the caller's chat (the
    system message became a user message for good). This version never
    mutates the input; it also tolerates an empty chat instead of raising
    IndexError.
    """
    flipped = []
    for msg in chat:
        role = "assistant" if msg["role"] == "user" else "user"
        flipped.append({"role": role, "content": msg["content"]})
    return flipped

chara = "a"  # whose turn it is: "a" = assistant-side character, otherwise the mirrored one


def genTurn():
    """Generate one turn of the Berry/Llama chat, appending it to the global chat.

    When it is the mirrored character's turn, the chat is passed through
    reversechat() so the model answers from the other point of view. Any
    failure (including ``chat`` not having been initialised yet) resets the
    scenario to its opening state.
    """
    global chara
    global chat
    try:
        if chara == "a":
            msg = llm(chat)
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            # Arteex
            msg = llm(reversechat(chat))
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception:  # was a bare except; keep Ctrl-C / SystemExit working
        print("this chat is over now :(")
        chara = "a"
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good afternoon!"}]




import librosa
import time
with gr.Blocks() as demo:
    # Chat UI: history view, clear button, a "step" button, and a stop button.
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt=""
    stop = 0
    # NOTE(review): stp() is never wired to any event — the stop button below
    # just reloads the page client-side. Confirm before removing.
    def stp():
        global stop
        stop=1
    stopbtn.click(None, js="window.location.reload()")
    # Audio player that auto-plays each newly synthesized line.
    ttsout = gr.Audio(autoplay=True)
    autime = 0  # duration (seconds) of the last synthesized clip
    #@spaces.GPU
    def watch(prompt):
        """Advance the never-ending Berry/Llama chat by one turn.

        prompt: the current Chatbot history as a list of (user, assistant)
        pairs. Returns the updated history pairs plus the path of the TTS
        audio file for the newly generated line.
        """
        global chara
        global chat
        global autime
        c1 = "berry"
        c2 = "llama"
        msg1 = "Good Morning!"
        nmsg = ""
        strt = time.time()

        # Rebuild the model-side chat from the UI history on every call.
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."}, {"role": "user", "content": "berry: Good Morning!"}]
        for i in prompt:
            if i[0] is not None:
                chat.append({"role": "user", "content": i[0]})
            if i[1] is not None:
                chat.append({"role": "assistant", "content": i[1]})

        # Generate the next message for whichever character is up.
        canContinue = True
        try:
            if chara == "a":
                msg = llm(chat)
                nmsg = msg
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                # Arteex: speak as the other character by mirroring roles.
                msg = llm(reversechat(chat))
                nmsg = msg
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception as err:  # was a bare except; keep Ctrl-C working
            print("this chat is over now :(   |    ", err)
            chara = "a"
            # NOTE(review): `prompt` is the history list, not a system string —
            # looks wrong, but this reset state is rebuilt on the next call.
            chat = [{"role": "system", "content": prompt},
                    {"role": "user", "content": c2 + ": " + msg1}]
            canContinue = False

        # Render for the Chatbot widget: lines starting with "llama:" go on
        # the assistant side, everything else on the user side.
        msgsview = []
        for m in chat:
            if m["role"] == "system":
                continue
            if m["content"].lower().startswith("llama:"):
                msgsview.append((None, m["content"]))
            else:
                msgsview.append((m["content"], None))

        if canContinue:
            # Synthesize the new line; tts_to_file writes "output.wav" by default.
            tts.tts_to_file(nmsg.removeprefix("llama: ").removeprefix("berry: "), speaker="p243")  # f243 m241
            # Busy-wait until the previous clip has had time to finish playing.
            while time.time() < strt + autime:
                pass
            try:
                # BUG FIX: librosa.get_duration's first positional arg is the
                # audio array `y`; the file path must be passed by keyword,
                # otherwise this always raised and autime stayed 0.
                autime = librosa.get_duration(path="output.wav")
                print(autime)
            except Exception:
                autime = 0
        return msgsview, "output.wav"
    # Manual step via the button, plus a self-retrigger: watch() returns a new
    # chatbot value, which fires chatbot.change and generates the next turn —
    # this is what makes the chat "never-ending".
    btn.click(watch, [chatbot], [chatbot, ttsout])
    chatbot.change(watch, [chatbot], [chatbot, ttsout])

if __name__ == "__main__":
    demo.launch()