"""Self-chatting Gradio demo.

Two personas ("Berry" and "Llama"), both generated by Llama-3-8B-Instruct,
talk to each other; the newest line is synthesized to speech with a VITS
multi-speaker TTS model and played back alongside the chat.
"""
import gradio as gr
from threading import Thread
import random
import transformers
import torch
import os
import time
import spaces
from TTS.api import TTS

randtxt = ""

print("downloading!")
# Coqui VITS multi-speaker English model, moved to GPU for fast synthesis.
tts = TTS("tts_models/en/vctk/vits").to("cuda")

modID = "meta-llama/Meta-Llama-3-8B-Instruct"
home = True
model = transformers.AutoModelForCausalLM.from_pretrained(modID, device_map="cuda")
tok = transformers.AutoTokenizer.from_pretrained(modID)
llama = transformers.pipeline(
    "text-generation",
    model=model,
    device_map="auto",
    tokenizer=tok,
)

SYSTEM_PROMPT = (
    "The following is a never-ending chat between Berry and Llama. Berry is "
    "the personification of a raspberry. Llama is Berry's best friend. They "
    "already know each other well. The chat will not end but may cut to a "
    "later date after a chat. They try to use relatively short responses no "
    "longer than 5 sentences."
)


@spaces.GPU(duration=90)
def llm(msgs):
    """Run one chat-completion turn and return only the newly generated text.

    msgs: list of ``{"role": ..., "content": ...}`` dicts in chat format.
    """
    prompt = llama.tokenizer.apply_chat_template(
        msgs, tokenize=False, add_generation_prompt=True
    )
    # Llama-3 ends a turn with <|eot_id|>; stop on either that or plain EOS.
    terminators = [
        llama.tokenizer.eos_token_id,
        llama.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = llama(
        prompt,
        max_new_tokens=100,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # The pipeline echoes the prompt; slice it off to keep only the reply.
    return outputs[0]["generated_text"][len(prompt):]
    # for the 801100128909120989534879th time remember to transfer changes
    # between test and app.py -_-


def randomize():
    """Endlessly generate chat turns (intended to run on a background Thread)."""
    global randtxt
    while True:
        print("generating")
        genTurn()


def reversechat(chat):
    """Return the chat with user/assistant roles swapped, system prompt kept.

    Bug fix: the original aliased the message dicts (``nmsg = msg``) and
    flipped roles in place, silently corrupting the caller's history — it even
    re-labelled the system message.  Each message is now copied before the
    role swap, and the system prompt is passed through unchanged.
    """
    nchat = [chat[0]]  # keep the system prompt at the front
    for msg in chat:
        if msg["role"] == "system":
            continue
        flipped = dict(msg)  # copy: never mutate the caller's dicts
        flipped["role"] = "assistant" if msg["role"] == "user" else "user"
        nchat.append(flipped)
    return nchat


# Which persona speaks next: "a" -> assistant side, anything else -> reversed.
chara = "a"


def genTurn():
    """Generate one turn of the self-chat, alternating speakers via `chara`."""
    global chara
    global chat
    try:
        if chara == "a":
            msg = llm(chat)
            chat.append(
                {"role": "assistant", "content": msg.removesuffix("<|eot_id|>")}
            )
        else:  # Arteex
            msg = llm(reversechat(chat))
            chat.append(
                {"role": "user", "content": msg.removesuffix("<|eot_id|>")}
            )
        print(msg)
    except Exception as err:  # was a bare except:; keep best-effort, log cause
        print("this chat is over now :( | ", err)
        chara = "a"


# Initial conversation state shared by genTurn() / watch().
chat = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "berry: Good afternoon!"},
]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    # Bug fix: watch() returns an audio path as its second output, but no
    # audio component existed and only [chatbot] was wired as output.
    audio = gr.Audio()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button()
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0

    def stp():
        """Flag the generation loop to stop (currently not wired to an event)."""
        global stop
        stop = 1

    # The Stop button simply reloads the page client-side.
    stopbtn.click(None, js="window.location.reload()")

    # @spaces.GPU
    def watch(prompt):
        """Advance the self-chat by one turn and speak the newest line.

        prompt: the Chatbot history as a list of (user, assistant) pairs.
        Returns (updated history pairs, path to the synthesized wav file).
        """
        global chara
        global chat
        c2 = "llama"
        msg1 = "Good Morning!"
        # Rebuild the message list from the visible chat history.
        chat = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": "berry: Good Morning!"},
        ]
        for pair in prompt:
            if pair[0] is not None:
                chat.append({"role": "user", "content": pair[0]})
            if pair[1] is not None:
                chat.append({"role": "assistant", "content": pair[1]})
        # Generate message
        try:
            if chara == "a":
                msg = llm(chat)
                chat.append(
                    {"role": "assistant", "content": msg.removesuffix("<|eot_id|>")}
                )
            else:  # Arteex
                msg = llm(reversechat(chat))
                chat.append(
                    {"role": "user", "content": msg.removesuffix("<|eot_id|>")}
                )
            print(msg)
        except Exception as err:
            print("this chat is over now :( | ", err)
            chara = "a"
            # NOTE(review): this resets with the raw history list as the system
            # prompt — looks unintended, but preserved as the original did it.
            chat = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": c2 + ": " + msg1},
            ]
        # Map messages back to Chatbot (left, right) pairs: "llama:" lines
        # render on the right, everything else on the left.
        msgsview = []
        for m in chat:
            if m["role"] == "system":
                continue
            if m["content"].lower().startswith("llama:"):
                msgsview.append((None, m["content"]))
            else:
                msgsview.append((m["content"], None))
        # Speak the newest line.  Bug fix: the original referenced `nmsg`,
        # which is undefined in this scope (NameError on every call).
        latest = chat[-1]["content"].removeprefix("llama: ").removeprefix("berry: ")
        tts.tts_to_file(latest, speaker="p243", file_path="output.wav")  # f243 m241
        return msgsview, "output.wav"

    btn.click(watch, [chatbot], [chatbot, audio])
    # Re-triggering on .change() makes the two personas chat continuously.
    chatbot.change(watch, [chatbot], [chatbot, audio])

if __name__ == "__main__":
    demo.launch()