from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
import codecs

# Piper TTS engine; a voice model is loaded on demand via load_mod below
pp = PyPiper()

# Serverless Inference API client for the chat model
#client = InferenceClient("Qwen/QwQ-32B-Preview")
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build a Mixtral-instruct prompt string from the chat history."""
    prompt = ""
    if history:
        print(history)
        prompt = "<s>"
    for user_prompt, bot_response in history:
        #print(bot_response)
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
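
# Illustration of what format_prompt returns (a sketch, not called anywhere):
# with history = [("hi", "Hello!")] and message = "how are you?" the string is
#   <s>[INST] hi [/INST] Hello!</s> [INST] how are you? [/INST]
# In practice generate() prepends the system prompt to the message first.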
def generate(prompt, history, role="ASSISTANT"):
    if not history:
        history = []
    # fresh seed per request so identical prompts can yield different replies
    seed = random.randint(1, 9999999999999)
    print(seed)
    if role == "ASSISTANT":
        system_prompt = prompts.ASSISTANT
    elif role == "RESEARCHER":
        system_prompt = prompts.RESEARCHER
    else:
        system_prompt = ""
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}\nUSER:{prompt}", history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    #buf = ""
    for response in stream:
        output += response.token.text
        # Alternative: buffer and yield only on sentence boundaries
        #buf += response.token.text
        #if "\n" in buf or ". " in buf or "</s>" in buf:
        #    yield [(prompt, output)]
        # note: this replaces the visible chat history with the current exchange
        yield [(prompt, output)]
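
# Minimal sketch of driving generate() outside Gradio (hypothetical test code;
# it assumes the prompts module and a valid HF API token are available):
#   pairs = None
#   for pairs in generate("Tell me a joke.", []):
#       pass  # each yield is the partial history [(prompt, output_so_far)]
#   print(pairs[-1][1])  # final response text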
def load_mod(model):
    # yield twice so the status message updates before and after the load
    yield f"Loading: {model}"
    pp.load_mod(model)
    yield f"Voice Loaded: {model}"
def tts(inp, names, length, noise, width, sen_pause):
    #print(inp)
    print(inp[-1][1])
    # drop the end-of-sequence token from the last bot reply; str.replace is
    # used because str.strip("</s>") strips characters, not the substring
    txt = inp[-1][1].replace("</s>", "").strip()
    #decoded_text = codecs.decode(txt, "unicode_escape")
    yield from pp.stream_tts([txt], names, length, noise, width, sen_pause)
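
# Note: pp.stream_tts is assumed to yield audio chunks (e.g. (sample_rate,
# numpy array) tuples) that gr.Audio(streaming=True) can play incrementally;
# pypipertts is not documented here, so this is inferred from usage.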
with gr.Blocks() as iface:
    gr.HTML("""<center><div style='font-size:xx-large;font-weight:900;'>Mixtral 8x7b Chatbot + Piper TTS</div></center>""")
    chatbot = gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, layout="panel")
    prompt = gr.Textbox(label="Prompt")
    with gr.Row():
        submit_b = gr.Button()
        stop_b = gr.Button("Stop")
        clear = gr.ClearButton([chatbot, prompt])
    aud = gr.Audio(streaming=True, autoplay=True)
    with gr.Accordion("Voice Controls", open=False):
        msg = gr.HTML("""""")  # status line updated by load_mod
        names = gr.Dropdown(label="Voice", choices=pp.key_list, value="en_US-ryan-high")
        length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
        noise = gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5)
        width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
        sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1)
        upd_btn = gr.Button("Update Voice")
    with gr.Row(visible=False):
        stt = gr.Textbox()

    # load the default voice on page load; on submit, stream the chat reply
    # into the chatbot, then speak it
    iface.load(load_mod, names, msg)
    upd = upd_btn.click(tts, [chatbot, names, length, noise, width, sen_pause], aud)
    sub_b = submit_b.click(generate, [prompt, chatbot], chatbot).then(
        tts, [chatbot, names, length, noise, width, sen_pause], aud
    )
    names_change = names.change(load_mod, names, msg)
    stop_b.click(None, None, None, cancels=[sub_b, names_change, upd])

iface.queue(default_concurrency_limit=20).launch(max_threads=40)
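
# queue() enables streaming/generator events; as I read the Gradio docs,
# default_concurrency_limit=20 caps concurrent runs per event listener and
# max_threads=40 sizes the worker thread pool.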