File size: 3,436 Bytes
271f307
 
 
 
0064343
f8bb954
0064343
 
0477c9e
271f307
 
 
27fc254
8d1c101
933b9d8
adc6fa8
8037104
 
 
 
 
f64fa83
271f307
46e2b35
025291b
467106a
5baa862
271f307
94bfdef
3f7f96a
94bfdef
3f7f96a
 
271f307
 
5baa862
271f307
 
 
 
 
c3aa9e5
271f307
 
fedb67a
2197af3
8052ea6
0724925
 
 
467106a
837d0bc
a5a9cdb
 
 
 
61bec1b
6ac2e72
61bec1b
697cfcd
 
ad8222c
8f8286d
d5a8fce
0064343
61bec1b
837d0bc
8f3e7b5
837d0bc
 
 
894c749
8f3e7b5
c5c3727
6ae48e4
933b9d8
c5c3727
 
 
 
d5a8fce
c65b118
 
6ae48e4
6e51090
1ddfd8c
837d0bc
6e51090
6097a4e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Gradio chat UI: streams Mixtral-8x7B completions and speaks replies via Piper TTS.
from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
import codecs  # NOTE(review): only referenced by commented-out decode in tts(); candidate for removal
# Shared Piper TTS engine; a voice model is loaded into it on demand via load_mod().
pp=PyPiper()

#client = InferenceClient("Qwen/QwQ-32B-Preview")
# Hosted HF inference endpoint used by generate() for streaming text generation.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def format_prompt(message, history):
    """Assemble a Mixtral-instruct prompt string from prior turns plus the new message.

    Each history pair becomes "[INST] user [/INST] bot</s> "; a leading "<s>"
    is emitted only when there is history. The new message is appended as a
    final "[INST] message [/INST]" awaiting the model's reply.
    """
    pieces = []
    if history:
        print(history)
        pieces.append("<s>")
        for user_turn, bot_turn in history:
            pieces.append(f"[INST] {user_turn} [/INST] {bot_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)

def generate(prompt, history, role="ASSISTANT"):
    """Stream a completion from the Mixtral endpoint, yielding chat-history updates.

    Args:
        prompt: the user's new message.
        history: list of (user, bot) pairs from gr.Chatbot; None/empty treated as no history.
        role: selects the system prompt ("ASSISTANT" or "RESEARCHER"; anything else → none).

    Yields:
        [(prompt, partial_output)] — a one-pair chatbot update that grows token by token.
    """
    if not history:
        history = []
    # Fresh random seed per call so regenerating the same prompt gives a different reply.
    seed = random.randint(1, 9999999999999)
    print(seed)
    if role == "ASSISTANT":
        system_prompt = prompts.ASSISTANT
    elif role == "RESEARCHER":
        system_prompt = prompts.RESEARCHER
    else:
        system_prompt = ""
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}\nUSER:{prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield [(prompt, output)]

def load_mod(model):
    """Switch the active Piper voice, yielding status text before and after.

    Yields:
        str: a progress message for the UI — first while loading, then on success.
    """
    status_before = f"Loading: {model}"
    yield status_before
    pp.load_mod(model)
    status_after = f"Voice Loaded: {model}"
    yield status_after
    
def tts(inp, names, length, noise, width, sen_pause):
    """Speak the most recent bot reply from the chat history via Piper TTS.

    Args:
        inp: gr.Chatbot history — list of (user, bot) pairs; the last bot text is spoken.
        names: voice model name; length/noise/width/sen_pause: Piper synthesis controls.

    Yields:
        streaming audio chunks from pp.stream_tts for the gr.Audio component.
    """
    #print(inp)
    print(inp[-1][1])
    # BUGFIX: was .strip("</s>"), which strips ANY of the chars '<', '/', 's', '>'
    # from BOTH ends (e.g. "sure..." would lose its leading 's'). removesuffix
    # drops only a literal trailing end-of-sequence token.
    txt = inp[-1][1].removesuffix("</s>")
    #decoded_text = codecs.decode(txt, "unicode_escape")
    yield from pp.stream_tts([txt], names, length, noise, width, sen_pause)

# --- UI layout and event wiring ---
with gr.Blocks() as iface:
    gr.HTML("""<center><div style='font-size:xx-large;font-weight:900;'>Mixtral 8x7b Chatbot + Piper TTS</div>""")
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, layout="panel")
    prompt = gr.Textbox(label="Prompt")
    with gr.Row():
        submit_b = gr.Button()
        stop_b = gr.Button("Stop")
        clear = gr.ClearButton([chatbot,prompt])
    # Streaming audio sink for the TTS output; autoplays as chunks arrive.
    aud=gr.Audio(streaming=True, autoplay=True)
    with gr.Accordion("Voice Controls",open=False):
        msg = gr.HTML("""""")  # status line updated by load_mod()
        names=gr.Dropdown(label="Voice", choices=pp.key_list,value="en_US-ryan-high")
        length=gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
        noise=gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5)
        width=gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
        sen_pause=gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1)
        upd_btn=gr.Button("Update Voice")
    # Hidden row — stt textbox appears unused; presumably a placeholder for speech-to-text. TODO confirm.
    with gr.Row(visible=False):
        stt=gr.Textbox()
    # Load the default voice once at page load.
    iface.load(load_mod,names,msg)
    # "Update Voice" re-speaks the last reply with current slider settings.
    upd = upd_btn.click(tts,[chatbot,names,length,noise,width,sen_pause],aud)
    # Submit: stream the LLM reply into the chatbot, then speak it.
    sub_b = submit_b.click(generate, [prompt,chatbot],chatbot).then(tts,[chatbot,names,length,noise,width,sen_pause],aud)
    # Changing the voice dropdown reloads the Piper model.
    names_change=names.change(load_mod,names,msg)
    # Stop cancels any in-flight generation, voice load, or re-speak event.
    stop_b.click(None,None,None, cancels=[sub_b,names_change,upd])
iface.queue(default_concurrency_limit=20).launch(max_threads=40)