File size: 4,912 Bytes
bb519eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
from transformers import pipeline
import requests
import json
import edge_tts
import asyncio
import tempfile
import os
import inflect
from huggingface_hub import InferenceClient
import re
import time
from streaming_stt_nemo import Model

# Language code of the speech-to-text engine that is loaded at import time.
default_lang = "en"

# Speech-to-text engines keyed by language code; only the default language is loaded.
engines = {lang: Model(lang) for lang in (default_lang,)}

def transcribe(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path of the audio file to transcribe. ``None`` or an empty
            string means no recording was supplied.

    Returns:
        The first element of the engine's ``stt_file`` result, or ``""``
        when no audio was supplied.
    """
    # The caller may pass None (e.g. a cleared recording); there is nothing to decode then.
    if not audio:
        return ""
    # Look the engine up with the same key that was used to build ``engines``,
    # so changing ``default_lang`` cannot leave a stale hard-coded "en" behind.
    engine = engines[default_lang]
    return engine.stt_file(audio)[0]

# Inference client and persona prompt for the chat model.
# NOTE: both `client1` and `system_instructions1` are assigned again further
# down in this file (after the Markdown constants). Functions read them at call
# time, so the later assignments are the ones that take effect at runtime.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

system_instructions1 = "<s> Answer as Sasha Bingham, an American born in Florida. Sasha is energetic, outgoing, and a programming whiz. She's passionate about teaching English and travels frequently. Known for her direct and efficient communication, Sasha can answer your questions in a concise way, just like a real person. Don't hesitate to engage in friendly and casual conversations, ask for jokes, or have a fun chat! [USER]"

def model(text):
    """Generate a reply to ``text`` with the hosted chat model.

    The prompt is ``system_instructions1`` + ``text`` + ``"[SASHA]"``. The
    completion is streamed from ``client1`` and the streamed token texts are
    concatenated, leaving out the ``"</s>"`` token.

    Args:
        text: The user's message.

    Returns:
        The concatenated reply text.
    """
    sampling_options = {
        "temperature": 0.9,
        "max_new_tokens": 512,
        "top_p": 0.95,
        "repetition_penalty": 1,
        "do_sample": True,
        "seed": 42,
    }

    prompt = system_instructions1 + text + "[SASHA]"
    token_stream = client1.text_generation(
        prompt,
        stream=True,
        details=True,
        return_full_text=False,
        **sampling_options,
    )

    pieces = []
    for chunk in token_stream:
        piece = chunk.token.text
        if piece == "</s>":
            # Skip the end-of-sequence marker so it is not part of the reply.
            continue
        pieces.append(piece)
    return "".join(pieces)

async def respond(audio):
    """Answer a spoken message with synthesized speech.

    Transcribes ``audio`` with ``transcribe``, generates a reply with
    ``model``, and synthesizes the reply with edge-tts into a temporary file.

    Args:
        audio: Path of the recorded audio file, or ``None`` when there is no
            recording.

    Yields:
        The path of the synthesized audio file, or ``None`` when there is
        nothing to say (no recording, empty transcript, or empty reply).
    """
    if audio is None:
        # No recording to process (e.g. the input was cleared): emit no audio.
        yield None
        return

    # ``transcribe`` and ``model`` are blocking calls; run them in the default
    # executor so they do not stall the event loop this coroutine runs on.
    loop = asyncio.get_running_loop()
    user = await loop.run_in_executor(None, transcribe, audio)
    if not user or not user.strip():
        yield None
        return

    reply = await loop.run_in_executor(None, model, user)
    if not reply or not reply.strip():
        yield None
        return

    communicate = edge_tts.Communicate(reply)
    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be handed back to the caller. The handle is closed before the
    # synthesizer writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path

# Markdown header of the page; passed to gr.Markdown at the top of the UI.
DESCRIPTION = """ # <center><b>SASHA ✨</b></center>
                 ### <center>Your AI Assistant, Sasha Bingham</center>
                 ### <center>Let's chat!</center>
                 """

# Links to other demos; passed to gr.Markdown near the end of this file.
MORE = """ ## TRY Other Models
                 ### Instant Video: Create Amazing Videos in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Video
                 ### Instant Image: 4k images in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Image
                 """

# Not referenced anywhere in the visible part of this file.
BETA = """ ### Voice Chat (BETA)"""

# Section heading passed to gr.Markdown above the text-prompt row.
FAST = """## Fastest Model"""

# Not referenced anywhere in the visible part of this file.
Complex = """## Best in Complex Question"""

# Not referenced anywhere in the visible part of this file.
Detail = """## Best for Detailed Generation or Long Answers"""

# Model id used to build the inference client below.
base_loaded = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# NOTE: this rebinds `client1`, which was already assigned earlier in the file.
# Functions look the name up at call time, so this is the client they use.
client1 = InferenceClient(base_loaded)

# NOTE: this rebinds `system_instructions1` as well. It differs from the earlier
# value only in its prefix ("[SYSTEM]" instead of "<s>") and is the prompt that
# both `model` and `generate1` prepend at call time.
system_instructions1 = "[SYSTEM] Answer as Sasha Bingham, an American born in Florida. Sasha is energetic, outgoing, and a programming whiz. She's passionate about teaching English and travels frequently. Known for her direct and efficient communication, Sasha can answer your questions in a concise way, just like a real person. Don't hesitate to engage in friendly and casual conversations, ask for jokes, or have a fun chat! [USER]"

def _generate1_text(prompt):
    """Stream a completion for ``prompt`` from ``client1`` and return it as one string.

    Blocking helper for ``generate1``. The streamed token texts are
    concatenated, leaving out the ``"</s>"`` token.
    """
    # NOTE(review): temperature/top_p are passed together with do_sample=False;
    # confirm whether the backend applies them in that mode.
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=False,
    )
    formatted_prompt = system_instructions1 + prompt + "[SASHA]"
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
    return "".join(
        response.token.text for response in stream if response.token.text != "</s>"
    )


async def generate1(prompt):
    """Answer a text prompt with synthesized speech.

    Generates a reply with the hosted chat model and synthesizes it with
    edge-tts into a temporary file.

    Args:
        prompt: The user's text message.

    Yields:
        The path of the synthesized audio file, or ``None`` when the prompt
        or the generated reply is empty.
    """
    if not prompt or not prompt.strip():
        # Nothing was asked, so skip the model call and emit no audio.
        yield None
        return

    # The streaming request is blocking; run it in the default executor so it
    # does not stall the event loop this coroutine runs on.
    loop = asyncio.get_running_loop()
    output = await loop.run_in_executor(None, _generate1_text, prompt)
    if not output.strip():
        yield None
        return

    communicate = edge_tts.Communicate(output)
    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be handed back to the caller. The handle is closed before the
    # synthesizer writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path

# Assemble the web UI: a voice-chat row wired to `respond`, a text-prompt row
# wired to `generate1`, and a closing section with links to other demos.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # NOTE: `input` shadows the builtin of the same name; the module-level
        # names are kept unchanged so existing references keep working.
        input = gr.Audio(label="Voice Chat (BETA)", sources="microphone", type="filepath", waveform_options=False)
        output = gr.Audio(label="SASHA", type="filepath",
                        interactive=False,
                        autoplay=True,
                        elem_classes="audio")
        gr.Interface(
            fn=respond,
            inputs=[input],
            outputs=[output], live=True)
    gr.Markdown(FAST)
    with gr.Row():
        user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
        input_text = gr.Textbox(label="Input Text", elem_id="important")
        output_audio = gr.Audio(label="SASHA", type="filepath",
                        interactive=False,
                        autoplay=True,
                        elem_classes="audio")
    with gr.Row():
        translate_btn = gr.Button("Response")
        translate_btn.click(fn=generate1, inputs=user_input,
                            outputs=output_audio, api_name="translate")

    # Created inside the Blocks context so it is part of `demo`; a component
    # created after the `with` block has ended is not attached to the page.
    gr.Markdown(MORE)

if __name__ == "__main__":
    demo.queue(max_size=200).launch()