import spaces
import tempfile
import gradio as gr
from streaming_stt_nemo import Model
from huggingface_hub import InferenceClient
import edge_tts
# Initialize default language and STT model
default_lang = "en"
engines = {default_lang: Model(default_lang)}
# Function to transcribe audio to text
def transcribe(audio):
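    """Convert a recorded audio file to English text with the NeMo streaming STT model."""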
    lang = "en"
    model = engines[lang]
    text = model.stt_file(audio)[0]
    return text
# Initialize Huggingface InferenceClient
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# System instructions for the CrucialCoach
system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"
# Decorator for using GPU with a duration of 120 seconds
@spaces.GPU(duration=120)
def model(text):
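    """Generate a coaching reply by streaming tokens from the Mixtral-8x7B-Instruct endpoint."""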
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = system_instructions + text + "[CrucialCoach]"
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    # Accumulate streamed tokens, skipping the end-of-sequence marker
    output = ""
    for response in stream:
        if response.token.text != "</s>":
            output += response.token.text
    return output
# Asynchronous function to handle audio input and provide response
async def respond(audio):
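    """Voice pipeline: transcribe the user's audio, generate a reply, and synthesize speech with edge-tts."""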
    user = transcribe(audio)
    reply = model(user)
    communicate = edge_tts.Communicate(reply)
    # edge-tts writes MP3 data by default, so use a matching file suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    return tmp_path
# Gradio theme
theme = gr.themes.Base()
# Gradio interface for voice chat
with gr.Blocks(theme=theme, css="footer {visibility: hidden} textbox {resize: none}", title="CrucialCoach DEMO") as demo:
    with gr.Tab("🗣️ Crucial Coach Chat"):
        input_audio = gr.Audio(sources=["microphone"], type="filepath", label="Voice Chat")
        output_audio = gr.Audio(type="filepath", label="CrucialCoach", interactive=False, autoplay=True, elem_classes="audio")
        gr.Interface(
            fn=respond,
            inputs=input_audio,
            outputs=output_audio,
            live=True
        )
# Queue setup and launch
demo.queue(max_size=200)
demo.launch()