Spaces:
Build error
import os
import gradio as gr
import requests
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment Variables
HF_TOKEN = os.getenv("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Default model; override via the GROQ_MODEL env var if Groq retires this one
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

GROQ_HEADERS = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# Emotion descriptions
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}

# History
conversation_history = []
# Transcribe audio
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
            transcription = recognizer.recognize_google(audio_data)
            return transcription
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""
# Generate Groq response
def get_groq_response(prompt, history):
    messages = [{"role": "system", "content": prompt}]
    # History entries are stored as "User: ..." / "AI: ..." strings
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        response = requests.post(GROQ_API_URL, headers=GROQ_HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."
# Generate TTS using Yarngpt via the Hugging Face Inference API
def generate_speech_and_upload(text):
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        hf_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}
        response = requests.post(inference_url, headers=hf_headers, json=payload, timeout=120)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None
        # Write the returned audio bytes to a temporary .wav file for Gradio to play
        temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
        with open(temp_file.name, "wb") as f:
            f.write(response.content)
        return temp_file.name
    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None
# Main handler
def chat_with_ai(audio, text_input, emotion, history):
    global conversation_history
    history = history or []
    user_text = text_input or ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history
    if not user_text.strip():
        return "No input provided.", None, history
    conversation_history.append(f"User: {user_text}")
    recent_messages = conversation_history[-20:]
    prompt = (
        f"You are an empathetic AI assistant. The user is feeling {emotion} "
        f"({emotion_options.get(emotion, 'unspecified')}). Respond supportively."
    )
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")
    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, history + [[user_text, ai_response]]

def clear_conversation():
    global conversation_history
    conversation_history = []
    return [], None, "", "Conversation cleared."
# Gradio UI
with gr.Blocks() as iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
            emotion_description = gr.Markdown("**Current mood:** Neutral")

            def update_emotion_desc(em):
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"

            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")
    chat_history = gr.Chatbot(label="Chat History", height=300)
    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])
    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")

    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status]
    )

iface.launch()
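Before debugging the full app on Spaces, it can help to smoke-test the two API helpers on their own. A minimal sketch, assuming you run it in a Python shell where the functions above are already defined and both GROQ_API_KEY and HF_TOKEN are set in the environment:

# Quick sanity check of the Groq and Hugging Face helpers
reply = get_groq_response(
    "You are an empathetic AI assistant.",       # system prompt
    ["User: Say hello in one short sentence."]   # one fake history entry
)
print("Groq reply:", reply)

wav_path = generate_speech_and_upload(reply)     # path to a .wav file, or None on failure
print("TTS output:", wav_path)

If either call logs an error here, the problem is with the keys or model access rather than with the Gradio wiring.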
The code above is the complete revised version with Yarngpt integrated for text-to-speech output via Hugging Face. Make sure your HF_TOKEN is correctly set in your environment (on Spaces, as a repository secret) and that it has access to the model saheedniyi/Yarngpt; GROQ_API_KEY needs to be set the same way. Let me know if you need help deploying this.
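One deployment note, offered as an assumption since the build log will show the exact cause: Spaces build errors are most often missing dependencies, so requirements.txt should list every third-party import in the file above, i.e. gradio, requests, and SpeechRecognition (the PyPI name of the speech_recognition module).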