Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
import whisper | |
from groq import Groq | |
from gtts import gTTS | |
import tempfile | |
# Load the open-source Whisper model once at import time so every request
# reuses the same instance.
model = whisper.load_model("base")  # Options: "tiny", "base", "small", "medium", "large"

# Read the Groq API key from the environment (e.g. a deployment secret named
# GROQ_API_KEY) instead of committing it to source control. The key that used
# to be hard-coded on this line must be treated as compromised and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def _synthesize_speech(text):
    """Render *text* to a temporary MP3 file with gTTS and return its path."""
    tts = gTTS(text=text, lang='en')
    # Reserve a unique path, then close the handle before gTTS writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        audio_path = f.name
    try:
        tts.save(audio_path)
    except Exception:
        # Don't leave an orphaned temp file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path


def chat_with_bot(audio_input):
    """Run one voice-chat turn: transcribe, generate a reply, speak it.

    Args:
        audio_input: Path of the recorded audio file to transcribe. A falsy
            value (``None`` or ``""``) means no recording is available.

    Returns:
        A 3-tuple ``(transcription, response_text, audio_path)``. When a step
        fails or there is nothing to process, the first element carries a
        human-readable message, the second is ``""`` and the third is
        ``None``.
    """
    # The callback can be invoked without a recording (e.g. the input was
    # cleared); report that directly instead of a raw Whisper exception.
    if not audio_input:
        return "No audio input received.", "", None

    # Step 1: Transcribe audio input using open-source Whisper
    try:
        user_input = model.transcribe(audio_input)['text']
    except Exception as e:
        return "Error during transcription: " + str(e), "", None

    # Silence or noise can yield an empty transcript; don't send an empty
    # prompt to the LLM or an empty string to the speech synthesizer.
    if not user_input or not user_input.strip():
        return "No speech detected in the audio.", "", None

    # Step 2: Generate response using Groq API with Llama 8B model
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
    except Exception as e:
        return "Error during Groq API call: " + str(e), "", None

    # Step 3: Convert the response text to speech using gTTS
    try:
        output_audio = _synthesize_speech(response_text)
    except Exception as e:
        return "Error during text-to-speech conversion: " + str(e), "", None

    # Step 4: Return the transcription, response, and audio file for display in Gradio UI
    return user_input, response_text, output_audio
# Gradio Interface: one audio input feeding chat_with_bot, three outputs.
audio_in = gr.Audio(type="filepath")  # the callback receives a file path

# Order matches the tuple returned by chat_with_bot.
output_components = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Response"),
    gr.Audio(label="Generated Speech"),  # lets the user replay the generated speech
]

interface_options = dict(
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone to chat with the Llama 8B model via Groq API.",
)

iface = gr.Interface(
    fn=chat_with_bot,
    inputs=audio_in,
    outputs=output_components,
    **interface_options,
)

# Launch the Gradio Interface
iface.launch()