# ----- Earlier version of this Space using Groq (kept for reference) -----
# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq
#
# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key)
#
# # Load the Whisper model
# model = whisper.load_model("base")  # Other sizes: "small", "medium", "large"
#
# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)
#
#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]
#
#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )
#
#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()
#
#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)
#
#         # Save the audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())
#
#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"
#     except Exception as e:
#         return f"An error occurred: {e}", None
#
# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True,
# )
# iface.launch()
import os
import gradio as gr
import whisper
from gtts import gTTS
from anthropic import Anthropic  # Import the Anthropic client
import io  # Import io for BytesIO

# Get the Anthropic API key from environment variables
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")

# Initialize the Anthropic client
client = Anthropic(api_key=ANTHROPIC_API_KEY)
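# Optional sanity check for the key and model before wiring up the UI,
# left commented out ("claude-3-haiku-20240307" is one current model choice):
#   msg = client.messages.create(
#       model="claude-3-haiku-20240307",
#       max_tokens=16,
#       messages=[{"role": "user", "content": "Say hello."}],
#   )
#   print(msg.content[0].text)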
# Load the Whisper model
model = whisper.load_model("base")  # You can also use "small", "medium", "large"
def chatbot(audio=None):
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper (transcribe accepts the
        # file path that Gradio passes in directly)
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response with the Anthropic Messages API. The original
        # legacy Completions call targeted "claude-v1", which is retired;
        # the Messages API with a current model is used instead.
        message = client.messages.create(
            model="claude-3-haiku-20240307",  # Swap in any current Claude model
            max_tokens=100,
            messages=[{"role": "user", "content": user_input}],
        )
        # The SDK returns an object, not a dict; the reply text is in content[0].text
        response_text = message.content[0].text.strip()

        # Convert the response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()  # Create a BytesIO object
        # gTTS.save() expects a file path; write_to_fp() is the file-object API
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)  # Rewind the BytesIO object

        # gr.Audio outputs expect a file path (or a sample-rate/array tuple),
        # not a BytesIO, so persist the MP3 and return its path
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None
def clear_inputs():
    # Returning None for each component resets the audio input and both outputs
    return None, None, None
# Create a custom interface
def build_interface():
    with gr.Blocks(css="""
        .block-title {
            text-align: center;
            color: white;
            background-color: #4CAF50;
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as demo:
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )

        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                chatbot_output_text = gr.Textbox(label="Chatbot Response")
                chatbot_output_audio = gr.Audio(label="Audio Response")
                clear_button = gr.Button("Clear", elem_classes="gradio-button")

        clear_button.click(
            fn=clear_inputs,
            outputs=[audio_input, chatbot_output_text, chatbot_output_audio]
        )

        audio_input.change(
            fn=chatbot,
            inputs=[audio_input],
            outputs=[chatbot_output_text, chatbot_output_audio]
        )

    return demo
# Launch the interface
if __name__ == "__main__":
    interface = build_interface()
    interface.launch()
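# Setup sketch (package names assumed from the imports above):
#   pip install gradio openai-whisper gTTS anthropic
#   export ANTHROPIC_API_KEY=...   # required; the script raises ValueError otherwise
#   python app.py                  # or whatever this file is named
# Whisper decodes audio through ffmpeg, so the ffmpeg binary must also be installed.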