# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq
#
# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key)
#
# # Load the Whisper model
# model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"
#
# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)
#
#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]
#
#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )
#
#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()
#
#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)
#
#         # Save audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())
#
#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"
#     except Exception as e:
#         return f"An error occurred: {e}", None
#
# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True
# )
# iface.launch()
import os
import gradio as gr
import whisper
from gtts import gTTS
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT  # Anthropic client and legacy prompt markers
import io # Import io for BytesIO
# Get the Anthropic API key from environment variables
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")
client = Anthropic(api_key=ANTHROPIC_API_KEY) # Initialize the Anthropic client
# Load Whisper model
model = whisper.load_model("base") # You can also use "small", "medium", "large"
def chatbot(audio=None):
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response using the Anthropic legacy Completions API.
        # The prompt must be wrapped in Human/Assistant turns, otherwise the API rejects it.
        chat_completion = client.completions.create(
            model="claude-v1",  # Specify the model
            prompt=f"{HUMAN_PROMPT} {user_input}{AI_PROMPT}",
            max_tokens_to_sample=100,  # Maximum number of tokens to sample
        )
        # The SDK returns a Completion object, so read the text via dot notation
        response_text = chat_completion.completion.strip()

        # Convert the response text to speech using gTTS.
        # gTTS.save() expects a filename, so write into the BytesIO with write_to_fp()
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()  # Create a BytesIO object
        tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
        response_audio_io.seek(0)  # Rewind the BytesIO object

        # gr.Audio expects a file path (or a sample-rate/array pair), not a BytesIO,
        # so persist the MP3 bytes to disk and return the path
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None

def clear_inputs():
    return None, None, None
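
# Note: client.completions.create is Anthropic's legacy Text Completions API and
# "claude-v1" is a legacy model name. A minimal sketch of the equivalent call with
# the current Messages API (the model name here is an assumption; substitute one
# available on your account):
#
#   message = client.messages.create(
#       model="claude-3-haiku-20240307",  # assumed model name, not from the original code
#       max_tokens=100,
#       messages=[{"role": "user", "content": user_input}],
#   )
#   response_text = message.content[0].text.strip()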
# Create a custom interface
def build_interface():
    with gr.Blocks(css="""
        .block-title {
            text-align: center;
            color: white;
            background-color: #4CAF50;
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as demo:
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )
        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                chatbot_output_text = gr.Textbox(label="Chatbot Response")
                chatbot_output_audio = gr.Audio(label="Audio Response")

        clear_button = gr.Button("Clear", elem_classes="gradio-button")
        clear_button.click(
            fn=clear_inputs,
            outputs=[audio_input, chatbot_output_text, chatbot_output_audio]
        )

        # Run the chatbot whenever a new recording is provided
        audio_input.change(
            fn=chatbot,
            inputs=[audio_input],
            outputs=[chatbot_output_text, chatbot_output_audio]
        )

    return demo
# Launch the interface
if __name__ == "__main__":
    interface = build_interface()
    interface.launch()