File size: 5,731 Bytes
f0ad67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8afdb95
 
0bb7775
8afdb95
0bb7775
c2924bd
0bb7775
 
 
 
 
 
8afdb95
02d76aa
b92c64c
8afdb95
02d76aa
586d983
02d76aa
 
586d983
02d76aa
 
 
586d983
94786a8
 
bbbe41e
 
 
586d983
94786a8
586d983
02d76aa
 
c2924bd
 
 
586d983
02d76aa
586d983
 
 
8afdb95
02d76aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8afdb95
02d76aa
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq

# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key) 

# # Load the Whisper model
# model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"

# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)

#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]

#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )

#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()

#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)

#         # Save audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())

#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"

#     except Exception as e:
#         return f"An error occurred: {e}", None

# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True
# )

# iface.launch()
import os
import gradio as gr
import whisper
from gtts import gTTS
from anthropic import Anthropic  # Import the Anthropic client
import io  # Import io for BytesIO

# Get the Anthropic API key from environment variables.
# Fail fast at import time so the app never starts with a missing credential.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")
client = Anthropic(api_key=ANTHROPIC_API_KEY)  # Initialize the Anthropic client (shared by chatbot())

# Load Whisper model once at startup — model loading is slow, so it must not
# happen per request. "base" trades accuracy for speed.
model = whisper.load_model("base")  # You can also use "small", "medium", "large"

def chatbot(audio=None):
    """Transcribe a voice recording, get a Claude reply, and synthesize speech.

    Args:
        audio: Filesystem path to the recorded audio clip (Gradio supplies
            this with ``type="filepath"``), or None when nothing was recorded.

    Returns:
        tuple[str, str | None]: (response text, path to the response MP3),
        or an error message and None on failure.
    """
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper.
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "").strip()
        if not user_input:
            # Nothing intelligible was transcribed — don't send an empty prompt.
            return "Could not understand the audio. Please try again.", None

        # The legacy Anthropic completions endpoint requires the
        # "\n\nHuman: ... \n\nAssistant:" framing; a bare prompt is rejected.
        # NOTE(review): "claude-v1" is a deprecated model name — confirm it is
        # still served, or migrate to the Messages API.
        chat_completion = client.completions.create(
            model="claude-v1",
            prompt=f"\n\nHuman: {user_input}\n\nAssistant:",
            max_tokens_to_sample=100,
        )
        # The SDK returns a Completion object, not a dict — use attribute
        # access (subscripting raised TypeError in the original).
        response_text = chat_completion.completion.strip()

        # Convert the response text to speech using gTTS.
        # gTTS.save() expects a file *path* (it calls open() on its argument),
        # so write into the in-memory buffer via write_to_fp() instead.
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)

        # Persist to disk and return the path — gr.Audio handles a filepath
        # reliably, whereas a raw BytesIO is not a supported output value.
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_text, "response.mp3"

    except Exception as e:
        # Top-level UI boundary: surface the error in the textbox instead of
        # crashing the Gradio event handler.
        return f"An error occurred: {e}", None

def clear_inputs():
    """Return a None for each of the three widgets (input audio, reply text,
    reply audio) so the Clear button resets the whole interface."""
    return (None,) * 3

# Create a custom interface
def build_interface():
    """Assemble and return the Gradio Blocks UI for the voice chatbot.

    Layout: a styled title banner, a row with the microphone input on the
    left and the text/audio responses on the right, and a Clear button.
    Recording new audio triggers the chatbot; Clear resets all widgets.
    """
    with gr.Blocks(css="""
        .block-title {
            text-align: center; 
            color: white;
            background-color: #4CAF50; 
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as ui:
        # Title banner styled via the .block-title CSS class above.
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )

        # Input column (narrow) on the left, output column (wide) on the right.
        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                mic_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                reply_text = gr.Textbox(label="Chatbot Response")
                reply_audio = gr.Audio(label="Audio Response")

        reset_button = gr.Button("Clear", elem_classes="gradio-button")

        # Wire up events: Clear empties every widget; a new recording runs
        # the chatbot and fills both outputs.
        reset_button.click(
            fn=clear_inputs,
            outputs=[mic_input, reply_text, reply_audio]
        )
        mic_input.change(
            fn=chatbot,
            inputs=[mic_input],
            outputs=[reply_text, reply_audio]
        )

    return ui

# Entry point: build the UI and start the Gradio server when run as a script.
if __name__ == "__main__":
    build_interface().launch()