"""Voice chatbot: Whisper speech-to-text -> Anthropic Claude -> gTTS speech."""

import io
import os

import gradio as gr
import whisper
from anthropic import Anthropic
from gtts import gTTS

# Fail fast at startup if the API key is missing, rather than failing on the
# first request with a less obvious authentication error.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")
client = Anthropic(api_key=ANTHROPIC_API_KEY) # Initialize the Anthropic client # Load Whisper model model = whisper.load_model("base") # You can also use "small", "medium", "large" def chatbot(audio=None): try: if audio is None: return "No input detected. Please provide an audio input.", None # Transcribe the audio input using Whisper transcription = model.transcribe(audio) user_input = transcription.get("text", "") # Generate a response using Anthropic API chat_completion = client.completions.create( model="claude-v1", # Specify the model prompt=user_input, # Provide the user input as the prompt max_tokens_to_sample=100, # Specify the maximum tokens to sample ) response_text = chat_completion['completion'] # Convert the response text to speech using gTTS tts = gTTS(text=response_text, lang='en') response_audio_io = io.BytesIO() # Create a BytesIO object tts.save(response_audio_io) # Save the audio to the BytesIO object response_audio_io.seek(0) # Rewind the BytesIO object return response_text, response_audio_io except Exception as e: return f"An error occurred: {e}", None def clear_inputs(): return None, None, None # Create a custom interface def build_interface(): with gr.Blocks(css=""" .block-title { text-align: center; color: white; background-color: #4CAF50; padding: 10px; border-radius: 8px; } .gradio-row { background-color: #f9f9f9; border-radius: 8px; padding: 20px; margin: 10px; box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1); } .gradio-column { padding: 10px; } .gradio-button { background-color: #ff6347 !important; color: white !important; border-radius: 8px !important; padding: 10px 20px !important; font-size: 16px !important; border: none !important; cursor: pointer !important; box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important; transition: background-color 0.3s ease !important; } .gradio-button:hover { background-color: #e5533d !important; } """) as demo: gr.Markdown( """