# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq
#
# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key)
#
# # Load the Whisper model
# model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"
#
# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)
#
#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]
#
#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )
#
#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()
#
#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)
#
#         # Save audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())
#
#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"
#     except Exception as e:
#         return f"An error occurred: {e}", None
#
# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True
# )
#
# iface.launch()

import os
import io  # For in-memory audio buffers (BytesIO)
import gradio as gr
import whisper
from gtts import gTTS
from gemani import Gemani  # Assuming you have a Gemani client similar to Groq

# Get the Gemani API key from environment variables
GEMANI_API_KEY = os.getenv("GEMANI_API_KEY")
if not GEMANI_API_KEY:
    raise ValueError("GEMANI_API_KEY environment variable is not set.")

# Initialize the Gemani client
client = Gemani(api_key=GEMANI_API_KEY)

# Load the Whisper model
model = whisper.load_model("base")
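# Optional sanity check (a minimal sketch, not part of the app itself):
# transcribe a local clip to confirm the Whisper model works before wiring it
# into the UI. "sample.wav" is a placeholder path, not a file this project ships.
#
#     result = model.transcribe("sample.wav")
#     print(result["text"])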
def chatbot(audio=None):
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response using the Gemani API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="gemani-model-8b",  # Replace with the correct model name for Gemani
        )
        response_text = chat_completion.choices[0].message.content

        # Convert the response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()  # Create a BytesIO object
        tts.write_to_fp(response_audio_io)  # gTTS.save() expects a filename, so use write_to_fp() for BytesIO
        response_audio_io.seek(0)  # Rewind the BytesIO object

        # Persist the audio to disk; Gradio's Audio output plays a filepath,
        # not a raw BytesIO object
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None

def clear_inputs():
    # Reset the audio input, response text, and response audio components
    return None, None, None

# Create a custom interface
def build_interface():
    with gr.Blocks(css="""
        .block-title {
            text-align: center;
            color: white;
            background-color: #4CAF50;
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as demo:
        gr.Markdown(
            """