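Voice-to-voice chatbot: Whisper transcribes an uploaded audio file, the Groq API (llama3-8b-8192) generates a text reply, gTTS converts the reply to speech, and Gradio provides the web interface.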
# Install necessary libraries
#pip uninstall -y whisper
#pip install git+https://github.com/openai/whisper.git
#pip install gradio gtts groq ffmpeg-python
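# Note: openai-whisper also needs the ffmpeg binary on the system PATH
# (e.g. apt-get install ffmpeg); the ffmpeg-python package alone is not enough.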
# Import required libraries
import os
import io
import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
# Set your GROQ_API_KEY (prefer an environment variable or your platform's
# secrets manager; never commit a real key to source)
os.environ["GROQ_API_KEY"] = "your_groq_api_key_here"  # placeholder, not a real key
# Initialize Groq client and Whisper model
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
model = whisper.load_model("base", device="cpu")
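# "base" keeps CPU inference reasonably fast; larger checkpoints such as
# "small" or "medium" transcribe more accurately but are slower on CPU.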
# Function to process audio
def process_audio(file_path):
    try:
        # Ensure the file exists
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        print(f"Processing file: {file_path}")

        # Load and process the audio with Whisper
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")

        # Transcribe the audio
        result = model.transcribe(audio)
        text = result["text"]
        print("Transcription:", text)

        # Generate a response using the Groq API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = chat_completion.choices[0].message.content.strip()
        print("Chatbot response:", response_message)

        # Convert the response to audio
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)

        # Save the response audio to a file
        response_audio_path = "response.mp3"
        with open(response_audio_path, "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_message, response_audio_path

    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        return f"An unexpected error occurred: {e}", None
# Define Gradio interface
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = "### Instructions\n1. Upload an audio file.\n2. Wait for transcription and the chatbot's response.\n3. Listen to the response audio."

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
    title=title,
    description=description,
    article=article,
)
# Launch Gradio interface
iface.launch(share=True)
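For a quick check without the web UI, process_audio can also be called directly. A minimal sketch, assuming a local recording named sample.wav (a hypothetical file) and a valid GROQ_API_KEY:

# Run the full pipeline on a local file and inspect the results.
reply_text, reply_audio = process_audio("sample.wav")
print(reply_text)   # chatbot's text reply, or an error message
print(reply_audio)  # path to response.mp3, or None if something failed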