Safwanahmad619 committed: Update app.py
app.py CHANGED
@@ -59,58 +59,107 @@ import os
 import gradio as gr
 import whisper
 from gtts import gTTS
-import io
 from groq import Groq
 
-if not groq_api_key:
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+if not GROQ_API_KEY:
     raise ValueError("GROQ_API_KEY environment variable is not set.")
-client = Groq(api_key=
+client = Groq(api_key=GROQ_API_KEY)
 
-# Load
+# Load Whisper model
 model = whisper.load_model("base")
 
-def
+def chatbot(audio=None):
     try:
+        if audio is None:
+            return "No input detected. Please provide an audio input.", None
 
-        # Transcribe the audio using Whisper
+        # Transcribe the audio input using Whisper
+        transcription = model.transcribe(audio)
+        user_input = transcription.get("text", "")
 
-        # Generate a response using Groq
+        # Generate a response using Llama 8B via Groq API
         chat_completion = client.chat.completions.create(
-            messages=[{"role": "user", "content":
+            messages=[{"role": "user", "content": user_input}],
             model="llama3-8b-8192",
         )
+        response_text = chat_completion.choices[0].message.content
 
-        #
-        tts = gTTS(response_message)
-        response_audio_io = io.BytesIO()
-        tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
-        response_audio_io.seek(0)
-
-        # Save audio to a file to ensure it's generated correctly
-        response_audio_path = "response.mp3"
-        with open(response_audio_path, "wb") as audio_file:
-            audio_file.write(response_audio_io.getvalue())
-
-        # Return the response text and the path to the saved audio file
-        return response_message, response_audio_path
+        # Convert the response text to speech using gTTS
+        tts = gTTS(text=response_text, lang='en')
+        response_audio_path = "response.mp3"
+        tts.save(response_audio_path)  # write the MP3 to disk so gr.Audio can play it from a filepath
+
+        return response_text, response_audio_path
 
     except Exception as e:
         return f"An error occurred: {e}", None
 
+def clear_inputs():
+    return None, None, None
+
+# Create a custom interface
+def build_interface():
+    with gr.Blocks(css="""
+        .block-title {
+            text-align: center;
+            color: white;
+            background-color: #4CAF50;
+            padding: 10px;
+            border-radius: 8px;
+        }
+        .gradio-row {
+            background-color: #f9f9f9;
+            border-radius: 8px;
+            padding: 20px;
+            margin: 10px;
+            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
+        }
+        .gradio-column {
+            padding: 10px;
+        }
+        .gradio-button {
+            background-color: #ff6347 !important;
+            color: white !important;
+            border-radius: 8px !important;
+            padding: 10px 20px !important;
+            font-size: 16px !important;
+            border: none !important;
+            cursor: pointer !important;
+            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
+            transition: background-color 0.3s ease !important;
+        }
+        .gradio-button:hover {
+            background-color: #e5533d !important;
+        }
+    """) as demo:
+        gr.Markdown(
+            """
+            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
+            """
+        )
+        with gr.Row(elem_classes="gradio-row"):
+            with gr.Column(elem_classes="gradio-column", scale=1):
+                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
+            with gr.Column(elem_classes="gradio-column", scale=2):
+                chatbot_output_text = gr.Textbox(label="Chatbot Response")
+                chatbot_output_audio = gr.Audio(label="Audio Response")
+
+        clear_button = gr.Button("Clear", elem_classes="gradio-button")
+
+        clear_button.click(
+            fn=clear_inputs,
+            outputs=[audio_input, chatbot_output_text, chatbot_output_audio]
+        )
+
+        audio_input.change(
+            fn=chatbot,
+            inputs=[audio_input],
+            outputs=[chatbot_output_text, chatbot_output_audio]
+        )
+
+    return demo
+
+# Launch the interface
+if __name__ == "__main__":
+    interface = build_interface()
+    interface.launch()
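For a quick local check of the new chatbot function outside the Gradio UI, a minimal sketch (assuming GROQ_API_KEY is already exported in the shell and a recording named sample.wav sits next to app.py; the script name and the recording are hypothetical, not part of this commit):

# local_check.py - hypothetical test helper, not part of the commit
from app import chatbot  # importing app loads the Whisper model and builds the Groq client

# sample.wav is an assumed test recording; chatbot returns (text, mp3 path) on success
# or (error message, None) if anything goes wrong inside the try block
text, audio_path = chatbot("sample.wav")
print(text)
print(audio_path)  # "response.mp3" on success

Because interface.launch() sits under the __main__ guard, importing app this way runs only the module-level setup, not the web UI.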