Spaces:

Prathamesh1420
/

Virtual_assistant

Sleeping

App Files Files Community

Prathamesh1420 committed on Sep 15

Commit

f975d86

•

1 Parent(s): fa2f154

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -61

app.py CHANGED Viewed

@@ -3,11 +3,9 @@ import groq
 import io
 import numpy as np
 import soundfile as sf
-import pyttsx3  # Text-to-speech conversion
-# Initialize text-to-speech engine
-tts_engine = pyttsx3.init()
 def transcribe_audio(audio, api_key):
     if audio is None:
         return ""
@@ -27,10 +25,11 @@ def transcribe_audio(audio, api_key):
             file=("audio.wav", buffer),
             response_format="text"
         )
-        return completion
     except Exception as e:
         return f"Error in transcription: {str(e)}"
 def generate_response(transcription, api_key):
     if not transcription:
         return "No transcription available. Please try speaking again."
@@ -41,58 +40,55 @@ def generate_response(transcription, api_key):
         # Use Llama 3 70B powered by Groq for text generation
         completion = client.chat.completions.create(
             model="llama3-70b-8192",
-            messages=[{"role": "user", "content": transcription}]
         )
-        return completion.choices[0].message.content
     except Exception as e:
         return f"Error in response generation: {str(e)}"
-def convert_text_to_speech(text):
-    tts_engine.save_to_file(text, 'response_output.wav')
-    tts_engine.runAndWait()
-    with open("response_output.wav", "rb") as f:
-        audio_bytes = f.read()
-    return audio_bytes
-def process_audio(audio, api_key):
-    if not api_key:
-        return "Please enter your Groq API key.", "API key is required."
-    transcription = transcribe_audio(audio, api_key)
-    response = generate_response(transcription, api_key)
-    if "Error" in response:
-        return transcription, response, None  # In case of error, return empty audio
-    audio_output = convert_text_to_speech(response)
-    return transcription, response, audio_output
-# Custom CSS
-custom_css = """
-.gradio-container {
-    background-color: #f5f5f5;
-}
-.gr-button-primary {
-    background-color: #f55036 !important;
-    border-color: #f55036 !important;
-}
-.gr-button-secondary {
-    color: #f55036 !important;
-    border-color: #f55036 !important;
-}
-#groq-badge {
-    position: fixed;
-    bottom: 20px;
-    right: 20px;
-    z-index: 1000;
-}
-"""
-# Gradio Interface
 with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant")
     api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
     with gr.Row():
         audio_input = gr.Audio(label="Speak!", type="numpy")
@@ -100,28 +96,15 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     with gr.Row():
         transcription_output = gr.Textbox(label="Transcription")
         response_output = gr.Textbox(label="AI Assistant Response")
-        audio_output = gr.Audio(label="Voice Response", type="file")
-    submit_button = gr.Button("Process", variant="primary")
-    gr.HTML("""
-    <div id="groq-badge">
-        <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
-    </div>
-    """)
     submit_button.click(
         process_audio,
-        inputs=[audio_input, api_key_input],
         outputs=[transcription_output, response_output, audio_output]
     )
-    gr.Markdown("""
-    ## How to use this app:
-    1. Enter your [Groq API Key](https://console.groq.com/keys) in the provided field.
-    2. Click on the microphone icon and speak your message (or upload an audio file).
-    3. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
-    4. The transcription, AI assistant response, and voice response will appear.
-    """)
 demo.launch()

 import io
 import numpy as np
 import soundfile as sf
+import requests
+# Function to transcribe audio using Groq
 def transcribe_audio(audio, api_key):
     if audio is None:
         return ""
             file=("audio.wav", buffer),
             response_format="text"
         )
+        return completion.get('text', '')  # Extract transcription text from response
     except Exception as e:
         return f"Error in transcription: {str(e)}"
+# Function to generate AI response using Groq
 def generate_response(transcription, api_key):
     if not transcription:
         return "No transcription available. Please try speaking again."
         # Use Llama 3 70B powered by Groq for text generation
         completion = client.chat.completions.create(
             model="llama3-70b-8192",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": transcription}
+            ],
         )
+        return completion.choices[0].message['content']
     except Exception as e:
         return f"Error in response generation: {str(e)}"
+# VoiceRSS TTS function
def text_to_speech(text, tts_api_key):
    """Convert *text* to MP3 speech through the VoiceRSS HTTP API.

    Args:
        text: The text to synthesize.
        tts_api_key: VoiceRSS API key.

    Returns:
        The raw audio bytes on success. On any failure, a string that
        starts with "Error in TTS conversion: " and describes the problem.
    """
    # Fail fast on inputs the service would reject anyway; avoids a
    # pointless network round trip.
    if not text:
        return "Error in TTS conversion: no text to convert"
    if not tts_api_key:
        return "Error in TTS conversion: VoiceRSS API key is required"

    url = "https://api.voicerss.org/"
    params = {
        'key': tts_api_key,
        'src': text,
        'hl': 'en-us',  # Language: English (US)
        'r': '0',  # Speech rate
        'c': 'mp3',  # Audio format (mp3)
        'f': '48khz_16bit_stereo',  # Frequency and bitrate
    }
    try:
        # POST keeps long responses (and the API key) out of the URL, and
        # the timeout stops a stalled connection from hanging the UI.
        response = requests.post(url, data=params, timeout=30)
    except Exception as e:
        return f"Error in TTS conversion: {str(e)}"

    if response.status_code != 200:
        return f"Error in TTS conversion: {response.status_code}"

    payload = response.content
    # VoiceRSS signals failures (bad key, quota, ...) with a plain-text body
    # beginning with "ERROR", so the status code alone is not sufficient.
    if payload[:5].upper() == b"ERROR":
        detail = payload.decode("utf-8", errors="replace").strip()
        return f"Error in TTS conversion: {detail}"
    return payload  # Return the audio data
+# Process audio function to handle transcription, response generation, and TTS
def process_audio(audio, groq_api_key, tts_api_key):
    """Run the full pipeline: transcribe, generate a reply, synthesize speech.

    Args:
        audio: The recorded audio as delivered by the Gradio audio input.
        groq_api_key: Groq API key used for transcription and generation.
        tts_api_key: VoiceRSS API key used for speech synthesis.

    Returns:
        A ``(transcription, response, audio)`` tuple matching the three
        Gradio outputs. ``audio`` is the synthesized audio data, or ``None``
        whenever no playable audio could be produced.
    """
    if not groq_api_key:
        return "Please enter your Groq API key.", "API key is required.", None

    transcription = transcribe_audio(audio, groq_api_key)
    response = generate_response(transcription, groq_api_key)

    # Do not spend a TTS request on an error/placeholder message or when no
    # TTS key was supplied; the text outputs already explain what happened.
    upstream_failed = (
        not transcription
        or str(transcription).startswith("Error in transcription:")
        or str(response).startswith("Error in response generation:")
    )
    if upstream_failed or not tts_api_key:
        return transcription, response, None

    # Convert the AI response to speech using VoiceRSS
    audio_response = text_to_speech(response, tts_api_key)

    # text_to_speech reports failures as a string; handing that to the audio
    # output would break playback, so surface it in the text output instead.
    if isinstance(audio_response, str):
        return transcription, f"{response}\n\n[{audio_response}]", None

    return transcription, response, audio_response
+# Gradio interface with TTS
 with gr.Blocks(theme=gr.themes.Default()) as demo:
+    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant with TTS")
     api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
+    tts_api_key_input = gr.Textbox(type="password", label="Enter your VoiceRSS API Key")
     with gr.Row():
         audio_input = gr.Audio(label="Speak!", type="numpy")
     with gr.Row():
         transcription_output = gr.Textbox(label="Transcription")
         response_output = gr.Textbox(label="AI Assistant Response")
+    audio_output = gr.Audio(label="AI Response (Audio)", type="auto")
+    submit_button = gr.Button("Process", variant="primary")
     submit_button.click(
         process_audio,
+        inputs=[audio_input, api_key_input, tts_api_key_input],
         outputs=[transcription_output, response_output, audio_output]
     )
 demo.launch()