Safwanahmad619 commited on
Commit
02d76aa
·
verified ·
1 Parent(s): 586d983

Update app.py

Browse files
Files changed (1):
  1. app.py (+87 lines, −38 lines)
app.py CHANGED
@@ -59,58 +59,107 @@ import os
59
  import gradio as gr
60
  import whisper
61
  from gtts import gTTS
62
- import io
63
  from groq import Groq
64
 
65
# Set up the Groq API client; the credential must come from the environment.
_api_key = os.getenv('GROQ_API_KEY')
if _api_key is None or _api_key == "":
    raise ValueError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=_api_key)

# Load the Whisper speech-recognition model. "base" balances speed and
# accuracy; "small", "medium" or "large" are drop-in alternatives.
model = whisper.load_model("base")
73
 
74
def process_audio(file_path):
    """Transcribe an audio file, get an LLM reply, and synthesize it as speech.

    Takes the path of a recorded audio file and returns a
    (reply_text, mp3_path) pair, or (error_message, None) on any failure.
    """
    try:
        # Speech -> text via Whisper.
        waveform = whisper.load_audio(file_path)
        transcript = model.transcribe(waveform)["text"]

        # Text -> reply via the Groq chat-completions endpoint.
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": transcript}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        reply = completion.choices[0].message.content.strip()

        # Reply -> speech: render the MP3 bytes in memory first, then persist
        # them to disk so the caller receives a stable file path.
        buffer = io.BytesIO()
        gTTS(reply).write_to_fp(buffer)
        buffer.seek(0)

        out_path = "response.mp3"
        with open(out_path, "wb") as fp:
            fp.write(buffer.getvalue())

        return reply, out_path

    except Exception as e:
        # Report the failure as the text output so the UI never crashes.
        return f"An error occurred: {e}", None
108
 
109
# Wire the pipeline into a simple Gradio app: one audio input, two outputs
# (reply text and spoken reply), re-running automatically on new input.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # hand the handler a path on disk
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

iface.launch()
 
 
 
 
59
  import gradio as gr
60
  import whisper
61
  from gtts import gTTS
 
62
  from groq import Groq
63
 
64
# Fail fast when the Groq credential is missing from the environment.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=GROQ_API_KEY)

# Whisper "base" model: a reasonable speed/accuracy trade-off for transcription.
model = whisper.load_model("base")
71
 
72
def chatbot(audio=None):
    """Voice-chat handler: transcribe `audio`, query the LLM, speak the reply.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recorded audio (Gradio `type="filepath"`),
        or None when nothing was recorded.

    Returns
    -------
    tuple[str, str | None]
        (response text, path to the spoken-response MP3 file), or an error
        message paired with None when anything goes wrong.
    """
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper.
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response using Llama 8B via the Groq API.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content

        # Convert the response text to speech using gTTS.
        # BUG FIX: the previous code called tts.write_to_fp(None), which
        # raises (gTTS needs a real file object) and returns None regardless,
        # so the audio output was always broken. Saving to an MP3 file and
        # returning its path matches what gr.Audio expects as an output.
        tts = gTTS(text=response_text, lang='en')
        response_audio_path = "response.mp3"
        tts.save(response_audio_path)

        return response_text, response_audio_path

    except Exception as e:
        # Keep the UI alive: surface the failure as the text output.
        return f"An error occurred: {e}", None
96
 
97
def clear_inputs():
    """Reset the audio input and both output widgets to empty."""
    return (None,) * 3
99
+
100
# Build the chat UI: a styled Blocks layout with audio in, text + audio out.
def build_interface():
    """Construct and return the Gradio Blocks app for the voice chatbot."""
    custom_css = """
    .block-title {
        text-align: center;
        color: white;
        background-color: #4CAF50;
        padding: 10px;
        border-radius: 8px;
    }
    .gradio-row {
        background-color: #f9f9f9;
        border-radius: 8px;
        padding: 20px;
        margin: 10px;
        box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
    }
    .gradio-column {
        padding: 10px;
    }
    .gradio-button {
        background-color: #ff6347 !important;
        color: white !important;
        border-radius: 8px !important;
        padding: 10px 20px !important;
        font-size: 16px !important;
        border: none !important;
        cursor: pointer !important;
        box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
        transition: background-color 0.3s ease !important;
    }
    .gradio-button:hover {
        background-color: #e5533d !important;
    }
    """
    with gr.Blocks(css=custom_css) as demo:
        # Page title banner.
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )
        # Input on the left, chatbot outputs on the right.
        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                chatbot_output_text = gr.Textbox(label="Chatbot Response")
                chatbot_output_audio = gr.Audio(label="Audio Response")

        clear_button = gr.Button("Clear", elem_classes="gradio-button")

        # One-click reset of the input and both outputs.
        clear_button.click(
            fn=clear_inputs,
            outputs=[audio_input, chatbot_output_text, chatbot_output_audio],
        )

        # Re-run the full pipeline whenever a new recording arrives.
        audio_input.change(
            fn=chatbot,
            inputs=[audio_input],
            outputs=[chatbot_output_text, chatbot_output_audio],
        )

    return demo
161
 
162
+ # Launch the interface
163
+ if __name__ == "__main__":
164
+ interface = build_interface()
165
+ interface.launch()