ALVHB95 committed on
Commit
995cec8
1 Parent(s): c258d4e
Files changed (1) hide show
  1. app.py +15 -29
app.py CHANGED
@@ -119,10 +119,8 @@ qa_chain = ConversationalRetrievalChain.from_llm(
119
  output_key = 'output',
120
  )
121
 
122
- import soundfile as sf
123
- import gradio as gr
124
- from gradio_client import Client
125
  import numpy as np
 
126
 
127
  # Load ASR pipeline
128
  transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -130,43 +128,31 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-lar
130
  def chat_interface(question, audio_input=None, history=None):
131
  if audio_input is not None:
132
  # Function to transcribe the audio input
133
-
134
  def transcribe(audio):
135
- sr, y = audio
136
- y = y.astype(np.float32)
137
- y /= np.max(np.abs(y))
138
 
139
- return transcriber({"sampling_rate": sr, "raw": y})["text"]
140
-
141
- # Transcribe the audio input
142
- question = transcribe(audio_input)
143
 
 
 
 
144
 
145
- return question
 
146
 
147
- # Original chatbot logic
148
- result = qa_chain.invoke({'question': question})
149
- output_string = result['output']
150
-
151
- # Find the index of the last occurrence of "answer": in the string
152
- answer_index = output_string.rfind('"answer":')
153
-
154
- # Extract the substring starting from the "answer": index
155
- answer_part = output_string[answer_index + len('"answer":'):].strip()
156
-
157
- # Find the next occurrence of a double quote to get the start of the answer value
158
- quote_index = answer_part.find('"')
159
-
160
- # Extract the answer value between double quotes
161
- answer_value = answer_part[quote_index + 1:answer_part.find('"', quote_index + 1)]
162
 
163
- return answer_value
 
164
 
165
  chatbot_gradio_app = gr.Interface(
166
  fn=chat_interface,
167
  inputs=[
168
  gr.Textbox(lines=3, label="Type your message here"),
169
- gr.Audio(label="Record your voice", type='numpy') # Change type to "microphone"
170
  ],
171
  outputs=gr.Textbox(label="Bot's Response"),
172
  )
 
119
  output_key = 'output',
120
  )
121
 
 
 
 
122
  import numpy as np
123
+ import soundfile as sf
124
 
125
  # Load ASR pipeline
126
  transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large")
 
128
  def chat_interface(question, audio_input=None, history=None):
129
  if audio_input is not None:
130
  # Function to transcribe the audio input
 
131
  def transcribe(audio):
132
+ # If the audio input has multiple channels, take the first channel
133
+ audio = audio[:, 0] # Take the first channel
 
134
 
135
+ # Normalize audio
136
+ audio /= np.max(np.abs(audio))
 
 
137
 
138
+ # Write the audio to a temporary file
139
+ temp_audio_file = "temp_audio.wav"
140
+ sf.write(temp_audio_file, audio, 16000) # Assuming 16kHz sample rate
141
 
142
+ # Transcribe the audio from the temporary file
143
+ return transcriber(temp_audio_file)[0]['transcription']
144
 
145
+ # Transcribe the audio input
146
+ question = transcribe(audio_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
+ # Original chatbot logic goes here
149
+ return question
150
 
151
  chatbot_gradio_app = gr.Interface(
152
  fn=chat_interface,
153
  inputs=[
154
  gr.Textbox(lines=3, label="Type your message here"),
155
+ gr.Audio(label="Record your voice", type='numpy')
156
  ],
157
  outputs=gr.Textbox(label="Bot's Response"),
158
  )