pablo-sampaio committed
Commit 4808718
1 Parent(s): 62fbd81

Using local ASR

Files changed (1): app.py (+16 / -18)
app.py CHANGED
@@ -14,7 +14,7 @@ load_dotenv(find_dotenv())
 from match_info_crawler import get_matches_info
 
 
-USE_LOCAL_ASR_PIPELINE = False
+USE_LOCAL_ASR_PIPELINE = True
 
 
 # used for chat, if provided
@@ -227,26 +227,24 @@ else:
     ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}
 
 
-def transcribe(audio_in):
-    # from numpy data:
-    #sr, y = audio_in # sampling rate and audio data
-    #y2 = y.astype(np.float32)
-    #y2 /= np.max(np.abs(y))
-    #response = ASR_PIPELINE({"sampling_rate": sr, "raw": y})
+def transcribe(audio_file):
+    if USE_LOCAL_ASR_PIPELINE:
+        response = ASR_PIPELINE(audio_file)
+        text = response[0]["text"]
 
-    # using serverless API
-    with open(audio_in, "rb") as f:
-        data = f.read()
-    response = requests.post(ASR_API_URL, headers=ASR_API_HEADERS, data=data)
-    response = response.json()
+    else:
+        # using serverless API
+        with open(audio_file, "rb") as f:
+            data = f.read()
+        response = requests.post(ASR_API_URL, headers=ASR_API_HEADERS, data=data)
+        text = response.json()["text"]
 
-    #print(response)
-    return response['text']
+    return text
 
 
 def transcribe_and_respond(audio_in, system_txtbox, user_msg_txb, *args):
-    user_message = transcribe(audio_in)
-    outputs = respond(system_txtbox, user_message, *args)
+    transcribed_user_msg = transcribe(audio_in)
+    outputs = respond(system_txtbox, transcribed_user_msg, *args)
     return outputs
 
 
@@ -260,7 +258,7 @@ with gr.Blocks() as demo:
 
     user_msg_txb = gr.Textbox(label="Mensagem")
 
-    #audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath') # TODO: tentar type='numpy'
+    audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath')
 
     submit_btn = gr.Button("Enviar")
 
@@ -289,7 +287,7 @@ with gr.Blocks() as demo:
 
     reset_btn.click(reset_and_apply, inputs=[voice_ddown], outputs=[chatbot_area, audio_out])
 
-    #audio_in.stop_recording( transcribe_and_respond, inputs=[audio_in, system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out] )
+    audio_in.stop_recording( transcribe_and_respond, inputs=[audio_in, system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out] )
     submit_btn.click(respond, inputs=[system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out]) # Click on the button
     user_msg_txb.submit(respond, inputs=[system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out]) # Press enter to submit - same effect
 
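The new transcribe() relies on a module-level ASR_PIPELINE object whose construction is outside this diff. A minimal sketch of how such a local pipeline is typically built, assuming the transformers library and a Whisper checkpoint (the actual model id, device and options used in app.py are not visible in this commit):

# Hypothetical construction of the local pipeline; the real checkpoint may differ.
from transformers import pipeline

ASR_PIPELINE = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",   # illustrative model id, not taken from the commit
)

# Called with a list of file paths, a transformers ASR pipeline returns a list of
# dicts, which is consistent with the response[0]["text"] indexing used above.
# (Called with a single path, it returns one dict of the form {"text": "..."}.)
result = ASR_PIPELINE(["recording.wav"])
print(result[0]["text"])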
 
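When USE_LOCAL_ASR_PIPELINE is set back to False, transcribe() takes the serverless branch and posts the raw audio bytes to the Hugging Face Inference API. A sketch of the request/response shape that branch relies on; the concrete ASR_API_URL is defined elsewhere in app.py, so the endpoint below is a placeholder:

import os
import requests

# Placeholder endpoint: the real ASR_API_URL in app.py is not part of this diff.
ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-small"
HF_KEY = os.environ.get("HF_KEY")   # assumption: token loaded via dotenv, as in app.py
ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}

with open("recording.wav", "rb") as f:
    data = f.read()

# The serverless ASR endpoint accepts raw audio bytes and, on success,
# answers with JSON of the form {"text": "..."}.
response = requests.post(ASR_API_URL, headers=ASR_API_HEADERS, data=data)
print(response.json()["text"])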
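For context, the re-enabled gr.Audio component and its stop_recording event implement a record-then-transcribe flow: type='filepath' makes Gradio hand transcribe() a temporary audio file path, which suits both the local pipeline and the byte upload to the API. A stripped-down, self-contained sketch of that wiring; respond() here is a stand-in for the Space's real chat handler, and the component set is reduced to the pieces this commit touches:

import gradio as gr

def transcribe(audio_file):
    # Stand-in: app.py calls ASR_PIPELINE or the serverless API here.
    return f"(transcription of {audio_file})"

def respond(user_message, history):
    # Stand-in chat handler: appends an echo reply and clears the textbox.
    history = (history or []) + [(user_message, f"You said: {user_message}")]
    return "", history

def transcribe_and_respond(audio_file, history):
    # Mirrors the commit: transcribe first, then reuse the normal respond() path.
    return respond(transcribe(audio_file), history)

with gr.Blocks() as demo:
    chatbot_area = gr.Chatbot()
    user_msg_txb = gr.Textbox(label="Mensagem")
    audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'],
                        interactive=True, type='filepath')
    submit_btn = gr.Button("Enviar")

    # Recording stops -> transcribe the temp file -> feed the chat handler.
    audio_in.stop_recording(transcribe_and_respond,
                            inputs=[audio_in, chatbot_area],
                            outputs=[user_msg_txb, chatbot_area])
    submit_btn.click(respond, inputs=[user_msg_txb, chatbot_area],
                     outputs=[user_msg_txb, chatbot_area])
    user_msg_txb.submit(respond, inputs=[user_msg_txb, chatbot_area],
                        outputs=[user_msg_txb, chatbot_area])

demo.launch()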