develops20 committed on
Commit
6d5604d
·
verified ·
1 Parent(s): 0d22192

Extract the numpy.ndarray from the tuple returned by gr.Audio before passing it to the whisper pipeline, and add an error-handling check in transcribe

Browse files
Files changed (1) hide show
  1. app.py +55 -16
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import os
 
5
 
6
  # Initialize Whisper for speech-to-text
7
  whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
@@ -10,35 +11,73 @@ whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
10
  knowledge_base = {
11
  "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
12
  "price of camry": "The Toyota Camry starts at $25,000."
 
13
  }
14
 
15
  def transcribe(audio):
16
- return whisper(audio)["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def text_to_speech(text):
19
- tts = gTTS(text, lang="en")
20
- tts.save("response.mp3")
21
- return "response.mp3"
 
 
 
 
 
 
 
 
22
 
23
  def answer_question(text):
24
- for key in knowledge_base:
25
- if key in text.lower():
26
- return knowledge_base[key]
27
- return "Sorry, I can help with car availability and prices. Try again!"
 
 
 
 
 
 
 
 
 
28
 
29
  def process_audio(audio):
30
- text = transcribe(audio)
31
- response = answer_question(text)
32
- audio_response = text_to_speech(response)
33
- return response, audio_response
 
 
 
 
 
 
 
 
34
 
35
  # Gradio interface
36
  with gr.Blocks() as demo:
37
  gr.Markdown("# AI Support Agent: Car Dealership")
38
- audio_input = gr.Audio(label="Speak to the Agent") # No 'type' parameter needed
39
  text_output = gr.Textbox(label="Agent Response")
40
  audio_output = gr.Audio(label="Listen to Response")
41
  btn = gr.Button("Submit")
42
- btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])
43
-
44
- demo.launch()
 
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import os
5
+ import numpy as np
6
 
# Initialize Whisper for speech-to-text.
# "openai/whisper-tiny" is the smallest Whisper checkpoint — loaded once at
# module import so every transcribe() call reuses the same pipeline.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
 
# Tiny hard-coded FAQ. answer_question() matches these keys as substrings of
# the LOWERCASED transcription, so every key must itself be lowercase.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    # A comma was missing after this entry, which made the whole dict literal
    # a SyntaxError (implicit string concatenation followed by a stray colon).
    "price of camry": "The Toyota Camry starts at $25,000.",
    # Key lowercased ("tesla", not "Tesla") so the case-insensitive substring
    # lookup in answer_question can actually match it.
    "price of tesla": "The Tesla starts at $60,000.",
}
16
 
def transcribe(audio):
    """Transcribe Gradio audio input to text with the Whisper pipeline.

    gr.Audio (numpy mode, the default here) delivers a ``(sample_rate, data)``
    tuple — sample rate FIRST. The original code unpacked it as
    ``audio_data, _ = audio``, which sent the int sample rate to Whisper.
    A filepath string input is passed through to the pipeline unchanged.

    Returns the transcribed text; re-raises any pipeline error after logging.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # Correct unpack order: (sample_rate, np.ndarray).
            sample_rate, audio_data = audio
            # Whisper expects mono float32; Gradio records int16 PCM.
            audio_data = audio_data.astype(np.float32)
            if audio_data.ndim > 1:
                # Downmix stereo (n_samples, n_channels) to mono.
                audio_data = audio_data.mean(axis=1)
            if np.abs(audio_data).max() > 1.0:
                # Scale int16 full-scale samples into [-1.0, 1.0].
                audio_data = audio_data / 32768.0
            # Raw arrays must be accompanied by their sampling rate, otherwise
            # the pipeline cannot resample to Whisper's expected 16 kHz.
            result = whisper({"raw": audio_data, "sampling_rate": sample_rate})["text"]
        else:
            result = whisper(audio)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
 
def text_to_speech(text):
    """Render *text* as spoken English and save it to response.mp3.

    Returns the output filename so Gradio's audio component can play it.
    Logs and re-raises any gTTS/network failure.
    """
    print(f"Generating speech for text: {text}")
    try:
        gTTS(text, lang="en").save("response.mp3")
        print("Speech saved to response.mp3")
        return "response.mp3"
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
 
def answer_question(text):
    """Return the canned answer whose key occurs in *text* (case-insensitive).

    Falls back to a polite "can't help" message when nothing matches.
    """
    print(f"Answering question: {text}")
    try:
        lowered = text.lower()
        for key, answer in knowledge_base.items():
            if key in lowered:
                print(f"Found match for key: {key}")
                return answer
        print("No match found in knowledge base")
        return "Sorry, I can help with car availability and prices. Try again!"
    except Exception as e:
        print(f"Error in answer_question: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
 
def process_audio(audio):
    """Full agent pipeline: speech -> text -> answer -> speech.

    Returns a (response_text, response_audio_path) pair for the two
    Gradio output components; logs and re-raises any stage failure.
    """
    print(f"Processing audio: {type(audio)}")
    try:
        transcript = transcribe(audio)
        reply = answer_question(transcript)
        reply_audio = text_to_speech(reply)
        print(f"Process complete. Response: {reply}, Audio: {reply_audio}")
        return reply, reply_audio
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
 
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    # Default gr.Audio (no type= argument) hands process_audio a
    # (sample_rate, ndarray) tuple, which transcribe() unpacks.
    audio_input = gr.Audio(label="Speak to the Agent")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    # Submit runs the full speech -> answer -> speech pipeline; the two
    # outputs map to process_audio's (text, audio path) return pair.
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])
    # NOTE(review): demo.launch() is not visible in this chunk — confirm it
    # still exists later in the file, otherwise the app never starts.