Abhishek-D7 committed on
Commit
ecdf0ef
·
verified ·
1 Parent(s): 0e9b4cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -80,38 +80,42 @@ def ask_openai_with_rag(query):
80
  traceback.print_exc()
81
  return "[Error: Failed to generate response.]"
82
 
83
def synthesize_speech(text, lang="en"):
    """Synthesize *text* to speech with the English TTS model.

    Args:
        text: The text to speak.
        lang: Accepted for caller compatibility. The English model
            (``tts_models["en"]``) is always used, so this value is
            currently ignored.

    Returns:
        A ``(sample_rate, samples)`` tuple, which is the order Gradio's
        Audio output expects, or ``None`` when synthesis fails or the model
        returns something other than a NumPy array.
    """
    try:
        tts_pipeline = tts_models["en"]
        output = tts_pipeline(text)
        audio = output["audio"]
        # Gradio needs a NumPy array; anything else is reported and dropped.
        if not isinstance(audio, np.ndarray):
            print("Unexpected audio type:", type(audio))
            return None
        # Sample rate goes first: Gradio reads the tuple as (rate, data).
        # NOTE(review): 22050 Hz is assumed to match the model - confirm.
        return (22050, audio)
    except Exception as e:
        # Best-effort: the UI still shows the text reply without audio.
        print("TTS error:", e)
        return None


def full_pipeline(audio):
    """Run transcription -> RAG answer -> speech synthesis for one request.

    Args:
        audio: The recorded audio passed straight to ``transcribe_audio``.

    Returns:
        A ``(transcription, reply, tts_audio)`` tuple. ``tts_audio`` is a
        ``(sample_rate, samples)`` tuple or ``None`` when no valid audio
        could be produced.
    """
    transcription = transcribe_audio(audio)

    # Bail out before any further work when transcription failed.
    if "[Error" in transcription:
        return transcription, "Sorry, I couldn't understand that.", None

    # Only detect the language of real transcriptions, not error strings.
    lang = detect_language(transcription)

    reply = ask_openai_with_rag(transcription)
    tts_audio = synthesize_speech(reply, lang)

    # Ensure we don't pass invalid audio to Gradio: the samples are the
    # second element of the (sample_rate, samples) tuple.
    if not isinstance(tts_audio, tuple) or not isinstance(tts_audio[1], np.ndarray):
        return transcription, reply, None

    return transcription, reply, tts_audio
114
 
 
115
  # ---- GRADIO UI ----
116
  with gr.Blocks() as demo:
117
  gr.Markdown("# 🏠 Voice-Based Real Estate Assistant (Hindi + English)")
 
80
  traceback.print_exc()
81
  return "[Error: Failed to generate response.]"
82
 
83
def synthesize_speech(text):
    """Synthesize *text* to speech with the English TTS model.

    The model output is flattened, peak-normalized and converted to 16-bit
    PCM so it can be handed to a Gradio Audio output.

    Args:
        text: The text to speak. Blank or non-string input yields ``None``
            without calling the model.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is an ``int16``
        NumPy array, or ``None`` when there is nothing to speak or synthesis
        fails.
    """
    # Used only when the model output does not report its own rate.
    fallback_rate = 22050

    if not isinstance(text, str) or not text.strip():
        return None

    try:
        # Always using English TTS
        output = tts_models["en"](text)

        # Presumably a float array, possibly shaped (1, N); accept lists or
        # other array-likes too and drop singleton axes so Gradio does not
        # read a (1, N) buffer as N channels.
        samples = np.atleast_1d(
            np.squeeze(np.asarray(output["audio"], dtype=np.float32))
        )
        if samples.size == 0:
            return None

        # Prefer the rate reported by the model; a wrong rate plays the
        # clip at the wrong speed and pitch.
        try:
            sample_rate = int(output["sampling_rate"])
        except (KeyError, TypeError, ValueError):
            sample_rate = fallback_rate

        # Casting NaN/inf to int16 is undefined, so silence them first.
        samples = np.nan_to_num(samples, nan=0.0, posinf=0.0, neginf=0.0)

        # Normalize to int16 PCM (skip the division for pure silence).
        peak = np.max(np.abs(samples))
        if peak > 0:
            samples = samples / peak
        audio_int16 = (samples * 32767).astype(np.int16)

        return (sample_rate, audio_int16)

    except Exception as e:
        # Best-effort: the caller falls back to a text-only reply.
        print("TTS error:", e)
        return None
100
 
101
 
102
+
103
def full_pipeline(audio):
    """Run transcription -> RAG answer -> speech synthesis for one request.

    Args:
        audio: The recorded audio passed straight to ``transcribe_audio``.

    Returns:
        A ``(transcription, reply, tts_audio)`` tuple. ``tts_audio`` is the
        ``(sample_rate, samples)`` tuple from ``synthesize_speech`` or
        ``None`` when no valid audio is available.
    """
    transcription = transcribe_audio(audio)

    # An empty/None transcription is treated like an explicit error marker
    # instead of crashing on the substring test or querying the LLM with
    # nothing.
    if not transcription or "[Error" in transcription:
        return transcription, "Sorry, I couldn't understand that.", None

    reply = ask_openai_with_rag(transcription)

    # Show a failed answer as text but do not read the error marker aloud.
    if not isinstance(reply, str) or reply.startswith("[Error"):
        return transcription, reply, None

    tts_audio = synthesize_speech(reply)

    # Ensure we don't pass invalid audio to Gradio.
    has_valid_audio = (
        isinstance(tts_audio, tuple)
        and len(tts_audio) == 2
        and isinstance(tts_audio[1], np.ndarray)
    )
    return transcription, reply, tts_audio if has_valid_audio else None
117
 
118
+
119
  # ---- GRADIO UI ----
120
  with gr.Blocks() as demo:
121
  gr.Markdown("# 🏠 Voice-Based Real Estate Assistant (Hindi + English)")