ysharma (HF Staff) committed on
Commit 22db94b
1 Parent(s): efdf05e
Files changed (1)
  1. app.py +36 -27
app.py CHANGED
@@ -92,14 +92,7 @@ def whisper_stt(audio):
     # print the recognized text
     print(f"transcript is : {result_transc.text}")
     print(f"translation is : {result_transl.text}")
-
-    # decode the audio
-    #options = whisper.DecodingOptions(fp16 = False, language='en') #lang
-    #result = whisper.decode(model, mel, options)
-
-    # print the recognized text
-    # print(f"transcript is : {result.text}")
-    # return result.text, lang
+
     return result_transc.text, result_transl.text, lang


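Note for context: the rest of whisper_stt is outside this hunk. A minimal sketch of how it presumably produces result_transc, result_transl and lang, assuming the openai-whisper package and a preloaded model; the model size and audio preprocessing are assumptions, only the prints and return value come from the diff.

import whisper

model = whisper.load_model("base")  # assumed model size, not shown in the diff

def whisper_stt(audio):
    # load and pad/trim the recording, then build the log-Mel spectrogram
    audio_arr = whisper.load_audio(audio)
    audio_arr = whisper.pad_or_trim(audio_arr)
    mel = whisper.log_mel_spectrogram(audio_arr).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # decode twice: once as-is, once translated to English
    result_transc = whisper.decode(model, mel, whisper.DecodingOptions(task="transcribe", fp16=False))
    result_transl = whisper.decode(model, mel, whisper.DecodingOptions(task="translate", fp16=False))

    # print the recognized text
    print(f"transcript is : {result_transc.text}")
    print(f"translation is : {result_transl.text}")
    return result_transc.text, result_transl.text, lang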
@@ -146,10 +139,10 @@ def lang_model_response(prompt, language):
     output = response.json()
     output_tmp = output[0]['generated_text']
     print(f"Bloom API Response is : {output_tmp}")
-    if language == 'en':
-        solution = output_tmp.split("Answer: ")[2].split("\n")[0]
-    else:
-        solution = output_tmp.split(".")[1]
+    #if language == 'en':
+    solution = output_tmp.split("Answer: ")[2].split("\n")[0]
+    #else:
+    #    solution = output_tmp.split(".")[1]
     print(f"Final Bloom Response after splits is: {solution}")
     return solution

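Note for context: the request that produces `response` sits above this hunk. A minimal sketch of the surrounding call, assuming the Hugging Face Inference API for bigscience/bloom; the prompt template, token and generation parameters are illustrative, only the response parsing mirrors the diff. With one worked example plus the final cue, the model's continuation is whatever follows the second "Answer: ", hence the [2] index.

import requests

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
HEADERS = {"Authorization": "Bearer hf_xxx"}  # placeholder token

def lang_model_response(prompt, language):
    # assumed few-shot template: one worked example, then the real question;
    # 'language' is kept only for signature compatibility with the diff
    payload = {
        "inputs": ("Question: How many days are there in a week?\n"
                   "Answer: There are seven days in a week.\n"
                   f"Question: {prompt}\nAnswer: "),
        "parameters": {"max_new_tokens": 64},
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    output = response.json()
    output_tmp = output[0]['generated_text']
    # generated_text echoes the prompt, so the answer is the text after the
    # second "Answer: "; keep only its first line
    solution = output_tmp.split("Answer: ")[2].split("\n")[0]
    return solution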
@@ -163,19 +156,35 @@ def tts(text, language):
     coquiTTS.get_tts(text, fp, speaker = {"language" : language})
     return fp.name

-#demo = gr.Blocks()
-#with demo:
-#    gr.Markdown("<h1><center>Testing</center></h1>")
-
+demo = gr.Blocks()
+with demo:
+    gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
+    gr.Markdown(
+        """Model pipeline consisting of - Whisper for Speech-to-text, Bloom for Text-generation, and CoquiTTS for Text-To-Speech. <br> Front end using Gradio Blocks API.
+        """)
+    with gr.Row():
+        with gr.Column():
+            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice here') #type='filepath'
+            b1 = gr.Button("AI response (Whisper - Bloom - Coqui pipeline)")
+            out_transcript = gr.Textbox(label='As is Transcript using OpenAI Whisper')
+            out_translation_en = gr.Textbox(label='English Translation of audio using OpenAI Whisper')
+        with gr.Column():
+            out_audio = gr.Audio(label='AI response in Audio form in your preferred language')
+            out_generated_text = gr.Textbox(label='AI response to your query in your preferred language using Bloom!')
+            out_generated_text_en = gr.Textbox(label='AI response to your query in English using Bloom!')

-gr.Interface(
-    title = 'Testing Whisper',
-    fn=driver_fun,
-    inputs=[
-        gr.Audio(source="microphone", type="filepath"), #streaming = True,
-        # "state"
-    ],
-    outputs=[
-        "textbox", "textbox", "textbox", "textbox", "audio",
-    ],
-    live=True).launch()
+    b1.click(driver_fun, inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text, out_generated_text_en, out_audio])
+
+demo.launch(enable_queue=True, debug=True)
+
+#gr.Interface(
+#    title = 'Testing Whisper',
+#    fn=driver_fun,
+#    inputs=[
+#        gr.Audio(source="microphone", type="filepath"), #streaming = True,
+#        # "state"
+#    ],
+#    outputs=[
+#        "textbox", "textbox", "textbox", "textbox", "audio",
+#    ],
+#    live=True).launch()
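Note for context: the fp handed to coquiTTS.get_tts above is created earlier in the function. A minimal sketch of the full tts helper, assuming the neon-tts-plugin-coqui package and a temporary wav file; only the get_tts call and the fp.name return are taken from the diff.

import tempfile
from neon_tts_plugin_coqui import CoquiTTS  # assumed TTS backend

coquiTTS = CoquiTTS()

def tts(text, language):
    # synthesize into a temporary wav file and hand its path back to Gradio
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(text, fp, speaker = {"language" : language})
    return fp.name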
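Note for context: driver_fun itself is not touched by this commit. For the b1.click wiring above to work, it has to return five values in the order [out_transcript, out_translation_en, out_generated_text, out_generated_text_en, out_audio]; a minimal sketch of how it presumably chains the three stages (the separate English-response call is an assumption):

def driver_fun(audio):
    # speech -> transcript, English translation and detected language (Whisper)
    transcript, translation_en, lang = whisper_stt(audio)
    # text -> answer in the user's language and in English (Bloom)
    generated_text = lang_model_response(translation_en, lang)
    generated_text_en = lang_model_response(translation_en, 'en')  # assumed second call
    # answer -> speech in the user's preferred language (CoquiTTS)
    audio_out = tts(generated_text, lang)
    return transcript, translation_en, generated_text, generated_text_en, audio_out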