update
app.py
CHANGED
```diff
@@ -92,14 +92,7 @@ def whisper_stt(audio):
     # print the recognized text
     print(f"transcript is : {result_transc.text}")
     print(f"translation is : {result_transl.text}")
-
-    # decode the audio
-    #options = whisper.DecodingOptions(fp16 = False, language='en') #lang
-    #result = whisper.decode(model, mel, options)
-
-    # print the recognized text
-    # print(f"transcript is : {result.text}")
-    # return result.text, lang
+
     return result_transc.text, result_transl.text, lang
 
 
```
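This hunk drops the dead single-decode path and keeps the transcribe/translate pair computed earlier in `whisper_stt`. For reference, a minimal sketch of how `result_transc`, `result_transl`, and `lang` are presumably produced with openai-whisper's decode API, assuming a globally loaded `model`; only the print and return lines actually appear in this diff:

```python
import whisper

model = whisper.load_model("base")  # model size is an assumption

def whisper_stt(audio):
    # load the recording and fit it to Whisper's 30-second input window
    wav = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(wav).to(model.device)

    # detect the spoken language from the mel spectrogram
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # decode twice: a same-language transcript and an English translation;
    # whisper.decode returns a DecodingResult, hence the .text attribute
    result_transc = whisper.decode(model, mel, whisper.DecodingOptions(task="transcribe", fp16=False))
    result_transl = whisper.decode(model, mel, whisper.DecodingOptions(task="translate", fp16=False))

    # print the recognized text
    print(f"transcript is : {result_transc.text}")
    print(f"translation is : {result_transl.text}")
    return result_transc.text, result_transl.text, lang
```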
```diff
@@ -146,10 +139,10 @@ def lang_model_response(prompt, language):
     output = response.json()
     output_tmp = output[0]['generated_text']
     print(f"Bloom API Response is : {output_tmp}")
-    if language == 'en':
-        solution = output_tmp.split("Answer: ")[2].split("\n")[0]
-    else:
-        solution = output_tmp.split(".")[1]
+    #if language == 'en':
+    solution = output_tmp.split("Answer: ")[2].split("\n")[0]
+    #else:
+    #    solution = output_tmp.split(".")[1]
     print(f"Final Bloom Response after splits is: {solution}")
     return solution
```
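The hunk flips the language branch into comments and always takes the `"Answer: "` split. Since `response` is built outside the hunk, here is a hedged sketch of the whole function, assuming the Hugging Face Inference API for `bigscience/bloom` (the endpoint, token placeholder, and the toy strings in the comments are illustrative, not from the commit):

```python
import requests

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"  # assumed endpoint
HEADERS = {"Authorization": "Bearer <HF_TOKEN>"}  # placeholder token

def lang_model_response(prompt, language):
    # query Bloom; the few-shot prompt is expected to contain one worked
    # "Answer: " example plus the final "Answer:" cue for the real question
    response = requests.post(API_URL, headers=HEADERS, json={"inputs": prompt})
    output = response.json()
    output_tmp = output[0]['generated_text']
    print(f"Bloom API Response is : {output_tmp}")

    # [2] picks the text after the second "Answer: " marker (the model's
    # own answer, given one few-shot example); split("\n")[0] trims the
    # next "Question:" line the model tends to append. Toy example:
    #   "Q1\nAnswer: 4\nQ2\nAnswer: Paris\nQuestion: ..."
    #   -> split("Answer: ")[2] == "Paris\nQuestion: ..." -> "Paris"
    solution = output_tmp.split("Answer: ")[2].split("\n")[0]
    print(f"Final Bloom Response after splits is: {solution}")
    return solution
```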
```diff
@@ -163,19 +156,35 @@ def tts(text, language):
     coquiTTS.get_tts(text, fp, speaker = {"language" : language})
     return fp.name
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-"
-
-
+demo = gr.Blocks()
+with demo:
+    gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
+    gr.Markdown(
+        """Model pipeline consisting of Whisper for Speech-to-text, Bloom for Text-generation, and CoquiTTS for Text-to-Speech. <br> Front end using Gradio Block API.
+        """)
+    with gr.Row():
+        with gr.Column():
+            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice here')
+            b1 = gr.Button("AI response (Whisper - Bloom - Coqui pipeline)")
+            out_transcript = gr.Textbox(label='As-is transcript using OpenAI Whisper')
+            out_translation_en = gr.Textbox(label='English translation of audio using OpenAI Whisper')
+        with gr.Column():
+            out_audio = gr.Audio(label='AI response in audio form in your preferred language')
+            out_generated_text = gr.Textbox(label='AI response to your query in your preferred language using Bloom!')
+            out_generated_text_en = gr.Textbox(label='AI response to your query in English using Bloom!')
+
+    b1.click(driver_fun, inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text, out_generated_text_en, out_audio])
+
+demo.launch(enable_queue=True, debug=True)
+
+#gr.Interface(
+#    title = 'Testing Whisper',
+#    fn=driver_fun,
+#    inputs=[
+#        gr.Audio(source="microphone", type="filepath"), #streaming = True,
+#        # "state"
+#    ],
+#    outputs=[
+#        "textbox", "textbox", "textbox", "textbox", "audio",
+#    ],
+#    live=True).launch()
```
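The context lines at the top of this hunk are the tail of the `tts` helper. A plausible reconstruction of the full function, assuming the `CoquiTTS` wrapper from the `neon_tts_plugin_coqui` package (only the `get_tts` call and the return appear in the diff):

```python
import tempfile
from neon_tts_plugin_coqui import CoquiTTS  # assumed import, not shown in the diff

coquiTTS = CoquiTTS()

def tts(text, language):
    # synthesize into a temporary wav file and return its path for gr.Audio
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(text, fp, speaker={"language": language})
    return fp.name
```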
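`driver_fun`, wired to `b1.click` above, is not part of this commit. A minimal sketch of the shape it must have to satisfy the five outputs (transcript, English translation, localized answer, English answer, audio); the back-translation step is an assumption left as a stub:

```python
def driver_fun(audio):
    # speech-to-text: transcript in the spoken language plus English translation
    transcript, translation_en, lang = whisper_stt(audio)

    # text generation: query Bloom with the English translation
    answer_en = lang_model_response(translation_en, lang)

    # assumption: no back-translation is shown in this diff, so the
    # localized answer is simply the English one
    answer_localized = answer_en

    # text-to-speech in the user's language
    audio_out = tts(answer_localized, lang)
    return transcript, translation_en, answer_localized, answer_en, audio_out
```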