Commit: update (Space status: Build error)

app.py CHANGED
```diff
@@ -11,7 +11,7 @@ from neon_tts_plugin_coqui import CoquiTTS
 # Whisper: Speech-to-text
 model = whisper.load_model("base")
 model_med = whisper.load_model("medium")
-# Languages covered in Whisper - (exhaustive list) :
+# Languages covered in Whisper - (exhaustive list) :
 #"en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian",
 #"ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish",
 #"pl": "polish", "ca": "catalan", "nl": "dutch", "ar": "arabic", "sv": "swedish",
@@ -139,9 +139,12 @@ def lang_model_response(prompt, language):
     output = response.json()
     output_tmp = output[0]['generated_text']
     print(f"Bloom API Response is : {output_tmp}")
-
-
-
+    if language == 'en':
+        solution = output_tmp.split("Answer: ")[2].split("\n")[0]
+    elif language == 'es':
+        solution = output_tmp.split("Responder: ")[2].split("\n")[0]
+    elif language == 'fr':
+        solution = output_tmp.split("Réponse: ")[2].split("\n")[0]
     # solution = output_tmp.split(".")[1]
     print(f"Final Bloom Response after splits is: {solution}")
     return solution
@@ -160,31 +163,23 @@ demo = gr.Blocks()
 with demo:
     gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
     gr.Markdown(
-        """
+        """Model pipeline consisting of - <br>- [**Whisper**](https://github.com/openai/whisper) for Speech-to-text, <br>- [**Bloom**](https://huggingface.co/bigscience/bloom) for Text-generation, and <br>- [**CoquiTTS**](https://huggingface.co/coqui) for Text-To-Speech. <br><br> Front end is built using [**Gradio Block API**](https://gradio.app/docs/#blocks).<br>All three models are Multilingual, however, there are only these three overlapping languages among them - Spanish (es), French (fr), and English (en). Hence it would be suggested to test using these languages to get the best results out of this ML-App. If an English voice input is given then both the textboxes on the left-hand side would show the same transcripts. However, if the input is either in Spanish or French, then the first textbox would show the language transcript, while the next one would show its English translation.
         """)
     with gr.Row():
         with gr.Column():
             in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice here') #type='filepath'
-            b1 = gr.Button("
+            b1 = gr.Button("Whisper") #- Bloom - Coqui pipeline)")
             out_transcript = gr.Textbox(label= 'As is Transcript using OpenAI Whisper')
             out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
+            out_lang = gr.Textbox(visible=False)
         with gr.Column():
             out_audio = gr.Audio(label='AI response in Audio form in your preferred language')
             out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
             out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
 
+    b1.click(whisper_stt, inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_lang])
+    b2.click(
+
     b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text,out_generated_text_en, out_audio])
 
-demo.launch(enable_queue=True, debug=True)
-
-#gr.Interface(
-#        title = 'Testing Whisper',
-#        fn=driver_fun,
-#        inputs=[
-#            gr.Audio(source="microphone", type="filepath"), #streaming = True,
-#            # "state"
-#        ],
-#        outputs=[
-#            "textbox", "textbox", "textbox", "textbox", "audio",
-#        ],
-#        live=True).launch()
+demo.launch(enable_queue=True, debug=True)
```
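`b1` is rewired to a `whisper_stt` callback that this diff does not show. Judging from its three outputs (transcript, English translation, and the hidden language textbox), it plausibly runs Whisper twice; a hypothetical sketch using only documented Whisper calls:

```python
import whisper

model = whisper.load_model("base")        # quick same-language transcript
model_med = whisper.load_model("medium")  # stronger model for translation

def whisper_stt(audio_path):
    # transcribe() returns a dict with "text" and the detected "language";
    # task="translate" makes Whisper emit English instead.
    transcript = model.transcribe(audio_path, task="transcribe")
    translation = model_med.transcribe(audio_path, task="translate")
    return transcript["text"], translation["text"], transcript["language"]
```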
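The closing lines are what trip the Space's "Build error" status: `b2.click(` is left unclosed, so app.py no longer parses, and no `b2` button is defined anywhere in the file. A sketch of two-step wiring that would at least run, assuming a second button hands the Whisper outputs to a hypothetical `bloom_tts` stage (Bloom generation plus Coqui TTS); both callback bodies are stubs:

```python
import gradio as gr

def whisper_stt(audio_path):
    # Speech-to-text step (see the Whisper sketch above); stubbed here.
    return "transcript", "english translation", "en"

def bloom_tts(text_en, lang):
    # Hypothetical: Bloom text generation followed by Coqui TTS; stubbed here.
    return "generated text", "generated text (en)", None

demo = gr.Blocks()
with demo:
    gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
    with gr.Row():
        with gr.Column():
            in_audio = gr.Audio(source="microphone", type="filepath", label="Record your voice here")
            b1 = gr.Button("Whisper")
            b2 = gr.Button("Bloom + Coqui")  # the button the commit never defines
            out_transcript = gr.Textbox(label="As is Transcript using OpenAI Whisper")
            out_translation_en = gr.Textbox(label="English Translation of audio using OpenAI Whisper")
            out_lang = gr.Textbox(visible=False)  # hidden carrier for the detected language
        with gr.Column():
            out_audio = gr.Audio(label="AI response in Audio form in your preferred language")
            out_generated_text = gr.Textbox(label="AI response in your preferred language using Bloom!")
            out_generated_text_en = gr.Textbox(label="AI response in English using Bloom!")

    # Step 1: speech-to-text fills the left column and the hidden language box.
    b1.click(whisper_stt, inputs=[in_audio],
             outputs=[out_transcript, out_translation_en, out_lang])
    # Step 2: generation + TTS consumes step 1's outputs.
    b2.click(bloom_tts, inputs=[out_translation_en, out_lang],
             outputs=[out_generated_text, out_generated_text_en, out_audio])

demo.launch(enable_queue=True, debug=True)
```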