ysharma (HF staff) committed
Commit d85011f
1 Parent(s): 65b4927
Files changed (1):
  1. app.py (+14 -19)
app.py CHANGED
@@ -11,7 +11,7 @@ from neon_tts_plugin_coqui import CoquiTTS
 # Whisper: Speech-to-text
 model = whisper.load_model("base")
 model_med = whisper.load_model("medium")
-# Languages covered in Whisper - (exhaustive list) :
+# Languages covered in Whisper - (exhaustive list) :
 #"en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian",
 #"ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish",
 #"pl": "polish", "ca": "catalan", "nl": "dutch", "ar": "arabic", "sv": "swedish",
@@ -139,9 +139,12 @@ def lang_model_response(prompt, language):
     output = response.json()
     output_tmp = output[0]['generated_text']
     print(f"Bloom API Response is : {output_tmp}")
-    #if language == 'en':
-    solution = output_tmp.split("Answer: ")[2].split("\n")[0]
-    #else:
+    if language == 'en':
+        solution = output_tmp.split("Answer: ")[2].split("\n")[0]
+    elif language == 'es':
+        solution = output_tmp.split("Responder: ")[2].split("\n")[0]
+    elif language == 'fr':
+        solution = output_tmp.split("Réponse: ")[2].split("\n")[0]
     # solution = output_tmp.split(".")[1]
     print(f"Final Bloom Response after splits is: {solution}")
     return solution
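
To illustrate the language-keyed split this hunk adds: the prompt appears to contain one few-shot example, so the text after the second occurrence of the answer marker (index `[2]` of the split) is the model's reply to the real question, trimmed to its first line. The `extract_solution` helper and the sample completion below are hypothetical; the commit inlines this logic in `lang_model_response`.

```python
# Illustration only: the helper name and sample text are made up;
# the commit inlines this logic in lang_model_response.
ANSWER_MARKERS = {"en": "Answer: ", "es": "Responder: ", "fr": "Réponse: "}

def extract_solution(generated_text: str, language: str) -> str:
    # One few-shot example in the prompt means the text after the second
    # marker occurrence (split index [2]) is the model's actual reply;
    # keep only its first line.
    marker = ANSWER_MARKERS[language]
    return generated_text.split(marker)[2].split("\n")[0]

sample = ("Question: What is 2 + 2?\nAnswer: 4\n"
          "Question: What is the capital of France?\nAnswer: Paris\n")
print(extract_solution(sample, "en"))  # -> Paris
```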
@@ -160,31 +163,23 @@ demo = gr.Blocks()
 with demo:
     gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
     gr.Markdown(
-        """This is a duplicate of https://huggingface.co/spaces/ysharma/Talk_to_Multilingual_AI_WhisperBloomCoqui <br><br>Going forward, this duplicated Space will not be maintained.
+        """Model pipeline consisting of <br>- [**Whisper**](https://github.com/openai/whisper) for Speech-to-Text, <br>- [**Bloom**](https://huggingface.co/bigscience/bloom) for Text Generation, and <br>- [**CoquiTTS**](https://huggingface.co/coqui) for Text-to-Speech. <br><br>The front end is built with the [**Gradio Blocks API**](https://gradio.app/docs/#blocks).<br>All three models are multilingual, but only three languages overlap among them - Spanish (es), French (fr), and English (en) - so testing in one of these gives the best results. If the voice input is in English, both textboxes on the left-hand side show the same transcript; if it is in Spanish or French, the first textbox shows the source-language transcript and the second its English translation.
         """)
     with gr.Row():
         with gr.Column():
             in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice here')
-            b1 = gr.Button("AI response (Whisper - Bloom - Coqui pipeline)")
+            b1 = gr.Button("Whisper")  # was: "AI response (Whisper - Bloom - Coqui pipeline)"
             out_transcript = gr.Textbox(label='As-is transcript using OpenAI Whisper')
             out_translation_en = gr.Textbox(label='English translation of the audio using OpenAI Whisper')
+            out_lang = gr.Textbox(visible=False)
         with gr.Column():
             out_audio = gr.Audio(label='AI response as audio in your preferred language')
             out_generated_text = gr.Textbox(label='AI response to your query in your preferred language, using Bloom!')
             out_generated_text_en = gr.Textbox(label='AI response to your query in English, using Bloom!')

+    b1.click(whisper_stt, inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_lang])
+    b2.click(
+
     b1.click(driver_fun, inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text, out_generated_text_en, out_audio])

-demo.launch(enable_queue=True, debug=True)
-
-#gr.Interface(
-#    title = 'Testing Whisper',
-#    fn=driver_fun,
-#    inputs=[
-#        gr.Audio(source="microphone", type="filepath"), #streaming = True,
-#        # "state"
-#    ],
-#    outputs=[
-#        "textbox", "textbox", "textbox", "textbox", "audio",
-#    ],
-#    live=True).launch()
+demo.launch(enable_queue=True, debug=True)
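
The hunk above leaves `+ b2.click(` unfinished and wires `b1` to two handlers, so the commit looks mid-refactor toward a two-step flow: one button for transcription, a second for generation and synthesis. A minimal sketch of how that wiring might be completed, assuming the Gradio 3.x API used elsewhere in app.py; `b2`, the handler names, and the stub bodies are all hypothetical, not part of the commit.

```python
# Hypothetical completion of the two-step wiring; b2 and both handlers
# are assumptions - the diff itself leaves `b2.click(` unfinished.
import gradio as gr

def whisper_stt(audio_path):
    # Stub: app.py would run Whisper here and return
    # (transcript, english_translation, detected_language_code).
    return "transcript", "english translation", "en"

def bloom_coqui(translation_en, language):
    # Stub: app.py would query Bloom, then synthesize speech with CoquiTTS.
    return f"Bloom reply to: {translation_en}", None

with gr.Blocks() as demo:
    in_audio = gr.Audio(source="microphone", type="filepath", label="Record your voice here")
    b1 = gr.Button("Whisper")
    b2 = gr.Button("Bloom + Coqui")  # hypothetical second button
    out_transcript = gr.Textbox(label="As-is transcript")
    out_translation_en = gr.Textbox(label="English translation")
    out_lang = gr.Textbox(visible=False)  # hidden carrier for the language code
    out_generated_text = gr.Textbox(label="Bloom response")
    out_audio = gr.Audio(label="Spoken response")

    b1.click(whisper_stt, inputs=[in_audio],
             outputs=[out_transcript, out_translation_en, out_lang])
    b2.click(bloom_coqui, inputs=[out_translation_en, out_lang],
             outputs=[out_generated_text, out_audio])

demo.launch(enable_queue=True, debug=True)
```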
 