ysharma (HF staff) committed on
Commit a4fd732
1 Parent(s): 2682f2f
Files changed (1)
  1. app.py +18 -18
app.py CHANGED
@@ -13,7 +13,7 @@ dataset = load_dataset("ysharma/short_jokes")
 
 # Whisper: Speech-to-text
 model = whisper.load_model("base")
-model_med = whisper.load_model("medium")
+#model_med = whisper.load_model("medium")
 # Languages covered in Whisper - (exhaustive list) :
 #"en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian",
 #"ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish",
@@ -49,18 +49,18 @@ print(f"Languages for Coqui are: {LANGUAGES}")
 
 # Driver function
 def driver_fun(audio) :
-    transcribe, translation, lang = whisper_stt(audio)
+    translation, lang = whisper_stt(audio)  # older : transcribe, translation, lang
     #text1 = model.transcribe(audio)["text"]
 
     #For now only taking in English text for Bloom prompting as inference model is not high spec
     #text_generated = lang_model_response(transcribe, lang)
     #text_generated_en = lang_model_response(translation, 'en')
 
-    if lang in ['es', 'fr']:
-        speech = tts(transcribe, lang)
-    else:
-        speech = tts(translation, 'en') #'en')
-    return transcribe, translation, speech
+    #if lang in ['es', 'fr']:
+    #    speech = tts(transcribe, lang)
+    #else:
+    speech = tts(translation, 'en') #'en')
+    return translation, speech #transcribe,
 
 
 # Whisper - speech-to-text
@@ -79,16 +79,16 @@ def whisper_stt(audio):
     print(f"Detected language: {max(probs, key=probs.get)}")
 
     # decode the audio
-    options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
+    #options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
     options_transl = whisper.DecodingOptions(fp16 = False, language='en', task='translate') #lang
-    result_transc = whisper.decode(model_med, mel, options_transc)
+    #result_transc = whisper.decode(model_med, mel, options_transc)
     result_transl = whisper.decode(model_med, mel, options_transl)
 
     # print the recognized text
-    print(f"transcript is : {result_transc.text}")
+    #print(f"transcript is : {result_transc.text}")
     print(f"translation is : {result_transl.text}")
 
-    return result_transc.text, result_transl.text, lang
+    return result_transl.text, lang #result_transc.text,
 
 
 # Coqui - Text-to-Speech
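
The `mel` and `probs` used in this hunk are produced earlier in `whisper_stt`, outside the lines shown. A minimal sketch of that preamble, assuming the standard openai-whisper preprocessing calls (the exact lines in app.py may differ):

# Assumed preamble of whisper_stt (not shown in this hunk), standard openai-whisper calls.
# 'audio' is the filepath handed over by the gr.Audio microphone component.
audio = whisper.load_audio(audio)
audio = whisper.pad_or_trim(audio)                             # pad/trim to 30 seconds
mel = whisper.log_mel_spectrogram(audio).to(model_med.device)  # features for decoding
_, probs = model_med.detect_language(mel)                      # dict of language probabilities
lang = max(probs, key=probs.get)                               # e.g. 'en', 'es', 'fr'
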
@@ -109,15 +109,15 @@ with demo:
     """)
     with gr.Row():
         with gr.Column():
-            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English, Spanish or French for best results-') #type='filepath'
-            b1 = gr.Button("AI response pipeline (Whisper - Bloom - Coqui pipeline)")
+            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English -') #type='filepath'
+            b1 = gr.Button("AI Response")
             out_transcript = gr.Textbox(label= 'English/Spanish/French Transcript of your Audio using OpenAI Whisper')
-            out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
+            #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
         with gr.Column():
-            out_audio = gr.Audio(label='AI response in Audio form in your language - This will be either in Spanish, or in French or in English for all other languages -')
-            out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
-            out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
+            out_audio = gr.Audio(label='AI response in Audio form in English language')
+            #out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
+            #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
 
-    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text,out_generated_text_en, out_audio])
+    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
 
 demo.launch(enable_queue=True, debug=True)
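
The `tts()` helper that `driver_fun` now calls with English text only is not shown in this diff. Purely as an illustration, a helper along these lines could be written with the Coqui TTS Python package; the model name, output path, and return convention here are assumptions, not necessarily what app.py does:

# Illustrative sketch only - not the implementation in app.py.
from TTS.api import TTS

coqui_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")  # assumed English model

def tts(text, lang='en'):
    out_path = "ai_response.wav"                        # assumed output location
    coqui_model.tts_to_file(text=text, file_path=out_path)
    return out_path                                     # gr.Audio can play a returned filepath
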