ysharma (HF staff) committed on
Commit a4fd732
1 Parent(s): 2682f2f
Files changed (1)
  1. app.py +18 -18
app.py CHANGED
@@ -13,7 +13,7 @@ dataset = load_dataset("ysharma/short_jokes")
 
 # Whisper: Speech-to-text
 model = whisper.load_model("base")
-model_med = whisper.load_model("medium")
+#model_med = whisper.load_model("medium")
 # Languages covered in Whisper - (exhaustive list) :
 #"en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian",
 #"ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish",
@@ -49,18 +49,18 @@ print(f"Languages for Coqui are: {LANGUAGES}")
 
 # Driver function
 def driver_fun(audio) :
-    transcribe, translation, lang = whisper_stt(audio)
+    translation, lang = whisper_stt(audio)  # older : transcribe, translation, lang
     #text1 = model.transcribe(audio)["text"]
 
     #For now only taking in English text for Bloom prompting as inference model is not high spec
     #text_generated = lang_model_response(transcribe, lang)
     #text_generated_en = lang_model_response(translation, 'en')
 
-    if lang in ['es', 'fr']:
-        speech = tts(transcribe, lang)
-    else:
-        speech = tts(translation, 'en') #'en')
-    return transcribe, translation, speech
+    #if lang in ['es', 'fr']:
+    #    speech = tts(transcribe, lang)
+    #else:
+    speech = tts(translation, 'en') #'en')
+    return translation, speech #transcribe,
 
 
 # Whisper - speech-to-text
@@ -79,16 +79,16 @@ def whisper_stt(audio):
     print(f"Detected language: {max(probs, key=probs.get)}")
 
     # decode the audio
-    options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
+    #options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
     options_transl = whisper.DecodingOptions(fp16 = False, language='en', task='translate') #lang
-    result_transc = whisper.decode(model_med, mel, options_transc)
+    #result_transc = whisper.decode(model_med, mel, options_transc)
     result_transl = whisper.decode(model_med, mel, options_transl)
 
     # print the recognized text
-    print(f"transcript is : {result_transc.text}")
+    #print(f"transcript is : {result_transc.text}")
     print(f"translation is : {result_transl.text}")
 
-    return result_transc.text, result_transl.text, lang
+    return result_transl.text, lang #result_transc.text,
 
 
 # Coqui - Text-to-Speech
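
The `mel` and `probs` used in this hunk are produced earlier in `whisper_stt`, outside the lines shown. A minimal sketch of that preamble, assuming the standard openai-whisper preprocessing calls (the exact lines in app.py may differ):

# Assumed preamble of whisper_stt (not shown in this hunk), standard openai-whisper calls.
# 'audio' is the filepath handed over by the gr.Audio microphone component.
audio = whisper.load_audio(audio)
audio = whisper.pad_or_trim(audio)                             # pad/trim to 30 seconds
mel = whisper.log_mel_spectrogram(audio).to(model_med.device)  # features for decoding
_, probs = model_med.detect_language(mel)                      # dict of language probabilities
lang = max(probs, key=probs.get)                               # e.g. 'en', 'es', 'fr'
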
@@ -109,15 +109,15 @@ with demo:
     """)
     with gr.Row():
         with gr.Column():
-            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English, Spanish or French for best results-') #type='filepath'
-            b1 = gr.Button("AI response pipeline (Whisper - Bloom - Coqui pipeline)")
+            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English -') #type='filepath'
+            b1 = gr.Button("AI Response")
             out_transcript = gr.Textbox(label= 'English/Spanish/French Transcript of your Audio using OpenAI Whisper')
-            out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
+            #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
         with gr.Column():
-            out_audio = gr.Audio(label='AI response in Audio form in your language - This will be either in Spanish, or in French or in English for all other languages -')
-            out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
-            out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
+            out_audio = gr.Audio(label='AI response in Audio form in English language')
+            #out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
+            #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
 
-    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text,out_generated_text_en, out_audio])
+    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
 
 demo.launch(enable_queue=True, debug=True)
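
The `tts()` helper that `driver_fun` now calls with English text only is not shown in this diff. Purely as an illustration, a helper along these lines could be written with the Coqui TTS Python package; the model name, output path, and return convention here are assumptions, not necessarily what app.py does:

# Illustrative sketch only - not the implementation in app.py.
from TTS.api import TTS

coqui_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")  # assumed English model

def tts(text, lang='en'):
    out_path = "ai_response.wav"                        # assumed output location
    coqui_model.tts_to_file(text=text, file_path=out_path)
    return out_path                                     # gr.Audio can play a returned filepath
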