ysharma HF staff committed
Commit
4455967
1 Parent(s): a3b9251
Files changed (1)
  1. app.py +7 -17
app.py CHANGED
@@ -29,21 +29,18 @@ def query(payload):
 # Language common in both the multilingual models - English, Chinese, Spanish, and French etc
 # Model 1: Whisper: Speech-to-text
 model = whisper.load_model("base")
-#model_med = whisper.load_model("medium")
 
 
 #Model 2: Text-to-Speech
 LANGUAGES = list(CoquiTTS.langs.keys())
 coquiTTS = CoquiTTS()
-print(f"Languages for Coqui are: {LANGUAGES}")
 #Languages for Coqui are: ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'el', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
 
 
 # Driver function
 def driver_fun(audio) :
-    #if audio is None:
 
-    translation, lang = whisper_stt(audio) # older : transcribe, translation, lang
+    translation, lang = whisper_stt(audio)
 
     random_val = random.randrange(0,231657)
     if random_val < 226657:
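For reference, the setup this hunk trims down to amounts to the standalone sketch below. The import sources are assumptions, since the diff never shows the import block: `whisper` is taken to be openai-whisper, and `CoquiTTS` is taken to be the Neon Coqui TTS plugin whose class-level `langs` mapping the code enumerates.

import whisper                                # assumption: the openai-whisper package
from neon_tts_plugin_coqui import CoquiTTS    # assumption: the plugin providing CoquiTTS.langs

# Model 1: Whisper speech-to-text; the commit keeps only the small "base"
# checkpoint and drops the commented-out "medium" one.
model = whisper.load_model("base")

# Model 2: Coqui text-to-speech; langs maps language codes to available voices.
LANGUAGES = list(CoquiTTS.langs.keys())
coquiTTS = CoquiTTS()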
@@ -65,8 +62,8 @@ def driver_fun(audio) :
     joke = dataset_subset[indx_score]
     print(f"Joke is : {joke}")
 
-    speech = tts(joke, 'en') #'en' # translation
-    return translation, joke, speech #transcribe,
+    speech = tts(joke, 'en')
+    return translation, joke, speech
 
 
 # Whisper - speech-to-text
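Taken together, the two hunks above reduce driver_fun to a three-step pipeline: translate the audio, pick a joke, synthesize it. A minimal sketch of that control flow follows; pick_joke is a hypothetical stand-in for the dataset_subset lookup that lives outside these hunks, and whisper_stt/tts are the functions edited further down.

import random

# Hypothetical stand-in for the dataset_subset joke lookup (not shown in this diff).
def pick_joke():
    jokes = ["I told my model a joke about overfitting, but it had heard it 100 epochs ago."]
    return random.choice(jokes)

def driver_fun(audio):
    translation, lang = whisper_stt(audio)  # speech -> English text
    joke = pick_joke()                      # stands in for the random_val/dataset_subset logic
    speech = tts(joke, 'en')                # English text -> path to a .wav file
    return translation, joke, speech        # order matches b1.click's outputs list below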
@@ -85,24 +82,18 @@ def whisper_stt(audio):
     print(f"Detected language: {max(probs, key=probs.get)}")
 
     # decode the audio
-    #options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
     options_transl = whisper.DecodingOptions(fp16 = False, language='en', task='translate') #lang
-    #result_transc = whisper.decode(model_med, mel, options_transc)
     result_transl = whisper.decode(model, mel, options_transl) #model_med
 
-    # print the recognized text
-    #print(f"transcript is : {result_transc.text}")
+    # print the transcribed text
     print(f"translation is : {result_transl.text}")
 
-    return result_transl.text, lang #result_transc.text,
+    return result_transl.text, lang
 
 
 # Coqui - Text-to-Speech
 def tts(text, language):
     print(f"Inside tts - language is : {language}")
-    #coqui_langs = ['en' ,'es' ,'fr' ,'de' ,'pl' ,'uk' ,'ro' ,'hu' ,'bg' ,'nl' ,'fi' ,'sl' ,'lv' ,'ga']
-    #if language not in coqui_langs:
-    #    language = 'en'
     print(f"Text is : {text}")
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         coquiTTS.get_tts(text, fp, speaker = {"language" : language})
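The decode path kept by this hunk is the stock low-level openai-whisper API, so whisper_stt plus the temp-file trick in tts can be sketched self-contained as below. It reuses the coquiTTS instance from the setup sketch above; the get_tts(text, fp, speaker={...}) call is copied from the diff, while returning fp.name at the end is an assumption about code past the end of this hunk.

import tempfile
import whisper

model = whisper.load_model("base")

def whisper_stt(audio_path):
    # Pad or trim the recording to Whisper's fixed 30-second window.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_path))
    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # Detect the spoken language, then decode straight to English text.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)
    options = whisper.DecodingOptions(fp16=False, language='en', task='translate')
    result = whisper.decode(model, mel, options)
    return result.text, lang

def tts(text, language):
    # delete=False keeps the .wav on disk after the handle closes, so Gradio
    # can still serve it; fp.name as the return value is an assumption.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(text, fp, speaker={"language": language})
    return fp.name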
@@ -119,12 +110,11 @@ with demo:
         in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English -') #type='filepath'
         b1 = gr.Button("AI Response")
         out_transcript = gr.Textbox(label= 'Transcript of your Audio using OpenAI Whisper')
-        #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
+
     with gr.Column():
         out_audio = gr.Audio(label='Audio response form CoquiTTS')
         out_generated_joke = gr.Textbox(label= 'Joke returned! ')
-        #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
-
+
     b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_generated_joke, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
     with gr.Row():
         gr.Markdown(
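On the UI side, the commit leaves one input and three outputs. A minimal Blocks sketch with the same wiring follows, using the gradio 3.x-era source="microphone" keyword that the diff itself uses, and assuming the driver_fun sketched above.

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            in_audio = gr.Audio(source="microphone", type="filepath",
                                label='Record your voice command here in English -')
            b1 = gr.Button("AI Response")
            out_transcript = gr.Textbox(label='Transcript of your Audio using OpenAI Whisper')
        with gr.Column():
            out_audio = gr.Audio(label='Audio response from CoquiTTS')
            out_generated_joke = gr.Textbox(label='Joke returned!')

    # Output order must line up with driver_fun's return order:
    # (translation, joke, speech) -> (transcript box, joke box, audio player).
    b1.click(driver_fun, inputs=[in_audio],
             outputs=[out_transcript, out_generated_joke, out_audio])

demo.launch()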