update
app.py
CHANGED
@@ -13,7 +13,7 @@ dataset = load_dataset("ysharma/short_jokes")
 
 # Whisper: Speech-to-text
 model = whisper.load_model("base")
-model_med = whisper.load_model("medium")
+#model_med = whisper.load_model("medium")
 # Languages covered in Whisper - (exhaustive list) :
 #"en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian",
 #"ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish",
@@ -49,18 +49,18 @@ print(f"Languages for Coqui are: {LANGUAGES}")
 
 # Driver function
 def driver_fun(audio) :
-    transcribe, translation, lang = whisper_stt(audio)
+    translation, lang = whisper_stt(audio) # older : transcribe, translation, lang
     #text1 = model.transcribe(audio)["text"]
 
     #For now only taking in English text for Bloom prompting as inference model is not high spec
     #text_generated = lang_model_response(transcribe, lang)
     #text_generated_en = lang_model_response(translation, 'en')
 
-    if lang in ['es', 'fr']:
-        speech = tts(transcribe, lang)
-    else:
-        speech = tts(translation, 'en')
-    return
+    #if lang in ['es', 'fr']:
+    #  speech = tts(transcribe, lang)
+    #else:
+    speech = tts(translation, 'en') #'en')
+    return translation, speech #transcribe,
 
 
 # Whisper - speech-to-text
@@ -79,16 +79,16 @@ def whisper_stt(audio):
     print(f"Detected language: {max(probs, key=probs.get)}")
 
     # decode the audio
-    options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
+    #options_transc = whisper.DecodingOptions(fp16 = False, language=lang, task='transcribe') #lang
     options_transl = whisper.DecodingOptions(fp16 = False, language='en', task='translate') #lang
-    result_transc = whisper.decode(model_med, mel, options_transc)
+    #result_transc = whisper.decode(model_med, mel, options_transc)
     result_transl = whisper.decode(model_med, mel, options_transl)
 
     # print the recognized text
-    print(f"transcript is : {result_transc.text}")
+    #print(f"transcript is : {result_transc.text}")
     print(f"translation is : {result_transl.text}")
 
-    return
+    return result_transl.text, lang #result_transc.text,
 
 
 # Coqui - Text-to-Speech
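Taken together, the three hunks above reduce the app to a single English-only path: the "medium" Whisper model, the transcribe pass, and the Bloom prompting are all commented out, and driver_fun now returns just the English translation plus the synthesized speech. Note that result_transl is still decoded with model_med even though its load_model call is now commented out, so whisper_stt will raise a NameError at runtime unless model_med is defined elsewhere in the file. A minimal sketch of the reduced pipeline follows; the pre-processing lines of whisper_stt and the Coqui tts() helper are not part of this diff, so they are reconstructed from standard Whisper usage and are assumptions, not the Space's exact code.

import whisper

# Whisper: speech-to-text - only the base model is loaded after this commit
model = whisper.load_model("base")

def whisper_stt(audio):
    # load the recording, pad/trim it to 30 s, and compute the log-Mel spectrogram
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)
    print(f"Detected language: {lang}")

    # decode the audio: only the translate-to-English pass survives this commit
    options_transl = whisper.DecodingOptions(fp16=False, language='en', task='translate')
    result_transl = whisper.decode(model, mel, options_transl)  # base model instead of model_med
    print(f"translation is : {result_transl.text}")
    return result_transl.text, lang

# Driver function - English-only path
def driver_fun(audio):
    translation, lang = whisper_stt(audio)
    speech = tts(translation, 'en')  # tts() is the Space's Coqui helper, defined outside this diff
    return translation, speech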
@@ -109,15 +109,15 @@ with demo:
     """)
     with gr.Row():
         with gr.Column():
-            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English
-            b1 = gr.Button("AI
+            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English -') #type='filepath'
+            b1 = gr.Button("AI Response")
             out_transcript = gr.Textbox(label= 'English/Spanish/French Transcript of your Audio using OpenAI Whisper')
-            out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
+            #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
         with gr.Column():
-            out_audio = gr.Audio(label='AI response in Audio form in
-            out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
-            out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
+            out_audio = gr.Audio(label='AI response in Audio form in English language')
+            #out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
+            #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
 
-    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text,out_generated_text_en,
+    b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
 
 demo.launch(enable_queue=True, debug=True)
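On the UI side, the last hunk drops the translation and Bloom textboxes and rewires the button so that the two values returned by driver_fun line up with the two remaining outputs. A sketch of the resulting Blocks layout follows, assuming a Gradio 3.x install where gr.Audio still accepts source="microphone" and launch() still takes enable_queue; the Markdown text, the demo = gr.Blocks() line, and the stub driver_fun are placeholders for code outside this diff.

import gradio as gr

def driver_fun(audio):
    # stand-in for the real driver_fun above: Whisper translation + Coqui TTS
    return "transcript placeholder", audio

demo = gr.Blocks()

with demo:
    gr.Markdown("""Speak in English, Spanish or French and get an AI audio response back in English.""")
    with gr.Row():
        with gr.Column():
            # input: microphone recording handed to driver_fun as a file path
            in_audio = gr.Audio(source="microphone", type="filepath",
                                label='Record your voice command here in English -')
            b1 = gr.Button("AI Response")
            out_transcript = gr.Textbox(label='English/Spanish/French Transcript of your Audio using OpenAI Whisper')
        with gr.Column():
            # output: the synthesized English audio response
            out_audio = gr.Audio(label='AI response in Audio form in English language')

    # driver_fun returns (translation, speech); they map onto the two outputs in order
    b1.click(driver_fun, inputs=[in_audio], outputs=[out_transcript, out_audio])

demo.launch(enable_queue=True, debug=True)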