Spaces:
Build error
Build error
ss
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ LANGUAGES = list(CoquiTTS.langs.keys())
|
|
20 |
print(f"Languages for Coqui are: {LANGUAGES}")
|
21 |
coquiTTS = CoquiTTS()
|
22 |
|
23 |
-
|
24 |
def whisper_stt(audio):
|
25 |
# load audio and pad/trim it to fit 30 seconds
|
26 |
audio = whisper.load_audio(audio)
|
@@ -34,21 +34,22 @@ def whisper_stt(audio):
|
|
34 |
print(f"Detected language: {max(probs, key=probs.get)}")
|
35 |
|
36 |
# decode the audio
|
37 |
-
options = whisper.DecodingOptions()
|
38 |
result = whisper.decode(model, mel, options)
|
39 |
|
40 |
# print the recognized text
|
41 |
print(f"transcript is : {result.text}")
|
42 |
return result.text
|
43 |
|
44 |
-
#
|
45 |
def fun_engine(audio) :
|
46 |
text1 = whisper_stt(audio)
|
47 |
#text1 = model.transcribe(audio)["text"]
|
48 |
text2 = lang_model_response(text1)
|
49 |
speech = tts(text, 'en')
|
50 |
return text1, text2, speech
|
51 |
-
|
|
|
52 |
def lang_model_response(prompt):
|
53 |
print(f"*****Inside meme_generate - Prompt is :{prompt}")
|
54 |
if len(prompt) == 0:
|
@@ -77,7 +78,7 @@ def lang_model_response(prompt):
|
|
77 |
print(f"Final response after splits is: {solution}")
|
78 |
return solution
|
79 |
|
80 |
-
#Text-to-Speech
|
81 |
def tts(text, language):
|
82 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
83 |
coquiTTS.get_tts(text, fp, speaker = {"language" : language})
|
|
|
20 |
print(f"Languages for Coqui are: {LANGUAGES}")
|
21 |
coquiTTS = CoquiTTS()
|
22 |
|
23 |
+
# Whisper - speech-to-text
|
24 |
def whisper_stt(audio):
|
25 |
# load audio and pad/trim it to fit 30 seconds
|
26 |
audio = whisper.load_audio(audio)
|
|
|
34 |
print(f"Detected language: {max(probs, key=probs.get)}")
|
35 |
|
36 |
# decode the audio
|
37 |
+
options = whisper.DecodingOptions(fp16 = False)
|
38 |
result = whisper.decode(model, mel, options)
|
39 |
|
40 |
# print the recognized text
|
41 |
print(f"transcript is : {result.text}")
|
42 |
return result.text
|
43 |
|
44 |
+
# Driver function
|
45 |
def fun_engine(audio) :
|
46 |
text1 = whisper_stt(audio)
|
47 |
#text1 = model.transcribe(audio)["text"]
|
48 |
text2 = lang_model_response(text1)
|
49 |
speech = tts(text, 'en')
|
50 |
return text1, text2, speech
|
51 |
+
|
52 |
+
# LLM - Bloom Response
|
53 |
def lang_model_response(prompt):
|
54 |
print(f"*****Inside meme_generate - Prompt is :{prompt}")
|
55 |
if len(prompt) == 0:
|
|
|
78 |
print(f"Final response after splits is: {solution}")
|
79 |
return solution
|
80 |
|
81 |
+
# Coqui - Text-to-Speech
|
82 |
def tts(text, language):
|
83 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
84 |
coquiTTS.get_tts(text, fp, speaker = {"language" : language})
|