ysharma HF staff committed on
Commit
ea180c8
1 Parent(s): d325a90
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -20,7 +20,7 @@ LANGUAGES = list(CoquiTTS.langs.keys())
20
  print(f"Languages for Coqui are: {LANGUAGES}")
21
  coquiTTS = CoquiTTS()
22
 
23
-
24
  def whisper_stt(audio):
25
  # load audio and pad/trim it to fit 30 seconds
26
  audio = whisper.load_audio(audio)
@@ -34,21 +34,22 @@ def whisper_stt(audio):
34
  print(f"Detected language: {max(probs, key=probs.get)}")
35
 
36
  # decode the audio
37
- options = whisper.DecodingOptions()
38
  result = whisper.decode(model, mel, options)
39
 
40
  # print the recognized text
41
  print(f"transcript is : {result.text}")
42
  return result.text
43
 
44
- # Processing input Audio
45
# Driver: audio in -> Whisper transcript -> Bloom response -> Coqui speech out.
def fun_engine(audio):
    """Run the full pipeline on one audio input.

    Returns a 3-tuple: (transcript, language-model response, synthesized speech).
    """
    text1 = whisper_stt(audio)
    #text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # BUG FIX: original called tts(text, 'en') but `text` is undefined in this
    # scope — the intended input to TTS is the LLM response `text2`.
    speech = tts(text2, 'en')
    return text1, text2, speech
51
-
 
52
  def lang_model_response(prompt):
53
  print(f"*****Inside meme_generate - Prompt is :{prompt}")
54
  if len(prompt) == 0:
@@ -77,7 +78,7 @@ def lang_model_response(prompt):
77
  print(f"Final response after splits is: {solution}")
78
  return solution
79
 
80
- #Text-to-Speech
81
  def tts(text, language):
82
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
83
  coquiTTS.get_tts(text, fp, speaker = {"language" : language})
 
20
  print(f"Languages for Coqui are: {LANGUAGES}")
21
  coquiTTS = CoquiTTS()
22
 
23
+ # Whisper - speech-to-text
24
  def whisper_stt(audio):
25
  # load audio and pad/trim it to fit 30 seconds
26
  audio = whisper.load_audio(audio)
 
34
  print(f"Detected language: {max(probs, key=probs.get)}")
35
 
36
  # decode the audio
37
+ options = whisper.DecodingOptions(fp16 = False)
38
  result = whisper.decode(model, mel, options)
39
 
40
  # print the recognized text
41
  print(f"transcript is : {result.text}")
42
  return result.text
43
 
44
# Driver function: audio in -> Whisper transcript -> Bloom response -> Coqui speech out.
def fun_engine(audio):
    """Run the full pipeline on one audio input.

    Returns a 3-tuple: (transcript, language-model response, synthesized speech).
    """
    text1 = whisper_stt(audio)
    #text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # BUG FIX: original called tts(text, 'en') but `text` is undefined in this
    # scope — the intended input to TTS is the LLM response `text2`.
    speech = tts(text2, 'en')
    return text1, text2, speech
51
+
52
+ # LLM - Bloom Response
53
  def lang_model_response(prompt):
54
  print(f"*****Inside meme_generate - Prompt is :{prompt}")
55
  if len(prompt) == 0:
 
78
  print(f"Final response after splits is: {solution}")
79
  return solution
80
 
81
+ # Coqui - Text-to-Speech
82
  def tts(text, language):
83
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
84
  coquiTTS.get_tts(text, fp, speaker = {"language" : language})