Hugging Face Space — diff view of app.py (file changed; the Space currently reports a build error).
@@ -15,14 +15,29 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
15 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
16 |
#Language covered in Bloom : en, fr, esp, arb, hn, portu, Indonesian, Vietnamese, Chinese, tamil, telugu, bengali
|
17 |
|
|
|
|
|
|
|
|
|
18 |
# Text-to-Speech
|
19 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
20 |
print(f"Languages for Coqui are: {LANGUAGES}")
|
21 |
#Languages for Coqui are: ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'el', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
|
22 |
coquiTTS = CoquiTTS()
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# Whisper - speeech-to-text
|
25 |
def whisper_stt(audio):
|
|
|
26 |
# load audio and pad/trim it to fit 30 seconds
|
27 |
audio = whisper.load_audio(audio)
|
28 |
audio = whisper.pad_or_trim(audio)
|
@@ -41,19 +56,16 @@ def whisper_stt(audio):
|
|
41 |
|
42 |
# print the recognized text
|
43 |
print(f"transcript is : {result.text}")
|
44 |
-
return result.text
|
45 |
|
46 |
-
# Driver function: chains speech-to-text, the language model, and text-to-speech.
def fun_engine(audio):
    """Run the full pipeline on *audio*.

    Returns a 3-tuple: (transcript, LLM response text, path to synthesized .wav).
    """
    transcript = whisper_stt(audio)
    reply = lang_model_response(transcript)
    # Speech is always rendered in English in this version of the pipeline.
    wav_path = tts(reply, 'en')
    return transcript, reply, wav_path
|
53 |
|
54 |
# LLM - Bloom Response
|
55 |
def lang_model_response(prompt):
|
56 |
print(f"*****Inside lang_model_response - Prompt is :{prompt}")
|
|
|
|
|
|
|
|
|
57 |
if len(prompt) == 0:
|
58 |
prompt = """Can you help me please?"""
|
59 |
|
@@ -82,6 +94,7 @@ def lang_model_response(prompt):
|
|
82 |
|
83 |
# Coqui - Text-to-Speech
def tts(text, language):
    """Synthesize *text* with Coqui TTS and return the path of the generated .wav.

    The temp file is created with delete=False so it survives for the caller
    (e.g. a Gradio audio output component) after this function returns.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        coquiTTS.get_tts(text, wav_file, speaker={"language": language})
    return wav_file.name
|
|
|
15 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
16 |
#Language covered in Bloom : en, fr, esp, arb, hn, portu, Indonesian, Vietnamese, Chinese, tamil, telugu, bengali
|
17 |
|
18 |
+
prompt = """Instruction: Given a Statement, produce a response in one sentence.
|
19 |
+
Statement:
|
20 |
+
"""
|
21 |
+
|
22 |
# Text-to-Speech
|
23 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
24 |
print(f"Languages for Coqui are: {LANGUAGES}")
|
25 |
#Languages for Coqui are: ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'el', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
|
26 |
coquiTTS = CoquiTTS()
|
27 |
|
28 |
+
|
29 |
+
# Driver function wiring STT -> LLM -> TTS for the app.
def fun_engine(audio):
    """Transcribe *audio*, get an LLM reply, and voice it in the detected language.

    Returns (transcript, LLM response text, path to synthesized .wav).
    """
    # whisper_stt in this version also reports the language it detected,
    # which is then reused so the reply is spoken in the speaker's language.
    transcript, detected_lang = whisper_stt(audio)
    reply = lang_model_response(transcript)
    wav_path = tts(reply, detected_lang)
    return transcript, reply, wav_path
|
36 |
+
|
37 |
+
|
38 |
# Whisper - speeech-to-text
|
39 |
def whisper_stt(audio):
|
40 |
+
print("Inside Whisper TTS")
|
41 |
# load audio and pad/trim it to fit 30 seconds
|
42 |
audio = whisper.load_audio(audio)
|
43 |
audio = whisper.pad_or_trim(audio)
|
|
|
56 |
|
57 |
# print the recognized text
|
58 |
print(f"transcript is : {result.text}")
|
59 |
+
return result.text, lang
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
# LLM - Bloom Response
|
63 |
def lang_model_response(prompt):
|
64 |
print(f"*****Inside lang_model_response - Prompt is :{prompt}")
|
65 |
+
p = """Instruction: Given a Statement, produce a Response in one sentence.
|
66 |
+
Statement: """
|
67 |
+
prompt = p + prompt + "\n" + "Response: "
|
68 |
+
|
69 |
if len(prompt) == 0:
|
70 |
prompt = """Can you help me please?"""
|
71 |
|
|
|
94 |
|
95 |
# Coqui - Text-to-Speech
def tts(text, language):
    """Render *text* as speech in *language* via Coqui; return the temp .wav path."""
    print(f"Inside tts - language is : {language}")
    # delete=False keeps the file on disk so the caller can consume it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as speech_file:
        coquiTTS.get_tts(text, speech_file, speaker={"language": language})
        return speech_file.name
|