ysharma HF staff committed on
Commit
ea180c8
1 Parent(s): d325a90
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -20,7 +20,7 @@ LANGUAGES = list(CoquiTTS.langs.keys())
20
  print(f"Languages for Coqui are: {LANGUAGES}")
21
  coquiTTS = CoquiTTS()
22
 
23
-
24
  def whisper_stt(audio):
25
  # load audio and pad/trim it to fit 30 seconds
26
  audio = whisper.load_audio(audio)
@@ -34,21 +34,22 @@ def whisper_stt(audio):
34
  print(f"Detected language: {max(probs, key=probs.get)}")
35
 
36
  # decode the audio
37
- options = whisper.DecodingOptions()
38
  result = whisper.decode(model, mel, options)
39
 
40
  # print the recognized text
41
  print(f"transcript is : {result.text}")
42
  return result.text
43
 
44
- # Processing input Audio
45
# Driver: audio in -> Whisper transcript -> Bloom response -> Coqui speech out.
def fun_engine(audio):
    """Run the full pipeline on one audio input.

    Returns a 3-tuple: (transcript, language-model response, synthesized speech).
    """
    text1 = whisper_stt(audio)
    #text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # BUG FIX: original called tts(text, 'en') but `text` is undefined in this
    # scope — the intended input to TTS is the LLM response `text2`.
    speech = tts(text2, 'en')
    return text1, text2, speech
51
-
 
52
  def lang_model_response(prompt):
53
  print(f"*****Inside meme_generate - Prompt is :{prompt}")
54
  if len(prompt) == 0:
@@ -77,7 +78,7 @@ def lang_model_response(prompt):
77
  print(f"Final response after splits is: {solution}")
78
  return solution
79
 
80
- #Text-to-Speech
81
  def tts(text, language):
82
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
83
  coquiTTS.get_tts(text, fp, speaker = {"language" : language})
 
20
  print(f"Languages for Coqui are: {LANGUAGES}")
21
  coquiTTS = CoquiTTS()
22
 
23
+ # Whisper - speech-to-text
24
  def whisper_stt(audio):
25
  # load audio and pad/trim it to fit 30 seconds
26
  audio = whisper.load_audio(audio)
 
34
  print(f"Detected language: {max(probs, key=probs.get)}")
35
 
36
  # decode the audio
37
+ options = whisper.DecodingOptions(fp16 = False)
38
  result = whisper.decode(model, mel, options)
39
 
40
  # print the recognized text
41
  print(f"transcript is : {result.text}")
42
  return result.text
43
 
44
# Driver function: audio in -> Whisper transcript -> Bloom response -> Coqui speech out.
def fun_engine(audio):
    """Run the full pipeline on one audio input.

    Returns a 3-tuple: (transcript, language-model response, synthesized speech).
    """
    text1 = whisper_stt(audio)
    #text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # BUG FIX: original called tts(text, 'en') but `text` is undefined in this
    # scope — the intended input to TTS is the LLM response `text2`.
    speech = tts(text2, 'en')
    return text1, text2, speech
51
+
52
+ # LLM - Bloom Response
53
  def lang_model_response(prompt):
54
  print(f"*****Inside meme_generate - Prompt is :{prompt}")
55
  if len(prompt) == 0:
 
78
  print(f"Final response after splits is: {solution}")
79
  return solution
80
 
81
+ # Coqui - Text-to-Speech
82
  def tts(text, language):
83
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
84
  coquiTTS.get_tts(text, fp, speaker = {"language" : language})