ysharma HF staff committed on
Commit fe33c17
1 Parent(s): cfc38a8
Files changed (1)
  1. app.py +30 -14
app.py CHANGED
@@ -1,20 +1,30 @@
 import os
-import numpy as np
+#import numpy as np
 import gradio as gr
 import whisper
 import requests
+import tempfile
+from neon_tts_plugin_coqui import CoquiTTS
 
+# Whisper: Speech-to-text
 model = whisper.load_model("base")
-##Bloom
+
+# The LLM : Bloom
 API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
 HF_TOKEN = os.environ["HF_TOKEN"]
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 
+# Text-to-Speech
+LANGUAGES = list(CoquiTTS.langs.keys())
+coquiTTS = CoquiTTS()
+
 
-def fun(audio) : #, state=''):
+# Processing input Audio
+def fun(audio):
     text1 = model.transcribe(audio)["text"]
     text2 = lang_model_response(text1)
-    return text1, text2
+    speech = tts(text2, "en")  # synthesize the Bloom reply; defaulting to English
+    return text1, text2, speech
 
 def lang_model_response(prompt):
     print(f"*****Inside meme_generate - Prompt is :{prompt}")
@@ -40,17 +50,23 @@ def lang_model_response(prompt):
     print(f"output is : {output}")
     output_tmp = output[0]['generated_text']
     print(f"output_tmp is: {output_tmp}")
-    solution = output_tmp[0] #output_tmp.split("\nQ:")[0]
+    solution = output_tmp.split(".")[1]
     print(f"Final response after splits is: {solution}")
-
-    #meme_image, new_prompt = write_on_image(solution)
     return solution
-
-def fun1(audio, state=''):
-    text = model.transcribe(audio)["text"]
-    state += text + " "
-    return state, state
-
+
+# Text-to-Speech
+def tts(text, language):
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:  # temp .wav handed to Gradio by path
+        coquiTTS.get_tts(text, fp, speaker={"language": language})
+    return fp.name
+
+#inputs = [gr.Textbox(label="Input", value=CoquiTTS.langs["en"]["sentence"], max_lines=3),
+#          gr.Radio(label="Language", choices=LANGUAGES, value="en")]
+#outputs = gr.Audio(label="Output")
+
+#demo = gr.Interface(fn=tts, inputs=inputs, outputs=outputs)  # leftover demo wiring; inputs/outputs are commented out above
+
+#demo.launch()
 gr.Interface(
     title = 'Testing Whisper',
     fn=fun,
@@ -59,6 +75,6 @@ gr.Interface(
         # "state"
     ],
     outputs=[
-        "textbox", "textbox"
+        "textbox", "textbox", "audio",
     ],
     live=True).launch()
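
Note: the body of lang_model_response between these hunks falls outside the diff context, so the actual request is not shown. A minimal sketch of the kind of Bloom call the surrounding lines imply, reusing the file's API_URL and headers; query_bloom and the sampling parameters here are illustrative assumptions, not the commit's code:

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def query_bloom(prompt):
    # The hosted Inference API takes {"inputs": ...} plus optional generation
    # parameters and, on success, returns [{"generated_text": "..."}],
    # which matches output[0]['generated_text'] in the hunk above.
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 64, "do_sample": True, "temperature": 0.8},
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()[0]["generated_text"]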
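
A quick smoke test of the three stages without the UI; sample.wav is a placeholder path:

transcript, reply, wav_path = fun("sample.wav")
print(transcript)  # Whisper transcription
print(reply)       # Bloom continuation of the transcript
print(wav_path)    # path to the temp .wav synthesized by Coqui TTS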
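
The committed fun() had no language to pass to tts() (the original added line, tts(text, language):, referenced undefined names and ended in a stray colon), so the rewrite above defaults to "en". If the commented-out Radio is meant to return, one way to wire a language choice through the interface; fun_with_language is a hypothetical variant, and the microphone-input arguments are assumptions consistent with the gradio 3.x API used elsewhere in the file:

def fun_with_language(audio, language):
    # Same pipeline as fun(), with the TTS language chosen in the UI
    text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    return text1, text2, tts(text2, language)

gr.Interface(
    title='Testing Whisper',
    fn=fun_with_language,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Radio(label="Language", choices=LANGUAGES, value="en"),
    ],
    outputs=["textbox", "textbox", "audio"],
    live=True).launch()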