Spaces:
Build error
Build error
Commit
·
f5658d6
1
Parent(s):
1c989f3
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,56 @@ import numpy as np
|
|
14 |
from TTS.utils.manage import ModelManager
|
15 |
from TTS.utils.synthesizer import Synthesizer
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
#emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
|
18 |
#emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
|
19 |
|
@@ -140,6 +190,37 @@ def greet(character,your_voice,message,history):
|
|
140 |
return html,history,"tts_output.wav"
|
141 |
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
def greet_textonly(character,message,history):
|
144 |
|
145 |
#gradios set_state/get_state had problems on embedded html!
|
@@ -192,6 +273,16 @@ examples=[['Gandalf','dragon.wav','Who are you sir?',{}]]
|
|
192 |
|
193 |
history = {"character": "None", "message_history" : [] }
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
interface_mic = gr.Interface(fn=greet,
|
196 |
inputs=[gr.inputs.Dropdown(personality_choices),
|
197 |
gr.inputs.Audio(source="microphone", type="filepath") ,
|
@@ -219,5 +310,5 @@ interface_file= gr.Interface(fn=greet_textonly,
|
|
219 |
|
220 |
|
221 |
|
222 |
-
appinterface = gr.TabbedInterface([interface_mic,interface_file, interface_text], ["Chat with Mic Record","Chat with Audio Upload" , "Chat Text only"])
|
223 |
appinterface.launch()
|
|
|
14 |
from TTS.utils.manage import ModelManager
|
15 |
from TTS.utils.synthesizer import Synthesizer
|
16 |
|
17 |
+
|
18 |
+
|
19 |
+
#### STT ###########
########### STT English ##############
# Module-level Gradio state holder; not referenced by the code visible in
# this section.
state = gr.Variable()

# Hugging Face Hub repository that stt_record() downloads the acoustic
# model and scorer from.
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-stt-models"

my_title = "STT-ChatGPT-TTS with Coqui"
# The link target previously had a doubled scheme ("https://https://...").
my_description = "TODO add description and reference: STT base from mbarnig/lb-de-fr-en-pt-coqui-stt-models - 🐸 [Coqui.ai](https://coqui.ai/)."

# Languages offered for speech-to-text; only the English model is used.
STT_LANGUAGES = [
    "English",
]

# Example rows -- presumably (audio path, language, use scorer, speaker,
# expected transcript); TODO confirm against the interface that consumes them.
EXAMPLES = [
    ["examples/english.wav", "English", True, "Linda", "every window and roof which could command a view of the horrible performance was occupied"],
]
|
35 |
+
|
36 |
+
def reformat_freq(sr, y):
    """Return ``(16000, samples)`` suitable for the 16 kHz speech model.

    Args:
        sr: Sample rate of ``y``; must be 16000 or 48000.
        y: NumPy array of audio samples.

    Returns:
        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
        untouched. 48 kHz input is peak-normalised to the int16 range and
        decimated 3:1 by averaging each group of three consecutive samples.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    # Deepspeech only supports 16k, (we convert 48k -> 16k)
    if sr not in (48000, 16000):
        raise ValueError("Unsupported rate", sr)

    if sr == 48000:
        # Work in float so neither the scaling nor abs() can overflow int16.
        flat = np.asarray(y, dtype=np.float64).reshape(-1)
        # Drop the (at most two) trailing samples that do not fill a complete
        # group of three; otherwise the reshape below would raise.
        usable = flat.size - flat.size % 3
        flat = flat[:usable]
        # Normalise by the absolute peak: using only the positive maximum
        # lets a larger negative peak overflow the int16 cast.
        peak = max(np.max(np.abs(flat)), 1) if usable else 1
        y = (
            ((flat / peak) * 32767)
            .reshape((-1, 3))
            .mean(axis=1)
            .astype("int16")
        )
        sr = 16000
    return sr, y
|
51 |
+
|
52 |
+
def _load_wav_as_int16(path):
    """Read a 16-bit PCM WAV file and return ``(sample_rate, mono_samples)``."""
    import wave

    with wave.open(path, "rb") as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError("Unsupported sample width", sample_width)
        sample_rate = wav_file.getframerate()
        channels = wav_file.getnchannels()
        frames = wav_file.readframes(wav_file.getnframes())

    samples = np.frombuffer(frames, dtype="<i2")
    if channels > 1:
        # WAV frames are interleaved per channel; average them down to mono.
        samples = samples.reshape((-1, channels)).mean(axis=1).astype("int16")
    return sample_rate, samples


def stt_record(audio_record_buffer):
    """Transcribe recorded audio with the English model from ``REPO_ID``.

    Args:
        audio_record_buffer: Either a ``(sample_rate, samples)`` tuple, or a
            path to a 16-bit PCM WAV file. The path form is what the caller in
            this file passes, because its Audio input uses ``type="filepath"``.

    Returns:
        Whatever ``acoustic_model.stt`` returns for the audio (presumably the
        transcript text -- TODO confirm against the STT library).

    Raises:
        ValueError: If the sample rate is not 16000 or 48000 (raised by
            ``reformat_freq``), or a WAV file is not 16-bit.
    """
    # A path string cannot be unpacked into (rate, samples); load it first.
    if isinstance(audio_record_buffer, str):
        audio_record_buffer = _load_wav_as_int16(audio_record_buffer)

    # Validate and resample before the comparatively expensive model load.
    _, y = reformat_freq(*audio_record_buffer)

    # using english model
    acoustic_model = Model(hf_hub_download(repo_id=REPO_ID, filename="english/model.tflite"))
    scorer_path = hf_hub_download(repo_id=REPO_ID, filename="english/huge-vocabulary.scorer")

    # The external scorer was always enabled (the flag was hard-coded to
    # True), so the unreachable "disable" branch has been dropped.
    acoustic_model.enableExternalScorer(scorer_path)
    return acoustic_model.stt(y)
|
65 |
+
|
66 |
+
|
67 |
#emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
|
68 |
#emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
|
69 |
|
|
|
190 |
return html,history,"tts_output.wav"
|
191 |
|
192 |
|
193 |
+
|
194 |
+
def greet_stt_to_tts(character,your_voice,message,history):
    """Run one speech -> chat -> speech turn and render the conversation.

    Args:
        character: Name of the selected personality.
        your_voice: Recorded audio; passed to ``stt_record`` for transcription
            and to the ``tts`` command as ``--speaker_wav``.
        message: Ignored -- it is overwritten by the transcription of
            ``your_voice``. Kept so the signature matches the interface inputs.
        history: Dict with ``"character"`` and ``"message_history"`` keys, or
            a falsy value to start a new conversation.

    Returns:
        ``(html, history, "tts_output.wav")``: the rendered chat log, the
        updated history, and the audio file name this function hands to the
        interface (presumably where the ``tts`` command writes its output --
        TODO confirm).
    """
    import subprocess
    from html import escape

    #gradios set_state/get_state had problems on embedded html!
    history = history or {"character": character, "message_history" : [] }
    #gradios set_state/get_state does not persist session for now using global
    #global history

    if history["character"] != character:
        #switching character
        history = {"character": character, "message_history" : [] }

    # speech -> text (Coqui STT via stt_record)
    message = stt_record(your_voice)

    response = get_chat_response(character,history=history["message_history"],input_txt=message)

    # Pass arguments as a list (no shell) so quotes or shell metacharacters in
    # the model's response or in the path cannot break or hijack the command.
    # check=False keeps the previous best-effort behaviour of os.system: a
    # failing tts run does not raise.
    subprocess.run(
        [
            "tts",
            "--text", str(response),
            "--model_name", "tts_models/multilingual/multi-dataset/your_tts",
            "--speaker_wav", str(your_voice),
            "--language_idx", "en",
        ],
        check=False,
    )

    history["message_history"].append((message, response))

    #emotion = get_emotion(response)

    # Escape the chat text so transcribed or generated markup is displayed
    # literally instead of being injected into the page.
    parts = ["<div class='chatbot'>"]
    for user_msg, resp_msg in history["message_history"]:
        parts.append(f"<div class='user_msg'>You: {escape(str(user_msg))}</div>")
        parts.append(f"<div class='resp_msg'>{character}: {escape(str(resp_msg))}</div>")
    parts.append("</div>")
    html = "".join(parts)

    return html,history,"tts_output.wav"
|
222 |
+
|
223 |
+
|
224 |
def greet_textonly(character,message,history):
|
225 |
|
226 |
#gradios set_state/get_state had problems on embedded html!
|
|
|
273 |
|
274 |
history = {"character": "None", "message_history" : [] }
|
275 |
|
276 |
+
# Speech -> speech tab: takes a personality, a microphone recording (as a file
# path), a text box and the session state; returns the chat HTML, the updated
# state and the synthesized audio file.
interface_full = gr.Interface(
    fn=greet_stt_to_tts,
    inputs=[
        gr.inputs.Dropdown(personality_choices),
        gr.inputs.Audio(source="microphone", type="filepath"),
        "text",
        "state",
    ],
    outputs=[
        "html",
        "state",
        gr.outputs.Audio(type="file"),
    ],
    title="Chat with Your Voice",
    description=description,
    article=article,
    css=css,
    live=True,
)
|
284 |
+
|
285 |
+
|
286 |
interface_mic = gr.Interface(fn=greet,
|
287 |
inputs=[gr.inputs.Dropdown(personality_choices),
|
288 |
gr.inputs.Audio(source="microphone", type="filepath") ,
|
|
|
310 |
|
311 |
|
312 |
|
313 |
+
# Combine the individual interfaces into one tabbed app; the labels are listed
# in the same order as the interfaces they name.
appinterface = gr.TabbedInterface(
    [
        interface_mic,
        interface_full,
        interface_file,
        interface_text,
    ],
    [
        "Chat with Mic Record",
        "Chat Speech -> Speech",
        "Chat with Audio Upload",
        "Chat Text only",
    ],
)
appinterface.launch()
|