gorkemgoknar committed on
Commit
f5658d6
·
1 Parent(s): 1c989f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -1
app.py CHANGED
@@ -14,6 +14,56 @@ import numpy as np
14
  from TTS.utils.manage import ModelManager
15
  from TTS.utils.synthesizer import Synthesizer
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  #emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
18
  #emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
19
 
@@ -140,6 +190,37 @@ def greet(character,your_voice,message,history):
140
  return html,history,"tts_output.wav"
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def greet_textonly(character,message,history):
144
 
145
  #gradios set_state/get_state had problems on embedded html!
@@ -192,6 +273,16 @@ examples=[['Gandalf','dragon.wav','Who are you sir?',{}]]
192
 
193
  history = {"character": "None", "message_history" : [] }
194
 
 
 
 
 
 
 
 
 
 
 
195
  interface_mic = gr.Interface(fn=greet,
196
  inputs=[gr.inputs.Dropdown(personality_choices),
197
  gr.inputs.Audio(source="microphone", type="filepath") ,
@@ -219,5 +310,5 @@ interface_file= gr.Interface(fn=greet_textonly,
219
 
220
 
221
 
222
- appinterface = gr.TabbedInterface([interface_mic,interface_file, interface_text], ["Chat with Mic Record","Chat with Audio Upload" , "Chat Text only"])
223
  appinterface.launch()
 
14
  from TTS.utils.manage import ModelManager
15
  from TTS.utils.synthesizer import Synthesizer
16
 
17
+
18
+
19
#### STT ###########
########### STT English ##############
# State holder created at import time; nothing in the visible code reads it.
state = gr.Variable()

# Hugging Face Hub repository that hosts the STT model and scorer files
# downloaded by stt_record().
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-stt-models"

my_title = "STT-ChatGPT-TTS with Coqui"
# The Coqui link used to carry a doubled scheme ("https://https://"), which
# produced a dead link when rendered as Markdown.
my_description = "TODO add description and reference: STT base from mbarnig/lb-de-fr-en-pt-coqui-stt-models - 🐸 [Coqui.ai](https://coqui.ai/)."

# Languages offered for speech-to-text; only English is wired up.
STT_LANGUAGES = [
    "English",
]

# Example rows; presumably (audio file, language, use scorer, speaker,
# expected transcript) -- TODO confirm against the interface that uses them.
EXAMPLES = [
    ["examples/english.wav", "English", True, "Linda", "every window and roof which could command a view of the horrible performance was occupied"],
]
35
+
36
def reformat_freq(sr, y):
    """Bring an audio buffer to the 16 kHz rate expected by the STT model.

    Args:
        sr: Sample rate of ``y`` in Hz. Only 16000 and 48000 are accepted.
        y: Audio samples. Assumed to be a 1-D mono array -- multi-channel
            input is flattened, not mixed down (TODO confirm callers only
            pass mono).

    Returns:
        A ``(16000, samples)`` tuple. 16 kHz input is returned untouched;
        48 kHz input is peak-normalised, decimated by averaging each group of
        three samples, and converted to ``int16``.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (48000, 16000):
        # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)

    if sr == 16000:
        return sr, y

    # Work in float so that abs() of the most negative int16 cannot overflow.
    samples = np.asarray(y, dtype=np.float64).ravel()

    # Normalise by the absolute peak: using only the positive maximum let a
    # dominant negative peak scale past the int16 range and wrap around.
    peak = float(np.abs(samples).max()) if samples.size else 0.0
    scale = max(peak, 1)

    # Drop the (at most two) trailing samples that do not fill a complete
    # group of three; otherwise the reshape below would raise.
    groups = samples.size // 3
    samples = samples[: groups * 3]

    resampled = (
        ((samples / scale) * 32767)
        .reshape((groups, 3))
        .mean(axis=1)
        .astype("int16")
    )
    return 16000, resampled
51
+
52
def _read_wav_file(path):
    """Load a 16-bit PCM WAV file and return ``(sample_rate, mono_samples)``."""
    import wave

    with wave.open(path, "rb") as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError("Unsupported sample width", sample_width)
        sample_rate = wav_file.getframerate()
        channels = wav_file.getnchannels()
        frames = wav_file.readframes(wav_file.getnframes())

    # WAV PCM data is little-endian regardless of the host byte order.
    samples = np.frombuffer(frames, dtype="<i2")
    if channels > 1:
        # Frames interleave the channels; average them into a mono track.
        samples = samples.reshape((-1, channels)).mean(axis=1).astype(np.int16)
    return sample_rate, samples


def stt_record(audio_record_buffer, use_scorer=True):
    """Transcribe recorded English speech to text.

    Args:
        audio_record_buffer: Either a ``(sample_rate, samples)`` pair or the
            path of a 16-bit PCM WAV file. Paths are accepted because
            greet_stt_to_tts() passes the recording's file path here.
        use_scorer: When true (the default, matching the previous hard-coded
            behaviour) the external scorer is enabled before decoding;
            otherwise it is explicitly disabled.

    Returns:
        The value returned by the model's ``stt()`` call.

    Raises:
        ValueError: If the sample rate is rejected by reformat_freq() or a
            WAV file is not 16-bit PCM.
    """
    if isinstance(audio_record_buffer, str):
        audio_record_buffer = _read_wav_file(audio_record_buffer)

    # Validate and resample the audio before paying for the model download/load.
    _, samples = reformat_freq(*audio_record_buffer)

    # using english model
    acoustic_model = Model(hf_hub_download(repo_id=REPO_ID, filename="english/model.tflite"))
    if use_scorer:
        scorer_path = hf_hub_download(repo_id=REPO_ID, filename="english/huge-vocabulary.scorer")
        acoustic_model.enableExternalScorer(scorer_path)
    else:
        acoustic_model.disableExternalScorer()
    return acoustic_model.stt(samples)
65
+
66
+
67
  #emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
68
  #emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
69
 
 
190
  return html,history,"tts_output.wav"
191
 
192
 
193
+
194
def greet_stt_to_tts(character, your_voice, message, history):
    """Run one speech -> chat -> speech turn and render the conversation.

    Args:
        character: Name of the personality to chat with.
        your_voice: File path of the user's recording. It is transcribed and
            also handed to the ``tts`` command as ``--speaker_wav``. May be
            empty/None when nothing has been recorded yet.
        message: Ignored; the user's message is always the transcription of
            ``your_voice``. Kept so the signature matches the interface inputs.
        history: Dict with ``"character"`` and ``"message_history"`` keys, or
            a falsy value to start a fresh conversation.

    Returns:
        ``(html, history, audio_path)``: the rendered chat log, the updated
        history, and ``"tts_output.wav"`` (or ``None`` when no recording was
        supplied and therefore no speech was generated).

    Raises:
        subprocess.CalledProcessError: If the ``tts`` command exits non-zero.
    """
    import subprocess
    from html import escape

    #gradios set_state/get_state had problems on embedded html!
    history = history or {"character": character, "message_history": []}

    if history["character"] != character:
        #switching character
        history = {"character": character, "message_history": []}

    audio_path = None
    if your_voice:
        # speech -> text
        message = stt_record(your_voice)

        response = get_chat_response(character, history=history["message_history"], input_txt=message)

        # Pass arguments as a list (no shell) so quotes or shell metacharacters
        # in the response or the file path cannot alter the command.
        subprocess.run(
            [
                "tts",
                "--text", response,
                "--model_name", "tts_models/multilingual/multi-dataset/your_tts",
                "--speaker_wav", your_voice,
                "--language_idx", "en",
            ],
            check=True,
        )

        history["message_history"].append((message, response))
        # Presumably the tts command's default output file -- TODO confirm.
        audio_path = "tts_output.wav"

    #emotion = get_emotion(response)

    # Escape all dynamic text: it comes from transcription and a chat model
    # and must not be interpreted as markup.
    safe_character = escape(str(character))
    parts = ["<div class='chatbot'>"]
    for user_msg, resp_msg in history["message_history"]:
        parts.append(f"<div class='user_msg'>You: {escape(str(user_msg))}</div>")
        parts.append(f"<div class='resp_msg'>{safe_character}: {escape(str(resp_msg))}</div>")
    parts.append("</div>")

    return "".join(parts), history, audio_path
222
+
223
+
224
  def greet_textonly(character,message,history):
225
 
226
  #gradios set_state/get_state had problems on embedded html!
 
273
 
274
  history = {"character": "None", "message_history" : [] }
275
 
276
# Speech -> speech tab. The four inputs map positionally onto
# greet_stt_to_tts(character, your_voice, message, history); the three
# outputs receive its (html, history, audio file) return values.
interface_full = gr.Interface(
    fn=greet_stt_to_tts,
    inputs=[
        gr.inputs.Dropdown(personality_choices),
        gr.inputs.Audio(source="microphone", type="filepath"),
        "text",
        "state",
    ],
    outputs=[
        "html",
        "state",
        gr.outputs.Audio(type="file"),
    ],
    css=css,
    title="Chat with Your Voice",
    description=description,
    article=article,
    live=True,
)
284
+
285
+
286
  interface_mic = gr.Interface(fn=greet,
287
  inputs=[gr.inputs.Dropdown(personality_choices),
288
  gr.inputs.Audio(source="microphone", type="filepath") ,
 
310
 
311
 
312
 
313
# One tab per interface; the labels below are listed in the same order as
# the interfaces they name.
appinterface = gr.TabbedInterface(
    [
        interface_mic,
        interface_full,
        interface_file,
        interface_text,
    ],
    [
        "Chat with Mic Record",
        "Chat Speech -> Speech",
        "Chat with Audio Upload",
        "Chat Text only",
    ],
)
appinterface.launch()