gptChatYourTTS

Build error

App Files Files Community

gorkemgoknar committed on Dec 12, 2022

Commit

f5658d6

1 Parent(s): 1c989f3

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -1

app.py CHANGED Viewed

@@ -14,6 +14,56 @@ import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 #emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
 #emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
@@ -140,6 +190,37 @@ def greet(character,your_voice,message,history):
   return html,history,"tts_output.wav"
 def greet_textonly(character,message,history):
   #gradios set_state/get_state had problems on embedded html!
@@ -192,6 +273,16 @@ examples=[['Gandalf','dragon.wav','Who are you sir?',{}]]
 history =   {"character": "None", "message_history" : [] }
 interface_mic = gr.Interface(fn=greet,
                         inputs=[gr.inputs.Dropdown(personality_choices),
                                 gr.inputs.Audio(source="microphone", type="filepath") ,
@@ -219,5 +310,5 @@ interface_file= gr.Interface(fn=greet_textonly,
-appinterface = gr.TabbedInterface([interface_mic,interface_file, interface_text], ["Chat with Mic Record","Chat with Audio Upload" , "Chat Text only"])
 appinterface.launch()

 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
#### STT ###########
########### STT English ##############

# State holder created through Gradio's Variable component.
state = gr.Variable()

# Hugging Face Hub repository the Coqui STT model and scorer files are fetched from.
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-stt-models"

my_title = "STT-ChatGPT-TTS with Coqui"
# The link target used to read "https://https://coqui.ai/" (doubled scheme),
# which is not a valid URL; it now points at the real site.
my_description = "TODO add description and reference: STT base from mbarnig/lb-de-fr-en-pt-coqui-stt-models  - 🐸 [Coqui.ai](https://coqui.ai/)."

# Speech-to-text languages offered; only English is listed.
STT_LANGUAGES = [
    "English",
]

# Example rows for the UI.
# NOTE(review): columns look like (audio path, language, use-scorer flag,
# speaker, expected transcript) — confirm against the consuming interface.
EXAMPLES = [
    ["examples/english.wav", "English", True, "Linda", "every window and roof which could command a view of the horrible performance was occupied"],
]
def reformat_freq(sr, y):
    """Return audio at 16 kHz as a ``(sample_rate, samples)`` tuple.

    16 kHz input is handed back untouched.  48 kHz input is peak-normalised
    to the int16 range and decimated 3:1 by averaging each run of three
    consecutive samples.

    Args:
        sr: Sample rate of ``y`` in Hz; must be 16000 or 48000.
        y: NumPy array of audio samples.

    Returns:
        ``(16000, samples)``.  ``samples`` is ``y`` itself for 16 kHz input
        and a new int16 array for 48 kHz input.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (16000, 48000):
        # The speech-to-text model only takes 16 kHz audio; 48 kHz is the
        # single other rate that gets converted here.
        raise ValueError("Unsupported rate", sr)
    if sr == 16000:
        return sr, y

    # Work in float64 so that abs() of the most negative int16 value cannot wrap.
    samples = np.asarray(y, dtype=np.float64)

    # Normalise by the peak *magnitude*: dividing by the signed maximum lets a
    # dominant negative peak exceed the int16 range and wrap on the final cast.
    peak = np.max(np.abs(samples)) if samples.size else 0.0

    # Drop up to two trailing samples so the buffer splits evenly into
    # groups of three; otherwise the reshape below raises.
    usable = samples.shape[0] - samples.shape[0] % 3
    scaled = (samples[:usable] / max(peak, 1)) * 32767

    return 16000, scaled.reshape((-1, 3)).mean(axis=1).astype("int16")
def _read_wav_int16(path):
    """Load a 16-bit PCM WAV file as ``(sample_rate, mono int16 samples)``."""
    import os
    import wave

    with wave.open(os.fspath(path), "rb") as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError("Unsupported sample width", sample_width)
        sample_rate = wav_file.getframerate()
        channels = wav_file.getnchannels()
        frames = wav_file.readframes(wav_file.getnframes())

    # WAV PCM data is little-endian; frames of a multi-channel file are interleaved.
    samples = np.frombuffer(frames, dtype="<i2")
    if channels > 1:
        # Down-mix to mono by averaging the channels of every frame.
        samples = samples.reshape((-1, channels)).mean(axis=1).astype("int16")
    return sample_rate, samples


def stt_record(audio_record_buffer):
    """Transcribe recorded speech with the English Coqui STT model.

    Args:
        audio_record_buffer: Either a ``(sample_rate, samples)`` tuple, or a
            path to an audio file.  The path form is what a Gradio audio
            input declared with ``type="filepath"`` passes to its callback;
            the file is assumed to be 16-bit PCM WAV — TODO confirm for the
            Gradio version in use.

    Returns:
        Whatever the model's ``stt`` call returns for the audio.

    Raises:
        ValueError: If the sample rate is not 16000/48000 Hz, or a WAV file
            is not 16-bit.
    """
    import os

    # Accept a file path as well as an in-memory buffer: unpacking a path
    # string with ``*`` would spread its characters over reformat_freq's
    # parameters and fail.
    if isinstance(audio_record_buffer, (str, os.PathLike)):
        audio_record_buffer = _read_wav_int16(audio_record_buffer)
    _, samples = reformat_freq(*audio_record_buffer)

    # Using the English model and its external scorer from the Hub repository.
    acoustic_model = Model(hf_hub_download(repo_id=REPO_ID, filename="english/model.tflite"))
    scorer_path = hf_hub_download(repo_id=REPO_ID, filename="english/huge-vocabulary.scorer")

    # The scorer was unconditionally enabled (the flag guarding it was a
    # constant True), so the never-taken "disable scorer" branch is gone.
    acoustic_model.enableExternalScorer(scorer_path)
    return acoustic_model.stt(samples)
 #emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
 #emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")
   return html,history,"tts_output.wav"
def greet_stt_to_tts(character,your_voice,message,history):
  """Run one speech-to-speech chat turn.

  The recording is transcribed, the transcript is sent to the chat model, the
  reply is synthesised with the ``tts`` command line tool using the recording
  as the speaker reference, and the conversation is rendered as HTML.

  Args:
    character: Name of the persona to chat with.
    your_voice: Path of the recorded audio; used both as speech-to-text input
      and as the ``--speaker_wav`` reference for synthesis.
    message: Ignored; it is replaced by the transcript of ``your_voice``.
    history: Session dict with keys "character" and "message_history", or a
      falsy value on the first call.

  Returns:
    ``(html, history, "tts_output.wav")``.
  """
  # Imported locally so this function does not depend on the file's import block.
  import subprocess
  from html import escape

  #gradios set_state/get_state had problems on embedded html!
  history = history or {"character": character, "message_history" : [] }
  #gradios set_state/get_state does not persist session for now using global
  #global history
  if history["character"] != character:
    #switching character
    history = {"character": character, "message_history" : [] }

  # speech -> text (Coqui STT via stt_record)
  message = stt_record(your_voice)
  response = get_chat_response(character,history=history["message_history"],input_txt=message)

  # Pass arguments as a list instead of a concatenated shell string: quotes or
  # shell metacharacters in the reply or the file path can neither break the
  # command nor inject another one.  check=False keeps the previous
  # best-effort behaviour (a failed synthesis does not raise).
  subprocess.run(
    [
      "tts",
      "--text", response,
      "--model_name", "tts_models/multilingual/multi-dataset/your_tts",
      "--speaker_wav", your_voice,
      "--language_idx", "en",
    ],
    check=False,
  )

  history["message_history"].append((message, response))
  #emotion = get_emotion(response)

  # Escape everything interpolated into the markup so transcribed or generated
  # text containing <, > or & is shown literally instead of parsed as HTML.
  safe_character = escape(str(character))
  parts = ["<div class='chatbot'>"]
  for user_msg, resp_msg in history["message_history"]:
      parts.append(f"<div class='user_msg'>You: {escape(str(user_msg))}</div>")
      parts.append(f"<div class='resp_msg'>{safe_character}: {escape(str(resp_msg))}</div>")
  parts.append("</div>")
  chat_html = "".join(parts)

  return chat_html,history,"tts_output.wav"
 def greet_textonly(character,message,history):
   #gradios set_state/get_state had problems on embedded html!
 history =   {"character": "None", "message_history" : [] }
# Speech-to-speech interface: persona dropdown, microphone recording
# (delivered to the callback as a file path), a text box and the session
# state go in; chat HTML, updated state and the synthesised audio come out.
interface_full = gr.Interface(
    fn=greet_stt_to_tts,
    inputs=[
        gr.inputs.Dropdown(personality_choices),
        gr.inputs.Audio(source="microphone", type="filepath"),
        "text",
        "state",
    ],
    outputs=[
        "html",
        "state",
        gr.outputs.Audio(type="file"),
    ],
    css=css,
    title="Chat with Your Voice",
    description=description,
    article=article,
    live=True,
)
 interface_mic = gr.Interface(fn=greet,
                         inputs=[gr.inputs.Dropdown(personality_choices),
                                 gr.inputs.Audio(source="microphone", type="filepath") ,
# The interface list and the tab-label list are parallel: entry i of the
# first is shown under label i of the second.
appinterface = gr.TabbedInterface(
    [interface_mic, interface_full, interface_file, interface_text],
    ["Chat with Mic Record", "Chat Speech -> Speech", "Chat with Audio Upload", "Chat Text only"],
)
appinterface.launch()