Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,40 +1,33 @@
|
|
|
|
|
|
|
|
1 |
import tempfile
|
2 |
import gradio as gr
|
|
|
|
|
3 |
from neon_tts_plugin_coqui import CoquiTTS
|
|
|
|
|
|
|
4 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
5 |
LANGUAGES = LANGUAGES + ['cn', 'jp']
|
6 |
default_lang = "en"
|
7 |
-
#import whisper
|
8 |
-
#whisper_model = whisper.load_model("small")
|
9 |
-
#whisper = gr.Interface.load(name="spaces/abidlabs/whisper-large-v2")
|
10 |
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
|
11 |
-
#cn_a_jp = gr.Blocks.load(name="spaces/Yusin/anime-tts_yusin")
|
12 |
-
#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
|
13 |
-
#chatgpt = gr.Blocks.load(name="spaces/seawolf2357/chatgptclone")
|
14 |
-
import os
|
15 |
-
import json
|
16 |
-
import openai
|
17 |
-
#session_token = os.environ.get('SessionToken')
|
18 |
api_key = os.environ.get('api_key')
|
19 |
#if you have OpenAI API key as a string, enable the below
|
20 |
openai.api_key = api_key
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
coquiTTS = CoquiTTS()
|
26 |
-
|
27 |
|
28 |
# ChatGPT
|
29 |
-
def chat_hf(audio,
|
30 |
try:
|
31 |
whisper_text = translate(audio)
|
32 |
if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
|
33 |
gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
|
34 |
else:
|
35 |
-
#gpt_response = chatgpt(whisper_text, [], fn_index=0)
|
36 |
-
#print(gpt_response)
|
37 |
-
#gpt_response = gpt_response[0]
|
38 |
gpt_response = openai_create(whisper_text)
|
39 |
|
40 |
except:
|
@@ -42,35 +35,16 @@ def chat_hf(audio, custom_token, language):
|
|
42 |
gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""
|
43 |
|
44 |
# to voice
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
# whisper
|
51 |
-
#def translate(audio):
|
52 |
-
# print("""
|
53 |
-
# β
|
54 |
-
# Sending audio to Whisper ...
|
55 |
-
# β
|
56 |
-
# """)
|
57 |
-
#
|
58 |
-
# audio = whisper.load_audio(audio)
|
59 |
-
# audio = whisper.pad_or_trim(audio)
|
60 |
-
#
|
61 |
-
# mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
62 |
-
#
|
63 |
-
# _, probs = whisper_model.detect_language(mel)
|
64 |
-
#
|
65 |
-
# transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
|
66 |
-
#
|
67 |
-
# transcription = whisper.decode(whisper_model, mel, transcript_options)
|
68 |
-
#
|
69 |
-
# print("language spoken: " + transcription.language)
|
70 |
-
# print("transcript: " + transcription.text)
|
71 |
-
# print("βββββββββββββββββββββββββββββββββββββββββββ")
|
72 |
-
#
|
73 |
-
# return transcription.text
|
74 |
|
75 |
def translate(audio):
|
76 |
print("""
|
@@ -78,14 +52,13 @@ def translate(audio):
|
|
78 |
Sending audio to Whisper ...
|
79 |
β
|
80 |
""")
|
81 |
-
|
82 |
text_result = whisper(audio, None, "transcribe", fn_index=0)
|
83 |
print(text_result)
|
84 |
return text_result
|
85 |
|
86 |
|
87 |
def openai_create(prompt):
|
88 |
-
|
89 |
response = openai.Completion.create(
|
90 |
model="text-davinci-003",
|
91 |
prompt=prompt,
|
@@ -99,11 +72,9 @@ def openai_create(prompt):
|
|
99 |
print(response.choices[0].text)
|
100 |
return response.choices[0].text
|
101 |
|
|
|
102 |
with gr.Blocks() as blocks:
|
103 |
-
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
|
104 |
-
+ title
|
105 |
-
+ "</h1>")
|
106 |
-
#gr.Markdown(description)
|
107 |
radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
|
108 |
with gr.Row(equal_height=True):# equal_height=False
|
109 |
with gr.Column():# variant="panel"
|
@@ -115,18 +86,11 @@ with gr.Blocks() as blocks:
|
|
115 |
text1 = gr.Textbox(label="Speech to Text")
|
116 |
text2 = gr.Textbox(label="ChatGPT Response")
|
117 |
audio = gr.Audio(label="Output", interactive=False)
|
118 |
-
#gr.Markdown(info)
|
119 |
-
#gr.Markdown("<center>"
|
120 |
-
# +f'<img src={badge} alt="visitors badge"/>'
|
121 |
-
# +"</center>")
|
122 |
-
|
123 |
# actions
|
124 |
submit.click(
|
125 |
chat_hf,
|
126 |
[audio_file, custom_token, radio],
|
127 |
[text1, text2, audio],
|
128 |
)
|
129 |
-
#radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)
|
130 |
-
|
131 |
|
132 |
blocks.launch(debug=True)
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import openai
|
4 |
import tempfile
|
5 |
import gradio as gr
|
6 |
+
import infer
|
7 |
+
import config
|
8 |
from neon_tts_plugin_coqui import CoquiTTS
|
9 |
+
title = "Speech to ChatGPT to Speech"
|
10 |
+
coquiTTS = CoquiTTS()
|
11 |
+
|
12 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
13 |
LANGUAGES = LANGUAGES + ['cn', 'jp']
|
14 |
default_lang = "en"
|
|
|
|
|
|
|
15 |
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
api_key = os.environ.get('api_key')
|
17 |
#if you have OpenAI API key as a string, enable the below
|
18 |
openai.api_key = api_key
|
19 |
|
20 |
+
pth_path = config.pth_path
|
21 |
+
config_json = config.config_json
|
22 |
+
net_g_ms, hps = infer.load_model(config_json, pth_path)
|
|
|
|
|
23 |
|
24 |
# ChatGPT
|
25 |
+
def chat_hf(audio, language):
    """Transcribe *audio* with Whisper, get a ChatGPT reply, and voice the reply.

    Parameters:
        audio: path to the recorded/uploaded audio file (from gr.Audio).
        language: language code selected in the UI radio (e.g. "en", "cn", "jp").

    Returns:
        (whisper_text, gpt_response, voice_out) where voice_out is either a
        (sampling_rate, waveform) tuple for the cn/jp VITS path or a wav file
        path for the CoquiTTS path.
    """
    try:
        whisper_text = translate(audio)
        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = openai_create(whisper_text)
    except Exception:
        # BUG FIX: the original used a bare `except:` and left `whisper_text`
        # unbound when translate() itself raised, causing a NameError at the
        # return below. Bind it so the UI always gets a full result tuple.
        whisper_text = ""
        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""

    # to voice
    # BUG FIX: the original condition was `language == 'cn' or 'jp'`, which is
    # always truthy ('jp' is a non-empty string), so EVERY language was routed
    # through the cn/jp VITS synthesizer. Test membership instead.
    if language in ('cn', 'jp'):
        text = infer.clean_text(gpt_response)
        # NOTE(review): speaker index 2 / "demo" are hard-coded upstream args
        # to the local `infer` module — confirm against infer.infer's signature.
        wav = infer.infer(text, net_g_ms, 2, "demo")
        voice_out = (hps.data.sampling_rate, wav)
    else:
        # delete=False so the file survives for Gradio to read after close.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
        voice_out = fp.name
    return whisper_text, gpt_response, voice_out
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
def translate(audio):
    """Send *audio* to the hosted Whisper Space and return its transcription."""
    banner = """
β
Sending audio to Whisper ...
β
"""
    print(banner)
    # fn_index=0 targets the transcribe endpoint of the loaded Space; the
    # second positional slot (microphone vs. file) is unused here.
    transcript = whisper(audio, None, "transcribe", fn_index=0)
    print(transcript)
    return transcript
|
59 |
|
60 |
|
61 |
def openai_create(prompt):
|
|
|
62 |
response = openai.Completion.create(
|
63 |
model="text-davinci-003",
|
64 |
prompt=prompt,
|
|
|
72 |
print(response.choices[0].text)
|
73 |
return response.choices[0].text
|
74 |
|
75 |
+
|
76 |
with gr.Blocks() as blocks:
|
77 |
+
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
|
|
|
|
|
|
|
78 |
radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
|
79 |
with gr.Row(equal_height=True):# equal_height=False
|
80 |
with gr.Column():# variant="panel"
|
|
|
86 |
text1 = gr.Textbox(label="Speech to Text")
|
87 |
text2 = gr.Textbox(label="ChatGPT Response")
|
88 |
audio = gr.Audio(label="Output", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
89 |
# actions
|
90 |
submit.click(
|
91 |
chat_hf,
|
92 |
[audio_file, custom_token, radio],
|
93 |
[text1, text2, audio],
|
94 |
)
|
|
|
|
|
95 |
|
96 |
blocks.launch(debug=True)
|