import json
import os
import tempfile

import gradio as gr
import openai
from neon_tts_plugin_coqui import CoquiTTS

LANGUAGES = list(CoquiTTS.langs.keys())
# NOTE(review): 'cn' and 'jp' are appended to the plugin's own language list;
# confirm that CoquiTTS can actually synthesize them.
LANGUAGES = LANGUAGES + ['cn', 'jp']
default_lang = "en"
#import whisper
#whisper_model = whisper.load_model("small")
#whisper = gr.Interface.load(name="spaces/abidlabs/whisper-large-v2")
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
#cn_a_jp = gr.Blocks.load(name="spaces/Yusin/anime-tts_yusin")
#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
#chatgpt = gr.Blocks.load(name="spaces/seawolf2357/chatgptclone")
#session_token = os.environ.get('SessionToken')
api_key = os.environ.get('api_key')
#if you have OpenAI API key as a string, enable the below
openai.api_key = api_key

title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
coquiTTS = CoquiTTS()

# Sentinel text that chat_hf compares the transcription against to detect
# that no audio was supplied.
_WHISPER_NO_AUDIO = "ERROR: You have to either use the microphone or upload an audio file"
_MISSING_AUDIO_REPLY = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
_BUSY_REPLY = """Sorry, I'm quite busy right now, but please try again later :)"""


# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe ``audio``, get a text completion for it and speak the reply.

    Args:
        audio: Value forwarded unchanged to ``translate`` (the UI passes the
            recorded file path).
        custom_token: Accepted because the UI wires a textbox to this slot;
            it is not used by the current implementation.
        language: Language code handed to the TTS engine as
            ``speaker={"language": language}``.

    Returns:
        A ``(whisper_text, gpt_response, wav_path)`` tuple: the transcription,
        the reply text, and the path of a temporary ``.wav`` file holding the
        synthesized reply.  The file is created with ``delete=False`` so it
        still exists after this function returns.
    """
    # Default so the return value is well defined even if transcription fails.
    whisper_text = ""
    try:
        # Transcribe exactly once; the transcript is kept even when the
        # completion request below fails.
        whisper_text = translate(audio)
        if whisper_text == _WHISPER_NO_AUDIO:
            gpt_response = _MISSING_AUDIO_REPLY
        else:
            #gpt_response = chatgpt(whisper_text, [], fn_index=0)
            #print(gpt_response)
            #gpt_response = gpt_response[0]
            gpt_response = openai_create(whisper_text)
    except Exception as err:
        # Best effort: report the failure in the log and answer with a
        # friendly fallback instead of re-running the failing call.
        print(f"chat_hf: request failed: {err!r}")
        gpt_response = _BUSY_REPLY

    # to voice
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name


# whisper
#def translate(audio):
#    print("""
#    —
#    Sending audio to Whisper ...
#    —
#    """)
#
#    audio = whisper.load_audio(audio)
#    audio = whisper.pad_or_trim(audio)
#
#    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
#
#    _, probs = whisper_model.detect_language(mel)
#
#    transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
#
#    transcription = whisper.decode(whisper_model, mel, transcript_options)
#
#    print("language spoken: " + transcription.language)
#    print("transcript: " + transcription.text)
#    print("———————————————————————————————————————————")
#
#    return transcription.text


def translate(audio):
    """Send ``audio`` to the loaded Whisper interface and return its result.

    The call passes ``(audio, None, "transcribe")`` with ``fn_index=0``.
    NOTE(review): the meaning of each positional argument is defined by the
    remote Space, not by this file — confirm against the Space if it changes.

    Args:
        audio: Value forwarded as the first input of the Whisper interface.

    Returns:
        Whatever the Whisper interface returns; the caller treats it as the
        transcription text.
    """
    print("""
    —
    Sending audio to Whisper ...
    —
    """)
    #_, text_result = whisper(audio, "", fn_index=0)
    text_result = whisper(audio, None, "transcribe", fn_index=0)
    print(text_result)
    return text_result


def openai_create(prompt):
    """Request a completion for ``prompt`` and return the generated text.

    Args:
        prompt: Text sent as the completion prompt.

    Returns:
        The ``text`` of the first choice in the completion response.
    """
    response = openai.Completion.create(
        model="text-chat-davinci-002-20221122",
        prompt=prompt,
        temperature=0.9,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0.6,
        stop=[" Human:", " AI:"],
    )
    # Read the text once so the logged value and the returned value match.
    completion_text = response.choices[0].text
    print(completion_text)
    return completion_text


with gr.Blocks() as blocks:
    # The title is padded with blank lines.  These were literal line breaks
    # inside ordinary quoted strings, which is not valid Python, so they are
    # spelled as explicit "\n\n" here.
    # NOTE(review): surrounding HTML markup may have been lost — confirm.
    gr.Markdown("\n\n" + title + "\n\n")
    #gr.Markdown(description)
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
    with gr.Row(equal_height=True):# equal_height=False
        with gr.Column():# variant="panel"
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():# mobile_collapse=False
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)
    #gr.Markdown(info)
    #gr.Markdown("\n\n"
    #            +f'visitors badge'
    #            +"\n\n")

    # actions
    # Inputs map positionally onto chat_hf(audio, custom_token, language);
    # outputs map onto its (whisper_text, gpt_response, wav_path) return.
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    #radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)

blocks.launch(debug=True)