"""Speech → ChatGPT → Speech Gradio demo.

Pipeline: microphone audio is transcribed with Whisper, the transcript is
sent to ChatGPT (via a browser session token), and the reply is synthesized
back to audio with the Neon Coqui TTS plugin.
"""
import os
import tempfile

import gradio as gr
import whisper
from neon_tts_plugin_coqui import CoquiTTS
from pyChatGPT import ChatGPT

# Languages supported by the Coqui TTS plugin; "en" is the UI default.
# (These were commented out in the original, causing a NameError at the
# gr.Radio below.)
LANGUAGES = list(CoquiTTS.langs.keys())
default_lang = "en"

whisper_model = whisper.load_model("small")

# ChatGPT session token from the environment; may be None, in which case
# the UI's fallback textbox token is used on failure.
session_token = os.environ.get('SessionToken')

title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"

coquiTTS = CoquiTTS()


def _ask_chatgpt(token, message):
    """Send *message* to ChatGPT with session *token*; return the reply text."""
    api = ChatGPT(token)
    resp = api.send_message(message)
    api.refresh_auth()        # refresh the authorization token
    api.reset_conversation()  # reset the conversation
    return resp['message']


def chat_hf(audio, custom_token, language):
    """Transcribe *audio*, ask ChatGPT, and voice the reply in *language*.

    Parameters
    ----------
    audio : str
        Path to the recorded audio file (Gradio ``type="filepath"``).
    custom_token : str
        User-supplied ChatGPT session token, used if the env token fails.
    language : str
        TTS language key selected in the UI radio.

    Returns
    -------
    tuple
        ``(transcript, chatgpt_reply, path_to_wav)``.
    """
    # Transcribe once up front — the original re-ran Whisper inside a bare
    # ``except:`` fallback, doubling the transcription work on token failure.
    whisper_text = translate(audio)
    try:
        gpt_response = _ask_chatgpt(session_token, whisper_text)
    except Exception:
        # Environment token missing/expired — retry with the token the
        # user pasted into the UI.
        gpt_response = _ask_chatgpt(custom_token, whisper_text)

    # Synthesize to a temp wav. delete=False is deliberate: Gradio reads
    # the file by path after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name


def translate(audio):
    """Transcribe an audio file with Whisper and return the text."""
    print("""—
    Sending audio to Whisper ...
    —""")
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

    # Language detection result is unused beyond Whisper's internal state;
    # the decoded transcription carries the detected language.
    _, probs = whisper_model.detect_language(mel)

    transcript_options = whisper.DecodingOptions(task="transcribe", fp16=False)
    transcription = whisper.decode(whisper_model, mel, transcript_options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    print("———————————————————————————————————————————")
    return transcription.text


with gr.Blocks() as blocks:
    # NOTE(review): the original HTML wrapper around the title string was
    # garbled by extraction; a plain Markdown heading is used instead.
    gr.Markdown("# " + title)
    #gr.Markdown(description)
    with gr.Row(equal_height=True):  # equal_height=False
        with gr.Column():  # variant="panel"
            radio = gr.Radio(
                label="Language",
                choices=LANGUAGES,
                value=default_lang,
            )
            audio_file = gr.inputs.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(
                label='If it fails, use your own session token',
                placeholder="your own session token",
            )
            with gr.Row():  # mobile_collapse=False
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="chatGPT response")
            audio = gr.Audio(label="Output", interactive=False)
    #gr.Markdown(info)

    # Wire the submit button to the full speech→chat→speech pipeline.
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    # Preview the selected language's TTS sample sentence in the reply box.
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)

# NOTE(review): a trailing fragment fused in from an unrelated app
# (``text.submit(translate_language, ...)``, ``btn.click(...)``,
# ``trigger_component.change(fn=inference, ...)`` and a PaddleHub
# ``gr.HTML`` panel) referenced names never defined in this file and would
# raise NameError after launch; it has been removed.
blocks.launch(debug=True)