import tempfile import gradio as gr from neon_tts_plugin_coqui import CoquiTTS LANGUAGES = list(CoquiTTS.langs.keys()) default_lang = "en" # ChatGPT from pyChatGPT import ChatGPT import whisper whisper_model = whisper.load_model("small") #whisper = gr.Blocks.load(name="spaces/sanchit-gandhi/whisper-large-v2") import os session_token = os.environ.get('SessionToken') title = "Speech to ChatGPT to Speech" #info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)" #badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui" coquiTTS = CoquiTTS() # ChatGPT def chat_hf(audio, custom_token, language): try: whisper_text = translate(audio) api = ChatGPT(session_token) resp = api.send_message(whisper_text) api.refresh_auth() # refresh the authorization token api.reset_conversation() # reset the conversation gpt_response = resp['message'] except: whisper_text = translate(audio) api = ChatGPT(custom_token) resp = api.send_message(whisper_text) api.refresh_auth() # refresh the authorization token api.reset_conversation() # reset the conversation gpt_response = resp['message'] # to voice with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp: coquiTTS.get_tts(gpt_response, fp, speaker = {"language" : language}) return whisper_text, gpt_response, fp.name # whisper def translate(audio): print(""" — Sending audio to Whisper ... — """) audio = whisper.load_audio(audio) audio = whisper.pad_or_trim(audio) mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device) _, probs = whisper_model.detect_language(mel) transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False) transcription = whisper.decode(whisper_model, mel, transcript_options) print("language spoken: " + transcription.language) print("transcript: " + transcription.text) print("———————————————————————————————————————————") return transcription.text #def translate(audio): # print(""" # — # Sending audio to Whisper ... # — # """) # # text_result = whisper(audio, None, "transcribe", fn_index=0) # print(text_result) # return text_result with gr.Blocks() as blocks: gr.Markdown("