import argparse
import asyncio
import json
import os
import sys
import telnetlib
import tempfile

import gradio as gr
from ChatGPT_lite.ChatGPT import Chatbot
from neon_tts_plugin_coqui import CoquiTTS
from pygpt import PyGPT

LANGUAGES = list(CoquiTTS.langs.keys())
default_lang = "en"

# Alternative speech-to-text backends, kept for reference:
#import whisper
#whisper_model = whisper.load_model("small")
#whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")

# Remote Space called by chat_hf(); its result is unpacked there as
# (transcript, ChatGPT reply).
chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")

session_token = os.environ.get('SessionToken')
bypass_node = "https://gpt.pawan.krd"
#api_endpoint = os.environ.get('API_EndPoint')

# ChatGPT
#from revChatGPT.ChatGPT import Chatbot
#chatbot = Chatbot({"session_token": session_token})

# Install-time side effects. These run on import and must stay ahead of the
# paddlehub import below. NOTE: gradio is already imported at this point, so
# the gradio pin cannot change the module object bound to `gr` in this process.
os.system("pip install numpy==1.23.1")
os.system("pip install gradio==2.7.5.2")
os.system("python -m pip install paddlepaddle-gpu==2.2.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html")
os.system("hub install wav2lip==1.0.0")

import paddlehub as hub  # noqa: E402  (needs the installs above)

module = hub.Module(name="wav2lip")


def inference(image, audio):
    """Run the wav2lip module on a face image and an audio track.

    Args:
        image: Passed to ``wav2lip_transfer`` as ``face``.
        audio: Passed to ``wav2lip_transfer`` as ``audio``.

    Returns:
        The literal string ``"result.mp4"``.
        NOTE(review): presumably wav2lip writes that file into ``output_dir``
        ('.') -- confirm against the paddlehub module.
    """
    module.wav2lip_transfer(face=image, audio=audio, output_dir='.', use_gpu=False)
    return "result.mp4"


title = "Speech to ChatGPT to Speech"
description = ""
article = ""
examples = [['monatest.jpeg', "game.wav"]]
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
coquiTTS = CoquiTTS()
chat_id = {'conversation_id': None, 'parent_id': None}
headers = {'Authorization': 'yusin'}


##############################################################
async def async_main(prompt):
    """Send one prompt through ``Chatbot`` and return the answer text.

    Args:
        prompt: Text forwarded to ``Chatbot.ask``.

    Returns:
        ``response['answer']`` from the ``Chatbot.ask`` result.
    """
    if session_token is None:
        print("Please provide a session token")
    # The session token is a credential, so only the node URL is logged.
    print(bypass_node)
    chat = Chatbot(session_token, bypass_node)
    try:
        await asyncio.gather(chat.wait_for_ready())
        # Exactly one request per call; no loop is needed.
        response = await chat.ask(prompt)
        print(f"\nBot: {response['answer']}\n")
    finally:
        # Close sockets even when the request fails.
        chat.close()
    print(response)
    return response['answer']


def sync_main(prompt):
    """Blocking variant of ``async_main`` that drives its own event loop.

    Args:
        prompt: Text forwarded to ``Chatbot.ask``.

    Returns:
        ``response['answer']`` from the ``Chatbot.ask`` result.
    """
    chat = Chatbot(session_token, bypass_node)
    # A fresh loop is created and installed as the current loop for this call.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(chat.wait_for_ready())
        response = loop.run_until_complete(chat.ask(prompt))
        print(f"\nBot: {response['answer']}\n")
    finally:
        # Close sockets, then release the loop. stop() on a loop that is not
        # running frees nothing, so close() is used instead.
        chat.close()
        loop.close()
    print(response)
    return response['answer']


###########################################
parser = argparse.ArgumentParser()
parser.add_argument('--session_token', type=str, default=None)
parser.add_argument('--bypass_node', type=str, default="https://gpt.pawan.krd")
parser.add_argument('--async_mode', action='store_true')
args = parser.parse_args()
# The environment-provided token always replaces any --session_token value.
args.session_token = session_token


##########################################
async def chat_gpt_ask(prompt):
    """Ask ``PyGPT`` a single question and return its answer.

    Args:
        prompt: Text forwarded to ``PyGPT.ask``.

    Returns:
        Whatever ``PyGPT.ask`` returns for the prompt.
    """
    chat_gpt = PyGPT(session_token)
    await chat_gpt.connect()
    try:
        await chat_gpt.wait_for_ready()
        print(prompt)
        answer = await chat_gpt.ask(prompt)
        print(answer)
    finally:
        # Disconnect even if waiting or asking fails.
        await chat_gpt.disconnect()
    return answer


# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe audio, obtain a ChatGPT reply, and synthesize it as speech.

    Args:
        audio: Audio input forwarded to the remote ``chatgpt`` Space.
        custom_token: Value of the UI token textbox; currently unused.
        language: Language key passed to CoquiTTS as the speaker language.

    Returns:
        A tuple ``(whisper_text, gpt_response, wav_path)`` where ``wav_path``
        is the name of the temporary ``.wav`` file holding the spoken reply.
    """
    output = chatgpt(audio, "transcribe", fn_index=0)
    whisper_text, gpt_response = output[0], output[1]
    # NOTE: earlier revisions called translate() and async_main() /
    # chat_gpt_ask() here directly; that code was disabled in favour of the
    # remote Space call above.

    # to voice
    # delete=False keeps the file on disk so its path can be handed back.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
    return whisper_text, gpt_response, fp.name


# whisper
#def translate(audio):
#    print("""
#    —
#    Sending audio to Whisper ...
#    —
#    """)
#
#    audio = whisper.load_audio(audio)
#    audio = whisper.pad_or_trim(audio)
#
#    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
#
#    _, probs = whisper_model.detect_language(mel)
#
#    transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
#
#    transcription = whisper.decode(whisper_model, mel, transcript_options)
#
#    print("language spoken: " + transcription.language)
#    print("transcript: " + transcription.text)
#    print("———————————————————————————————————————————")
#
#    return transcription.text

# Space that translate() calls for transcription; loaded on first use.
WHISPER_SPACE = "spaces/sanchit-gandhi/whisper-large-v2"
_whisper_client = None


def translate(audio):
    """Transcribe ``audio`` through the remote Whisper Space.

    The Space client is created lazily on the first call, because no
    module-level ``whisper`` callable is defined in this file.

    Args:
        audio: Audio input forwarded unchanged to the Space.

    Returns:
        The value returned by the Space call.
    """
    global _whisper_client
    if _whisper_client is None:
        _whisper_client = gr.Interface.load(name=WHISPER_SPACE)
    print("""
    —
    Sending audio to Whisper ...
    —
    """)
    text_result = _whisper_client(audio, None, "transcribe", fn_index=0)
    print(text_result)
    return text_result


with gr.Blocks() as blocks:
    # Page header: the title surrounded by blank lines.
    gr.Markdown("\n\n" + title + "\n\n")
    #gr.Markdown(description)
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
    with gr.Row(equal_height=True):  # equal_height=False
        with gr.Column():  # variant="panel"
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():  # mobile_collapse=False
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)
            # Lip-sync interface; takes the synthesized audio as one input.
            iface = gr.Interface(inference, [gr.inputs.Image(type="filepath"), audio], outputs=gr.outputs.Video(label="Output Video"), article=article, description=description)
    #gr.Markdown(info)
    #gr.Markdown("\n"
    #            +f'visitors badge'
    #            +"\n")

    # actions
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    # Changing the language writes that language's sample sentence to text2.
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)

blocks.launch(debug=True, cache_examples=True, enable_queue=True)