Speech-ChatGPT-Speech

Runtime error

App Files Files Community

Speech-ChatGPT-Speech / app.py

Yusin

Update app.py

7cc2adc almost 2 years ago

raw

history blame

3.91 kB

	# Module setup: imports plus the handles and constants shared by the
	# functions and the UI definition further down.
	import tempfile
	import gradio as gr
	from neon_tts_plugin_coqui import CoquiTTS
	# Language codes offered in the UI, taken from the keys of CoquiTTS.langs.
	LANGUAGES = list(CoquiTTS.langs.keys())
	# Language pre-selected in the "Language" radio group.
	default_lang = "en"

	# ChatGPT
	from pyChatGPT import ChatGPT
	# Local Whisper inference is disabled; the hosted Space below is used instead.
	#import whisper
	#whisper_model = whisper.load_model("small")
	# Handle to the hosted Whisper Space; translate() calls it like a function.
	whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
	import os
	# ChatGPT session token tried first by chat_hf(); None when the
	# 'SessionToken' environment variable is not set.
	session_token = os.environ.get('SessionToken')

	# Heading rendered at the top of the page.
	title = "Speech to ChatGPT to Speech"
	#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
	#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"

	# Single text-to-speech engine instance, created at import time and
	# reused by chat_hf() for every request.
	coquiTTS = CoquiTTS()


	# ChatGPT
	def _ask_chatgpt(token, prompt):
	    """Send *prompt* to ChatGPT authenticated with *token*; return the reply text."""
	    api = ChatGPT(token)
	    resp = api.send_message(prompt)

	    api.refresh_auth()  # refresh the authorization token
	    api.reset_conversation()  # reset the conversation
	    return resp['message']


	def chat_hf(audio, custom_token, language):
	    """Transcribe speech, ask ChatGPT about it, and voice the answer.

	    Args:
	        audio: Recorded audio, forwarded unchanged to ``translate``.
	        custom_token: User-supplied ChatGPT session token. It is only used
	            when the request made with the module-level ``session_token``
	            fails.
	        language: Language code handed to CoquiTTS as the speaker language.

	    Returns:
	        A ``(whisper_text, gpt_response, wav_path)`` tuple: the
	        transcription, the ChatGPT reply, and the path of the ``.wav``
	        file holding the synthesized reply.

	    Raises:
	        Exception: Errors from ``translate``, from the fallback ChatGPT
	            request, or from speech synthesis propagate to the caller.
	    """
	    # Transcribe exactly once: the transcription does not depend on which
	    # ChatGPT token ends up being used, so a ChatGPT failure must not
	    # trigger a second Whisper request.
	    whisper_text = translate(audio)

	    try:
	        gpt_response = _ask_chatgpt(session_token, whisper_text)
	    except Exception:
	        # The default token did not work; retry with the user's own token.
	        # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
	        # SystemExit are not swallowed by the fallback.
	        gpt_response = _ask_chatgpt(custom_token, whisper_text)

	    # to voice
	    # delete=False keeps the file on disk after the ``with`` block closes
	    # it, so the returned path is still valid for the caller.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
	        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

	    return whisper_text, gpt_response, fp.name

	# whisper
	#def translate(audio):
	# print("""
	# —
	# Sending audio to Whisper ...
	# —
	# """)
	#
	# audio = whisper.load_audio(audio)
	# audio = whisper.pad_or_trim(audio)
	#
	# mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
	#
	# _, probs = whisper_model.detect_language(mel)
	#
	# transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
	#
	# transcription = whisper.decode(whisper_model, mel, transcript_options)
	#
	# print("language spoken: " + transcription.language)
	# print("transcript: " + transcription.text)
	# print("———————————————————————————————————————————")
	#
	# return transcription.text

	def translate(audio):
	    """Run *audio* through the hosted Whisper Space in "transcribe" mode.

	    Prints a banner before the request and the result after it, then
	    returns whatever the Space call returned (presumably the transcript
	    text; confirm against the Space's API).
	    """
	    # The whitespace inside this literal is part of what gets printed.
	    banner = """
	—
	Sending audio to Whisper ...
	—
	"""
	    print(banner)

	    transcript = whisper(audio, None, "transcribe", fn_index=0)
	    print(transcript)
	    return transcript


	# UI definition: inputs (language, microphone, optional token, submit) and
	# outputs (transcript, ChatGPT reply, synthesized audio), plus event wiring.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened; confirm it against the intended layout.
	with gr.Blocks() as blocks:
	    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
	                + title
	                + "</h1>")
	    #gr.Markdown(description)
	    with gr.Row(equal_height=True):# equal_height=False
	        with gr.Column():# variant="panel"
	            # Language passed to chat_hf() as the TTS speaker language.
	            radio = gr.Radio(
	                label="Language",
	                choices=LANGUAGES,
	                value=default_lang
	            )
	            # Microphone recording, handed to chat_hf() as a file path.
	            audio_file = gr.inputs.Audio(source="microphone", type="filepath")
	            # Fallback ChatGPT session token supplied by the user.
	            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
	            with gr.Row():# mobile_collapse=False
	                submit = gr.Button("Submit", variant="primary")
	        with gr.Column():
	            text1 = gr.Textbox(label="Speech to Text")
	            text2 = gr.Textbox(label="ChatGPT response")
	            audio = gr.Audio(label="Output", interactive=False)
	    #gr.Markdown(info)
	    #gr.Markdown("<center>"
	    # +f'<img src={badge} alt="visitors badge"/>'
	    # +"</center>")

	    # actions
	    # Submit runs the full pipeline; the three return values of chat_hf()
	    # fill text1, text2 and audio in that order.
	    submit.click(
	        chat_hf,
	        [audio_file, custom_token, radio],
	        [text1, text2, audio],
	    )
	    # Changing the language writes that language's "sentence" entry from
	    # CoquiTTS.langs into the "ChatGPT response" box.
	    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)


	# Start serving the app as soon as the module is executed, with Gradio's
	# debug flag enabled.
	blocks.launch(debug=True)