ChatGPT-Speech

Runtime error

App Files Files Community

ChatGPT-Speech / app.py

Yusin

Update app.py

e0fcf8f over 1 year ago

raw

history blame contribute delete

No virus

3.35 kB

	import os
	import json
	import openai
	import tempfile
	import gradio as gr
	import infer
	import config
	from neon_tts_plugin_coqui import CoquiTTS
	# Heading text rendered at the top of the UI.
	title = "Speech to ChatGPT to Speech"
	# Shared Coqui TTS engine; chat_hf uses it for every language except 'cn'/'jp'.
	coquiTTS = CoquiTTS()

	# Language codes offered in the UI: everything Coqui lists, plus the two
	# codes ('cn', 'jp') that chat_hf voices through the `infer` model instead.
	LANGUAGES = list(CoquiTTS.langs.keys())
	LANGUAGES = LANGUAGES + ['cn', 'jp']
	default_lang = "en"
	# Callable handle to the hosted Whisper Space; `translate` invokes it.
	whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
	# The OpenAI key is read from the `api_key` environment variable
	# (os.environ.get yields None when the variable is not set).
	api_key = os.environ.get('api_key')
	# Hand the key to the openai client that openai_create calls.
	openai.api_key = api_key

	# Checkpoint path and model config come from the project-local `config` module.
	pth_path = config.pth_path
	config_json = config.config_json
	# Loaded once at import time. chat_hf passes `net_g_ms` to infer.infer and
	# reads `hps.data.sampling_rate` — presumably the synthesis network and its
	# hyper-parameters; confirm against infer.load_model.
	net_g_ms, hps = infer.load_model(config_json, pth_path)


	# ChatGPT
	def chat_hf(audio, custom_token, language):
	    """Transcribe recorded speech, get a completion for it, and voice the reply.

	    Args:
	        audio: Audio input, forwarded unchanged to ``translate``.
	        custom_token: Value of the "session token" textbox. Kept in the
	            signature so the UI wiring keeps working; this function does
	            not use it.
	        language: Selected language code. ``'cn'`` and ``'jp'`` are voiced
	            through ``infer``; every other value is handed to ``coquiTTS``.

	    Returns:
	        A 3-tuple ``(whisper_text, gpt_response, voice)``. ``voice`` is a
	        ``(sampling_rate, audio)`` pair for ``'cn'``/``'jp'`` and the path
	        of a temporary ``.wav`` file otherwise. If transcription itself
	        fails, ``whisper_text`` is an empty string.
	    """
	    # Transcribe exactly once. The transcript is initialised up front so the
	    # error path can still return whatever was obtained, instead of calling
	    # Whisper a second time from inside the handler (which would raise again
	    # if Whisper was the component that failed).
	    whisper_text = ""
	    try:
	        whisper_text = translate(audio)
	        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
	            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
	        else:
	            gpt_response = openai_create(whisper_text)
	    except Exception as exc:
	        # Best-effort UI: log the failure and answer with a canned apology.
	        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
	        # propagate normally.
	        print("chat_hf failed:", repr(exc))
	        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""

	    # to voice
	    print(language)
	    if language in ['cn', 'jp']:
	        # Strip all spaces and line breaks before cleaning and synthesis.
	        text = gpt_response.strip().replace(' ', '').replace('\n', '').replace('\r', '')
	        text = infer.clean_text(text)
	        audio = infer.infer(text, net_g_ms, 0, "demo")
	        voice_out = (hps.data.sampling_rate, audio)
	        return whisper_text, gpt_response, voice_out

	    # delete=False: the file path is returned to the caller, so the file
	    # must survive the end of the with-block.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
	        coquiTTS.get_tts(gpt_response, fp, speaker = {"language" : language})
	    return whisper_text, gpt_response, fp.name



	def translate(audio):
	    """Run ``audio`` through the loaded Whisper interface and return its output.

	    Prints a progress banner before the call and the transcription result
	    after it.
	    """
	    print("""
	—
	Sending audio to Whisper ...
	—
	""")

	    # fn_index=0 selects the first function exposed by the loaded interface;
	    # the task is fixed to "transcribe".
	    transcript = whisper(audio, None, "transcribe", fn_index=0)
	    print(transcript)
	    return transcript


	def openai_create(prompt):
	    """Request a completion for ``prompt`` and return the first choice's text.

	    Prints a progress banner before the request and the returned text
	    after it.
	    """
	    print("""
	—
	Giving response from ai ...
	—
	""")
	    # Request parameters gathered in one place; passed through unchanged.
	    request = {
	        "model": "text-davinci-003",
	        "prompt": prompt,
	        "temperature": 0.9,
	        "max_tokens": 150,
	        "top_p": 1,
	        "frequency_penalty": 0,
	        "presence_penalty": 0.6,
	        "stop": [" Human:", " AI:"],
	    }
	    response = openai.Completion.create(**request)
	    answer = response.choices[0].text
	    print(answer)
	    return answer


	# UI layout: heading and language picker on top, then two columns —
	# recording/inputs on the left, transcript/response/audio on the right.
	with gr.Blocks() as blocks:
	    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
	    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
	    with gr.Row(equal_height=True):
	        # Left column: microphone input, optional token, submit button.
	        with gr.Column():
	            audio_file = gr.Audio(source="microphone", type="filepath")
	            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
	            with gr.Row():
	                submit = gr.Button("Submit", variant="primary")
	        # Right column: what was heard, what was answered, and the spoken reply.
	        with gr.Column():
	            text1 = gr.Textbox(label="Speech to Text")
	            text2 = gr.Textbox(label="ChatGPT Response")
	            audio = gr.Audio(label="Output", interactive=False)
	    # Wire the button: the three inputs map to chat_hf's parameters in order,
	    # and its 3-tuple result fills the three output components in order.
	    submit.click(
	        fn=chat_hf,
	        inputs=[audio_file, custom_token, radio],
	        outputs=[text1, text2, audio],
	    )

	# Start the app once the layout is fully defined.
	blocks.launch(debug=True)