# WhisperDemo / app-backup.py
# (Hugging Face Space file header — dgongor, commit 7f58389)
import os
import gradio as gr
import openai
from gtts import gTTS # Google Text To Speech
# Load the OpenAI API key from the environment instead of hard-coding it.
# Set OPENAI_API_KEY in the host/Space secrets; never commit a key (the
# original committed an empty literal, which fails every request anyway).
openai.api_key = os.getenv("OPENAI_API_KEY", "")
def transcribe(audio):
    """Transcribe recorded microphone audio to text with OpenAI Whisper.

    Wired as: input from the Microphone component, output to the
    "Audio Translation" Textbox component.

    Parameters
    ----------
    audio : str
        Filesystem path to the audio clip produced by the Gradio
        microphone component (type="filepath").

    Returns
    -------
    str
        The transcribed text returned by the whisper-1 model.
    """
    # Context manager closes the handle even if the API call raises
    # (the original opened the file and never closed it).
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
# Create the Gradio app using Blocks. Event cascade on submit:
# mic audio -> Whisper transcript -> ChatGPT reply -> gTTS speech.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# bienvenido al Chat Bot!
"""
    )
    with gr.Accordion("Click for Instructions:"):
        gr.Markdown(
            """
* Dime en lo que te puedo ayudar
""")

    # Per-session chat log seeded with the system prompt that sets the
    # assistant's persona. gr.State gives each user — and each page
    # reload — an independent copy of this list.
    messages = gr.State(value=[{"role": "system", "content": "Eres un cajero bancario. Responde brevemente."}])

    def botResponse(user_input, messages):
        """Append the user's text to the chat log, fetch a ChatGPT reply,
        and return the visible transcript.

        Wired as: input from the "Audio Translation" Textbox, output to
        the "Chat Transcript" Textbox.

        Parameters
        ----------
        user_input : str
            The user's transcribed utterance.
        messages : list[dict]
            Ongoing chat log held in gr.State; mutated in place so the
            next turn keeps full conversational context.

        Returns
        -------
        str
            The conversation rendered as "role: content" paragraphs,
            with the system prompt excluded.
        """
        messages.append({"role": "user", "content": user_input})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0301",
            messages=messages
        )

        # Store the assistant's reply in the log as well.
        system_message = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": system_message})

        # Build the display string with join (linear) rather than
        # repeated += concatenation; skip the system prompt.
        chat_transcript = "".join(
            message["role"] + ": " + message["content"] + "\n\n"
            for message in messages
            if message["role"] != "system"
        )
        return chat_transcript

    def giveVoice(messages):
        """Convert the latest assistant reply to speech with gTTS.

        Wired as: input from the messages State, output to the
        "Consejero" Audio component.

        Returns
        -------
        str
            Absolute path of the generated temp.mp3 file.
        """
        bot_message = messages[-1]
        tts = gTTS(text=bot_message["content"])
        tts.save("temp.mp3")
        # Absolute path for the Audio component (renamed the original
        # local `dir`, which shadowed the builtin).
        return os.path.join(os.getcwd(), "temp.mp3")

    # Layout: controls and voice output on the left, transcripts on the
    # right (wider) column.
    with gr.Row():
        with gr.Column(scale=1):
            user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
            submit_btn = gr.Button(value="Transcribe Audio")
            gpt_voice = gr.Audio(label="Consejero")
        with gr.Column(scale=2):
            user_transcript = gr.Text(label="Audio Translation", interactive=False)
            gpt_transcript = gr.Text(label="Chat Transcript")

    # The button starts the cascade; each downstream component's change
    # event triggers the next stage.
    submit_btn.click(transcribe, user_audio, user_transcript)
    user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
    gpt_transcript.change(giveVoice, messages, gpt_voice)

# Start a local web server; share=True would instead publish a public
# demo link via Gradio's tunnel.
demo.launch(share=False)