Spaces:

akshayvkt
/

talk-To-SteveJobs

Running

App Files Files Community

talk-To-SteveJobs / app.py

akshayvkt

Update app.py

6bac00a over 1 year ago

raw

history blame

No virus

3.45 kB

	import gradio as gr
	import openai
	import requests
	import json
	import os

	# Credentials and identifiers all come from environment variables.
	# os.environ.get() yields None for anything unset, so a missing variable is
	# not reported here; it only shows up later as a failing API call.
	openai.api_key = os.environ.get('OPENAI_API_KEY')


	# Conversation history passed to the chat model. It starts with the persona
	# prompt and is extended by transcribe(); because it lives at module level it
	# is shared by every request handled by this process.
	messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]

	# ElevenLabs streaming text-to-speech endpoint for the voice named by the
	# `voice_id` environment variable.
	url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
	headers = {
	    # A bare "/" is not a valid media range for an Accept header; "*/*"
	    # accepts whatever audio type the service streams back.
	    "accept": "*/*",
	    "xi-api-key": os.environ.get('elevenlabs_api_key'),
	    "Content-Type": "application/json",
	}

	def _render_transcript(history):
	    """Return the non-system messages of *history* as "role: content" paragraphs."""
	    return "".join(
	        f"{entry['role']}: {entry['content']}\n\n"
	        for entry in history
	        if entry['role'] != 'system'
	    )


	# Gradio handler: one recorded question in, transcript text + spoken reply out.
	def transcribe(audio):
	    """Transcribe a recording, get a chat reply, and synthesize it as speech.

	    The user's words and the model's reply are both appended to the
	    module-level ``messages`` list, so the conversation carries over between
	    calls.

	    Args:
	        audio: Path of the recorded audio file to transcribe. A falsy value
	            (no recording) is tolerated and leaves the history untouched.

	    Returns:
	        A ``(chat_transcript, audio_path)`` tuple. ``chat_transcript`` is the
	        whole conversation so far without the system prompt; ``audio_path``
	        is ``'output.wav'`` when speech synthesis succeeded and ``None`` when
	        there is no fresh audio to play.
	    """
	    # Nothing to transcribe (e.g. Submit pressed without a recording): show
	    # the conversation so far instead of failing inside open().
	    if not audio:
	        return _render_transcript(messages), None

	    # API call 1: speech-to-text. The context manager closes the upload
	    # handle, which the previous bare open() never did.
	    with open(audio, "rb") as audio_file:
	        transcript = openai.Audio.transcribe("whisper-1", audio_file)

	    # Record the user's turn before asking the chat model for a reply.
	    messages.append({"role": "user", "content": transcript["text"]})

	    # API call 2: chat completion over the full history.
	    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

	    # Keep the reply in the history so later questions have context.
	    reply = completion["choices"][0]["message"]
	    messages.append(reply)

	    # API call 3: text-to-speech for the reply text.
	    payload = {
	        "text": reply["content"],
	        "voice_settings": {
	            "stability": 0,
	            "similarity_boost": 0,
	        },
	    }

	    audio_path = None
	    # Using the streamed response as a context manager releases the
	    # connection even when the body is never read (the error branch).
	    with requests.post(url, headers=headers, data=json.dumps(payload), stream=True) as speech:
	        if speech.ok:
	            # NOTE(review): the file is named .wav but holds whatever bytes
	            # the endpoint streams - confirm the format matches the name.
	            with open("output.wav", "wb") as f:
	                for chunk in speech.iter_content(chunk_size=1024):
	                    f.write(chunk)
	            audio_path = "output.wav"
	        else:
	            # Report the failure and return no audio rather than replaying a
	            # stale output.wav left over from an earlier request.
	            print(f"Error: {speech.status_code} - {speech.reason}")

	    return _render_transcript(messages), audio_path

	# Gradio front end: a microphone recording (handed over as a file path) is
	# passed to transcribe(), whose two return values fill the text and audio
	# outputs.
	ui = gr.Interface(
	    fn=transcribe,
	    inputs=gr.Audio(source="microphone", type="filepath"),
	    outputs=["text", "audio"],
	    title="Talk to AI Steve Jobs",
	    description="""Click on Record from microphone and start speaking,
	and when you're done, click on Stop Recording. Then click on Submit. AI Steve will then answer your question. You can continue to ask follow-up questions by clicking on Clear, and then
	using Record from microphone -> Stop Recording -> Submit AI Steve Jobs will also remember the previous questions and answers.""",
	)

	# Start serving the interface with Gradio's debug option switched on.
	ui.launch(debug=True)