import gradio as gr
import openai
import requests
import json
import os
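# Configure the OpenAI client with an API key read from the environment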
openai.api_key = os.environ.get('OPENAI_API_KEY')
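# Conversation history, seeded with a system prompt that sets the Steve Jobs persona and a 25-word reply limit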
messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]
# Set up the ElevenLabs streaming text-to-speech endpoint URL and request headers
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
"accept": "*/*",
"xi-api-key": os.environ.get('elevenlabs_api_key'),
"Content-Type": "application/json",
}
# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
    global messages
    # Use OpenAI's Whisper API to transcribe the user's audio input
    # API call 1
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    # Append the user's message to the message history
    messages.append({"role": "user", "content": transcript["text"]})
    # Generate a response using OpenAI's chat API
    # API call 2
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    # Extract the assistant's reply from the API response and append it to the message history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    # API call 3
    # Use the ElevenLabs voice synthesis API to generate an audio response from the reply
    data = {
        "text": system_message["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
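    # Stream the synthesized speech back from ElevenLabs in chunks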
    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)
    # Save the streamed audio response to a file; Gradio plays it via the audio output component
    if response.ok:
        with open("output.wav", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
    else:
        print(f"Error: {response.status_code} - {response.reason}")
    # Generate a chat transcript for display in the Gradio UI
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
    return chat_transcript, 'output.wav'
# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs=['text','audio'])
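# debug=True blocks the main thread and prints errors to the console/logs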
ui.launch(debug=True)