# Import the required libraries
import gradio as gr
import openai
from gtts import gTTS
from pydub import AudioSegment
import os

# Conversation history, seeded with the Nanaboozhoo persona system prompt
messages = [{"role": "system", "content": 'You are the Anishinaabe hero Nanaboozhoo. Not only do you answer with profound wisdom but you will continue the conversation by answering like this, Boozhoo: (your answer)'}]
full_transcript = []
openai.api_key = ""
audio_file = 'response.mp3'
def set_api(my_key):
    openai.api_key = my_key
def create_image(response):
    # Send text to be summarized
    dalle_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f'Summarize this text "{response["choices"][0]["message"]["content"]}" into a short and concise Dall-E 2 prompt starting with "A Professional photograph of an Anishinaabe person saying :(summarization)".'}
        ]
    )
    # Use summary as prompt for pic
    dalle_summary = openai.Image.create(
        prompt=dalle_prompt["choices"][0]["message"]["content"],
        size="512x512"
    )
    image_url = dalle_summary['data'][0]['url']
    return image_url
def speak(system_message):
    global audio_file
    content = system_message['content']
    tts = gTTS(content, lang='en', slow=False)
    tts.save("response.mp3")
    return "response.mp3"
def transcribe(gradio_input, api_key):
    global messages
    global full_transcript
    global audio_file
    set_api(api_key)
    # Transcribe audio
    input_audio = AudioSegment.from_file(gradio_input)
    input_audio.export("input_audio.wav", format="wav")
    with open("input_audio.wav", "rb") as audio_file:
        print(f"Audio file format: {os.path.splitext(audio_file.name)[1]}\n")
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    # Append content to messages
    full_transcript.append(transcript["text"])
    messages.append({"role": "user", "content": transcript["text"]})
    # Send the latest set of messages to OpenAI to get a response
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # Extract the latest system message from the response and add it as a new message to the messages list
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    pic_url = create_image(response)
    speech = speak(system_message)
    # Combine all messages in the messages list to create a chat transcript
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
    return speech, chat_transcript, pic_url
MY_INFO = '\nSupport me at my [Linktree](https://linktr.ee/Nbiish).'
API_INFO = 'Get your API key at [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)'
# Create a Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", show_label=False),
        gr.Textbox(
            label="OpenAI API Key",
            lines=1,
            placeholder="Enter your OpenAI API key",
            type="password",
        ),
    ],
    outputs=[
        gr.Audio(show_label=False),
        gr.Textbox(label="Transcript:"),
        gr.Image(show_label=False),
    ],
    title="Boozhoo Bot",
    description=f"""
Anishinaabe Chatbot.
Uses OpenAI's Whisper to transcribe the audio input,
GPT-3.5 Turbo to generate a response,
DALL-E 2 to generate an image,
and gTTS to speak the response aloud.
1) Record to get started
2) Press the X on the recording to keep going
3) Refresh the page to restart
{MY_INFO}
{API_INFO}
""",
)
if __name__ == "__main__":
    demo.queue().launch()
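
# The Space's requirements file isn't shown above. As a rough sketch (an
# assumption, not taken from the repo): the code uses the legacy pre-1.0
# openai SDK (openai.ChatCompletion / openai.Audio / openai.Image) and
# Gradio 3.x (gr.Audio(source="microphone")), so an environment along these
# lines should work:
#
#   gradio>=3.0,<4.0
#   openai>=0.27,<1.0
#   gTTS
#   pydub
#
# pydub also expects ffmpeg to be available on the system for the
# mp3/wav conversions.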