# Malcolm_X / app.py — Hugging Face Space by jeremierostan (commit fe4dcd5, 3.26 kB)
# NOTE: the lines above were non-Python page chrome from the hosted file view;
# they are preserved here as comments so the module parses.
# Standard library
import os
import tempfile
import time
from io import BytesIO
from typing import IO

# Third-party
import gradio as gr
import requests
from PIL import Image
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from openai import OpenAI
# Basic-auth credentials for the Gradio app, read from the environment
# (set as secrets on the hosting platform).
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
# Initialize Clients
# API keys also come from the environment; the clients are created once at
# module import and shared by all requests.
openai_api_key = os.getenv("OPENAI_API_KEY")
client_openai = OpenAI(api_key=openai_api_key)
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
client_elevenlabs = ElevenLabs(api_key=elevenlabs_api_key)
def text_to_speech_stream(text: str) -> IO[bytes]:
    """Synthesize *text* with the ElevenLabs API and return the MP3 audio
    as an in-memory byte stream positioned at the start.

    Parameters
    ----------
    text : str
        The text to speak.

    Returns
    -------
    IO[bytes]
        A ``BytesIO`` holding the complete MP3 data, seeked to offset 0.
    """
    voice_config = VoiceSettings(
        stability=0.0,
        similarity_boost=1.0,
        style=0.0,
        use_speaker_boost=True,
    )
    # Request the conversion; the client yields the audio as a chunk stream.
    audio_chunks = client_elevenlabs.text_to_speech.convert(
        voice_id="VQE7uwgOwnnTI8oKNjqz",  # Digitalized voice of Malcolm X
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=voice_config,
    )
    # Concatenate the non-empty chunks into one in-memory buffer.
    buffer = BytesIO(b"".join(chunk for chunk in audio_chunks if chunk))
    buffer.seek(0)  # rewind so callers can read from the beginning
    return buffer
def generate_assistant_response(user_message):
    """Send *user_message* to the OpenAI Assistant, synthesize the reply with
    ElevenLabs, and return the path of a temporary MP3 file for Gradio.

    Parameters
    ----------
    user_message : str
        The text entered by the user.

    Returns
    -------
    str
        Filesystem path of a temporary ``.mp3`` file holding the spoken reply.

    Raises
    ------
    RuntimeError
        If the assistant run ends in a non-successful terminal state.
    """
    assistant = client_openai.beta.assistants.retrieve(
        assistant_id="asst_EzgIYI1atVqvV4tRvy6YmQni"
    )
    # A fresh thread per request: no conversation memory across calls.
    thread = client_openai.beta.threads.create()
    client_openai.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=user_message
    )
    run = client_openai.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )
    # Poll until the run reaches a terminal state. Sleeping between polls
    # avoids hammering the API (the original loop busy-waited), and checking
    # failure states avoids an infinite loop when the run never completes.
    while True:
        run_status = client_openai.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        if run_status.status == 'completed':
            break
        if run_status.status in ('failed', 'cancelled', 'expired'):
            raise RuntimeError(
                f"Assistant run ended with status: {run_status.status}"
            )
        time.sleep(0.5)
    messages = client_openai.beta.threads.messages.list(thread_id=thread.id)
    # messages.data[0] is the most recent message, i.e. the assistant's reply.
    assistant_response = messages.data[0].content[0].text.value
    # Convert to voice using ElevenLabs and persist to a temp file so the
    # Gradio Audio component can serve it by path.
    audio_stream = text_to_speech_stream(assistant_response)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(audio_stream.getvalue())
        temp_audio_path = temp_file.name
    return temp_audio_path  # Return the temporary file path
# Local path of the illustrative image shown beside the chat controls.
image_url = "image.png"
with gr.Blocks() as interface:
    gr.Markdown("## Malcolm X")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Image(image_url, elem_id="illustrative-image")  # Add the illustrative image here
        with gr.Column(scale=3):
            input_text = gr.Textbox(label="Your message")
            output_audio = gr.Audio(label="Assistant's Response")
    btn = gr.Button("Generate Response")
    # Wire the button: user text in -> temporary MP3 path out (gr.Audio plays it).
    btn.click(generate_assistant_response, inputs=input_text, outputs=output_audio)
# Protect the app with HTTP basic auth using the env-provided credentials.
interface.launch(auth=(username, password))