import os
import tempfile
import time
from io import BytesIO
from typing import IO

import gradio as gr
import requests  # NOTE(review): unused here — kept in case another deployment path needs it
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from openai import OpenAI
from PIL import Image  # NOTE(review): unused here — kept in case another deployment path needs it

# Basic-auth credentials for the Gradio app, read from the environment.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Initialize API clients from environment variables.
openai_api_key = os.getenv("OPENAI_API_KEY")
client_openai = OpenAI(api_key=openai_api_key)

elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
client_elevenlabs = ElevenLabs(api_key=elevenlabs_api_key)


def text_to_speech_stream(text: str) -> IO[bytes]:
    """Convert ``text`` to speech via ElevenLabs and return an in-memory MP3 stream.

    Args:
        text: The text to synthesize.

    Returns:
        A ``BytesIO`` positioned at the start, containing MP3 audio
        (22050 Hz / 32 kbps, per ``output_format`` below).
    """
    # Perform the text-to-speech conversion using ElevenLabs.
    response = client_elevenlabs.text_to_speech.convert(
        voice_id="VQE7uwgOwnnTI8oKNjqz",  # Digitalized voice of Malcolm X
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )

    # Collect the streamed chunks into a single in-memory buffer.
    audio_stream = BytesIO()
    for chunk in response:
        if chunk:
            audio_stream.write(chunk)

    # Rewind so callers can read from the beginning.
    audio_stream.seek(0)
    return audio_stream


def generate_assistant_response(user_message: str) -> str:
    """Send ``user_message`` to the OpenAI assistant and return a path to spoken audio.

    Creates a fresh thread per call (no conversation memory), runs the
    assistant, converts its text reply to speech with ElevenLabs, and writes
    the MP3 to a temporary file Gradio can serve.

    Args:
        user_message: The user's text prompt.

    Returns:
        Filesystem path to a temporary ``.mp3`` file with the spoken reply.

    Raises:
        RuntimeError: If the assistant run ends in a failed/cancelled/expired state.
    """
    assistant = client_openai.beta.assistants.retrieve(
        assistant_id="asst_EzgIYI1atVqvV4tRvy6YmQni"
    )
    thread = client_openai.beta.threads.create()
    client_openai.beta.threads.messages.create(
        thread_id=thread.id, role="user", content=user_message
    )
    run = client_openai.beta.threads.runs.create(
        thread_id=thread.id, assistant_id=assistant.id
    )

    # Poll until the run reaches a terminal state.
    # FIX: the original loop had no sleep (a tight busy-wait hammering the API)
    # and only tested for 'completed', so a failed/cancelled/expired run would
    # spin forever. We now sleep between polls and surface failures explicitly.
    while True:
        run_status = client_openai.beta.threads.runs.retrieve(
            thread_id=thread.id, run_id=run.id
        )
        if run_status.status == 'completed':
            break
        if run_status.status in ('failed', 'cancelled', 'expired'):
            raise RuntimeError(
                f"Assistant run ended with status: {run_status.status}"
            )
        time.sleep(1)  # avoid busy-waiting / API rate limits

    # messages.list returns newest-first, so data[0] is the assistant's reply.
    messages = client_openai.beta.threads.messages.list(thread_id=thread.id)
    assistant_response = messages.data[0].content[0].text.value

    # Convert to voice using ElevenLabs and persist to a temp file for Gradio.
    # delete=False is required: Gradio reads the file after this function returns.
    audio_stream = text_to_speech_stream(assistant_response)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(audio_stream.getvalue())
        temp_audio_path = temp_file.name

    return temp_audio_path  # Return the temporary file path


# Path of the illustrative image (local file served by Gradio).
image_url = "image.png"

with gr.Blocks() as interface:
    gr.Markdown("## Malcolm X")
    with gr.Row():
        with gr.Column(scale=1):
            # Add the illustrative image here.
            gr.Image(image_url, elem_id="illustrative-image")
        with gr.Column(scale=3):
            input_text = gr.Textbox(label="Your message")
            output_audio = gr.Audio(label="Assistant's Response")
            btn = gr.Button("Generate Response")
            btn.click(
                generate_assistant_response,
                inputs=input_text,
                outputs=output_audio,
            )

interface.launch(auth=(username, password))