# NOTE(review): the Hugging Face Spaces status banner ("Spaces: Sleeping") was
# captured by the page scrape; it is not part of the program. Source begins below.
import os
import tempfile
import time
from io import BytesIO
from typing import IO

import gradio as gr
import requests
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from openai import OpenAI
from PIL import Image
# Credentials for the Gradio login screen, read from the environment.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Initialize API clients; keys are supplied via environment variables
# (Space secrets), never hard-coded.
openai_api_key = os.getenv("OPENAI_API_KEY")
client_openai = OpenAI(api_key=openai_api_key)
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
client_elevenlabs = ElevenLabs(api_key=elevenlabs_api_key)
def text_to_speech_stream(text: str) -> IO[bytes]:
    """Convert *text* to speech with ElevenLabs and return the MP3 audio in memory.

    Parameters
    ----------
    text : str
        The text to synthesize.

    Returns
    -------
    IO[bytes]
        A ``BytesIO`` containing the MP3 data, rewound to position 0 so the
        caller can read it from the start.
    """
    # Perform the text-to-speech conversion; the client streams the audio
    # back as an iterable of byte chunks.
    response = client_elevenlabs.text_to_speech.convert(
        voice_id="VQE7uwgOwnnTI8oKNjqz",  # digitalized voice of Malcolm X
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )
    # Accumulate the streamed chunks into a single in-memory buffer.
    audio_stream = BytesIO()
    for chunk in response:
        if chunk:
            audio_stream.write(chunk)
    # Rewind so downstream readers see the audio from the beginning.
    audio_stream.seek(0)
    return audio_stream
def generate_assistant_response(user_message):
    """Send *user_message* to the OpenAI Assistant and return a path to the spoken reply.

    Creates a fresh thread per call, runs the assistant, polls until the run
    reaches a terminal state, converts the text reply to speech via
    :func:`text_to_speech_stream`, and writes it to a temporary MP3 file.

    Parameters
    ----------
    user_message : str
        The user's chat message.

    Returns
    -------
    str
        Filesystem path of the generated MP3 (a ``NamedTemporaryFile`` kept
        on disk with ``delete=False`` so Gradio can serve it).

    Raises
    ------
    RuntimeError
        If the assistant run ends in a non-success terminal state.
    """
    assistant = client_openai.beta.assistants.retrieve(
        assistant_id="asst_EzgIYI1atVqvV4tRvy6YmQni"
    )
    thread = client_openai.beta.threads.create()
    client_openai.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=user_message,
    )
    run = client_openai.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    # Poll until the run reaches a terminal state. The original loop spun
    # without sleeping and only checked 'completed', so a failed/cancelled/
    # expired run would hang the request forever while hammering the API.
    while True:
        run_status = client_openai.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        if run_status.status == "completed":
            break
        if run_status.status in ("failed", "cancelled", "expired"):
            raise RuntimeError(f"Assistant run ended with status: {run_status.status}")
        time.sleep(0.5)  # back off between polls to avoid a busy-wait
    messages = client_openai.beta.threads.messages.list(thread_id=thread.id)
    # messages.data is newest-first; [0] is the assistant's latest reply.
    assistant_response = messages.data[0].content[0].text.value
    # Convert to voice using ElevenLabs and persist to disk for Gradio.
    audio_stream = text_to_speech_stream(assistant_response)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(audio_stream.getvalue())
        temp_audio_path = temp_file.name
    return temp_audio_path  # Gradio Audio component accepts a file path
# Path of the illustrative image bundled with the app.
image_url = "image.png"

# Build the Gradio UI: image on the left, chat controls on the right.
with gr.Blocks() as interface:
    gr.Markdown("## Malcolm X")
    with gr.Row():
        with gr.Column(scale=1):
            # Illustrative portrait shown alongside the chat controls.
            gr.Image(image_url, elem_id="illustrative-image")
        with gr.Column(scale=3):
            input_text = gr.Textbox(label="Your message")
            output_audio = gr.Audio(label="Assistant's Response")
            btn = gr.Button("Generate Response")
            btn.click(generate_assistant_response, inputs=input_text, outputs=output_audio)

# Basic auth credentials come from the USERNAME/PASSWORD environment variables.
interface.launch(auth=(username, password))