import os
import tempfile
import time
from io import BytesIO
from typing import IO

import gradio as gr
import requests  # NOTE(review): unused here — kept in case another deployment path needs it
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from openai import OpenAI
from PIL import Image  # NOTE(review): unused here — kept in case another deployment path needs it

# Basic-auth credentials for the Gradio app, read from the environment.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Initialize API clients from environment variables.
openai_api_key = os.getenv("OPENAI_API_KEY")
client_openai = OpenAI(api_key=openai_api_key)

elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
client_elevenlabs = ElevenLabs(api_key=elevenlabs_api_key)


def text_to_speech_stream(text: str) -> IO[bytes]:
    """Convert ``text`` to speech via ElevenLabs and return an in-memory MP3 stream.

    Args:
        text: The text to synthesize.

    Returns:
        A ``BytesIO`` positioned at the start, containing MP3 audio
        (22050 Hz / 32 kbps, per ``output_format`` below).
    """
    # Perform the text-to-speech conversion using ElevenLabs.
    response = client_elevenlabs.text_to_speech.convert(
        voice_id="VQE7uwgOwnnTI8oKNjqz",  # Digitalized voice of Malcolm X
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )

    # Collect the streamed chunks into a single in-memory buffer.
    audio_stream = BytesIO()
    for chunk in response:
        if chunk:
            audio_stream.write(chunk)

    # Rewind so callers can read from the beginning.
    audio_stream.seek(0)
    return audio_stream


def generate_assistant_response(user_message: str) -> str:
    """Send ``user_message`` to the OpenAI assistant and return a path to spoken audio.

    Creates a fresh thread per call (no conversation memory), runs the
    assistant, converts its text reply to speech with ElevenLabs, and writes
    the MP3 to a temporary file Gradio can serve.

    Args:
        user_message: The user's text prompt.

    Returns:
        Filesystem path to a temporary ``.mp3`` file with the spoken reply.

    Raises:
        RuntimeError: If the assistant run ends in a failed/cancelled/expired state.
    """
    assistant = client_openai.beta.assistants.retrieve(
        assistant_id="asst_EzgIYI1atVqvV4tRvy6YmQni"
    )
    thread = client_openai.beta.threads.create()
    client_openai.beta.threads.messages.create(
        thread_id=thread.id, role="user", content=user_message
    )
    run = client_openai.beta.threads.runs.create(
        thread_id=thread.id, assistant_id=assistant.id
    )

    # Poll until the run reaches a terminal state.
    # FIX: the original loop had no sleep (a tight busy-wait hammering the API)
    # and only tested for 'completed', so a failed/cancelled/expired run would
    # spin forever. We now sleep between polls and surface failures explicitly.
    while True:
        run_status = client_openai.beta.threads.runs.retrieve(
            thread_id=thread.id, run_id=run.id
        )
        if run_status.status == 'completed':
            break
        if run_status.status in ('failed', 'cancelled', 'expired'):
            raise RuntimeError(
                f"Assistant run ended with status: {run_status.status}"
            )
        time.sleep(1)  # avoid busy-waiting / API rate limits

    # messages.list returns newest-first, so data[0] is the assistant's reply.
    messages = client_openai.beta.threads.messages.list(thread_id=thread.id)
    assistant_response = messages.data[0].content[0].text.value

    # Convert to voice using ElevenLabs and persist to a temp file for Gradio.
    # delete=False is required: Gradio reads the file after this function returns.
    audio_stream = text_to_speech_stream(assistant_response)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(audio_stream.getvalue())
        temp_audio_path = temp_file.name

    return temp_audio_path  # Return the temporary file path


# Path of the illustrative image (local file served by Gradio).
image_url = "image.png"

with gr.Blocks() as interface:
    gr.Markdown("## Malcolm X")
    with gr.Row():
        with gr.Column(scale=1):
            # Add the illustrative image here.
            gr.Image(image_url, elem_id="illustrative-image")
        with gr.Column(scale=3):
            input_text = gr.Textbox(label="Your message")
            output_audio = gr.Audio(label="Assistant's Response")
            btn = gr.Button("Generate Response")
            btn.click(
                generate_assistant_response,
                inputs=input_text,
                outputs=output_audio,
            )

interface.launch(auth=(username, password))