import gradio as gr
import openai
import requests
import json
import os

# Hand the OpenAI client its key from the environment (None when the variable is unset).
openai.api_key = os.getenv('OPENAI_API_KEY')

    
# Conversation history shared by every call to transcribe(); it starts with
# the persona prompt and grows by one user and one reply entry per turn.
messages = [
    {
        "role": "system",
        "content": 'You are Steve Jobs. Respond to all input in 25 words or less.',
    },
]

# ElevenLabs streaming text-to-speech endpoint for the voice named by the
# `voice_id` environment variable.
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"

# Headers sent with every synthesis request; the API key comes from the
# `elevenlabs_api_key` environment variable.
headers = {
    "accept": "*/*",
    "xi-api-key": os.getenv('elevenlabs_api_key'),
    "Content-Type": "application/json",
}

# Define a function to handle the Gradio input and generate the response
def _format_chat_transcript(history):
    """Render the non-system messages of *history* as ``role: content`` paragraphs."""
    return "".join(
        f"{entry['role']}: {entry['content']}\n\n"
        for entry in history
        if entry['role'] != 'system'
    )


def transcribe(audio):
    """Run one voice-chat turn: speech -> text -> chat reply -> speech.

    The recording is transcribed with the OpenAI ``whisper-1`` model, the text
    is appended to the module-level ``messages`` history and sent to the
    ``gpt-3.5-turbo`` chat model, and the reply is voiced by POSTing it to the
    module-level ``url`` with ``headers``. Because ``messages`` is a module
    global, the history is shared by every call made in this process.

    Args:
        audio: Filesystem path of the recorded clip, or ``None`` when no
            recording was supplied.

    Returns:
        A ``(chat_transcript, audio_path)`` tuple. ``chat_transcript`` is the
        conversation so far with the system prompt omitted. ``audio_path`` is
        ``'output.wav'`` when speech synthesis succeeded, and ``None`` when
        there was no input or synthesis failed, so that a missing or stale
        file from an earlier turn is never returned as the current reply.
    """
    # Nothing to transcribe: show the existing conversation instead of
    # crashing inside open().
    if not audio:
        return _format_chat_transcript(messages), None

    # API call 1: speech-to-text. The context manager closes the upload
    # handle even if the request raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    # Record the user's turn, then ask the chat model for a reply.
    # (`messages` is only mutated, never rebound, so no `global` is needed.)
    messages.append({"role": "user", "content": transcript["text"]})

    # API call 2: chat completion over the whole history.
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    # Keep the assistant's reply in the history so later turns have context.
    reply = completion["choices"][0]["message"]
    messages.append(reply)

    # API call 3: text-to-speech for the reply.
    payload = {
        "text": reply["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }

    audio_path = None
    # A streamed response holds its connection until it is fully read or
    # closed; the `with` block releases it on the error branch too.
    with requests.post(url, headers=headers, data=json.dumps(payload), stream=True) as tts_response:
        if tts_response.ok:
            # NOTE(review): the file is named .wav but the endpoint's actual
            # audio encoding is not checked here — confirm the format.
            with open("output.wav", "wb") as f:
                for chunk in tts_response.iter_content(chunk_size=1024):
                    f.write(chunk)
            audio_path = 'output.wav'
        else:
            print(f"Error: {tts_response.status_code} - {tts_response.reason}")

    return _format_chat_transcript(messages), audio_path

# css = """
#       #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
#       #header {text-align: center;}
#         }
#         """

# with gr.Blocks(css=css) as ui:
    
    
#     with gr.Column(elem_id="col-container"):
#         gr.Markdown("""## Talk to AI Steve Jobs: Audio-to-Text+Audio generation
#                     Powered by ChatGPT + Whisper + ElevenLabs + HuggingFace <br>
#                     <br>
#                     """,
#                     elem_id="header")

# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
# Web UI: microphone recording in (passed to transcribe() as a file path),
# conversation text and spoken reply out.
ui = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=['text', 'audio'],
    title='Talk to AI Steve Jobs',
    # Adjacent literals with explicit "\n" keep the line breaks and the
    # trailing spaces of the description visible in the source.
    description=(
        "Click on Record from microphone and start speaking, \n"
        "and when you're done, click on Stop Recording. Then click on Submit. "
        "AI Steve will then answer your question. You can continue to ask "
        "follow-up questions by clicking on Clear, and then \n"
        "using Record from microphone -> Stop Recording -> Submit  "
        "AI Steve Jobs will also remember the previous questions and answers."
    ),
)
ui.launch(debug=True)