from gradio_tools import StableDiffusionTool, ImageCaptioningTool, StableDiffusionPromptGeneratorTool, TextToVideoTool
from langchain import OpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import ChatPromptTemplate
import gradio as gr

def initialize_llm(openai_api_key):
    """Build the LangChain agent used to generate images and videos.

    Creates an OpenAI LLM (temperature 0), a conversation buffer memory and
    the four gradio_tools wrappers, then wires them into a
    chat zero-shot ReAct agent.

    Args:
        openai_api_key: API key forwarded to the ``OpenAI`` LLM wrapper.

    Returns:
        A two-element list ``[agent, success]``. On success ``agent`` is the
        initialized agent and ``success`` is ``True``; if any construction
        step raises, the error is printed and ``[None, False]`` is returned.
    """
    try:
        llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
        memory = ConversationBufferMemory(memory_key="chat_history")
        # Tool construction and agent wiring live inside the same try block
        # as the LLM: a failure in any of them must surface through the
        # [None, False] contract instead of escaping to the caller.
        tools = [
            StableDiffusionTool().langchain,
            ImageCaptioningTool().langchain,
            StableDiffusionPromptGeneratorTool().langchain,
            TextToVideoTool().langchain,
        ]
        agent = initialize_agent(
            tools,
            llm,
            memory=memory,
            agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )
    except Exception as e:
        print(e)
        return [None, False]

    return [agent, True]


def diffuse(query, openai_api_key):
    """Ask the agent for an image and a video of ``query`` and return their paths.

    The user prompt is embedded in an instruction template together with the
    format instructions of a ``StructuredOutputParser`` that expects two
    fields, ``video_path`` and ``image_path``. The agent's textual answer is
    parsed back into those two values.

    Args:
        query: Free-text prompt entered by the user.
        openai_api_key: API key passed on to ``initialize_llm``.

    Returns:
        ``[video_path, image_path]`` as parsed from the agent output. If the
        agent could not be initialized, ``[None, None]`` is returned so the
        result still has one entry per output component.
    """
    structured_prompt = """
    You are a program that converts a prompt to a suitable stable diffusion prompt. You generate an image with this prompt using stable diffusion. Then you generate a video of the generated image. 
    The prompt is {input_prompt}

    you have to structure it using {structure}
    """

    # Field names are kept in variables so the schema definitions and the
    # lookups on the parsed output cannot drift apart.
    video_key = "video_path"
    image_key = "image_path"

    video_path = ResponseSchema(name=video_key,
                                description="Generate a video of the prompt and return its path.")

    image_path = ResponseSchema(name=image_key,
                                description="Generate an image of the prompt and return its path")

    output_parser = StructuredOutputParser.from_response_schemas(
        [
            video_path,
            image_path,
        ]
    )

    format_instructions = output_parser.get_format_instructions()
    structured_prompt_template = ChatPromptTemplate.from_template(structured_prompt)

    my_message = structured_prompt_template.format_messages(
        input_prompt=query,
        structure=format_instructions
    )

    agent, success = initialize_llm(openai_api_key)

    if not success:
        print("Something went wrong")
        # One value per wired output (video, image) instead of an implicit None.
        return [None, None]

    # The agent takes plain text as input, so pass the rendered prompt text
    # rather than the list of message objects.
    res = agent.run(my_message[0].content)
    # agent.run returns the answer as a string; there is no `.content` on it.
    output_dict = output_parser.parse(res)
    # The parsed dict is keyed by the schema *names*, not the schema objects.
    vid_path = output_dict.get(video_key)
    img_path = output_dict.get(image_key)
    print(vid_path, img_path)
    return [vid_path, img_path]


# UI layout: an API-key field, a prompt field, a video/image result pair and
# a button that sends both text fields to `diffuse`.
with gr.Blocks() as demo:
    gr.Markdown(
        value="""
        My stable diffusion demo
        """
    )

    with gr.Row():
        openai_api_key = gr.Textbox(type="password", label="OpenAI API Key")

    with gr.Row():
        input = gr.Textbox(label="Input")

    with gr.Row():
        result_video = gr.Video(elem_id="gallery", show_label=False, label="Result")
        result_image = gr.Image(show_label=False, label="Result")

    with gr.Row():
        generate_btn = gr.Button(value="Generate")

    # Registered after all components exist; `diffuse` returns
    # [video, image], matching the order of `outputs`.
    generate_btn.click(
        fn=diffuse,
        inputs=[input, openai_api_key],
        outputs=[result_video, result_image],
        api_name="stableDiffusion",
    )

demo.launch()