import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from transformers import pipeline

# Initialize the caption-generation model
get_caption = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=0,
)

# Initialize the image-generation model (half precision to reduce GPU memory use)
generate_pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
pipe = generate_pipeline.to("cuda")


def captioner(input: Image.Image) -> str:
    """
    Generate a descriptive caption for the given input image using the
    Salesforce/blip-image-captioning-base model.

    Args:
        input (Image.Image): The input image for which to generate a caption.

    Returns:
        str: The generated caption describing the input image.
    """
    output = get_caption(input)
    return output[0]["generated_text"]


def generate(prompt: str) -> Image.Image:
    """
    Generate an image from the given textual prompt using the Stable Diffusion
    model.

    Args:
        prompt (str): The textual description from which to generate an image.

    Returns:
        Image.Image: The generated image corresponding to the given prompt.
    """
    return pipe(prompt).images[0]


@spaces.GPU(duration=300)
def caption_and_generate(image: Image.Image) -> list:
    """
    Generate a caption for the given image, then generate a new image from
    that caption.

    Args:
        image (Image.Image): The input image for which to generate a caption
            and subsequently a new image.

    Returns:
        list: The generated caption (str) and the generated image (Image.Image).
    """
    caption = captioner(image)
    image = generate(caption)
    return [caption, image]


####### GRADIO APP #######
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_all = gr.Button("Caption and generate")
    caption = gr.Textbox(label="Generated caption")
    image_output = gr.Image(label="Generated Image")
    btn_all.click(
        fn=caption_and_generate,
        inputs=[image_upload],
        outputs=[caption, image_output],
    )

demo.launch()
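
# Optional local smoke test: a minimal sketch (not part of the original app)
# showing how the two model helpers compose without the Gradio UI. It assumes
# a local image file named "example.jpg" exists; the filename is hypothetical.
# Uncomment and run in place of demo.launch():
#
#   img = Image.open("example.jpg")
#   caption_text = captioner(img)       # BLIP caption of the input image
#   new_image = generate(caption_text)  # Stable Diffusion render of that caption
#   new_image.save("regenerated.png")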