import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
import numpy as np
import spaces

# Initialize the image encoder and pipeline outside the function
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch.float16,
)
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    image_encoder=image_encoder,
)
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
pipeline.enable_model_cpu_offload()


@spaces.GPU
def transform_image(face_image, soy_strength, face_strength):
    generator = torch.Generator(device="cpu").manual_seed(0)

    # Accept either a PIL image or a NumPy array from the Gradio input
    if isinstance(face_image, Image.Image):
        processed_face_image = face_image
    elif isinstance(face_image, np.ndarray):
        processed_face_image = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    style_image_path = "examples/soyjak2.jpg"
    style_image = Image.open(style_image_path)

    # Set the IP-Adapter scales dynamically based on the sliders
    pipeline.set_ip_adapter_scale([soy_strength, face_strength])

    image = pipeline(
        prompt="soyjak",
        ip_adapter_image=[style_image, processed_face_image],
        negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
        num_inference_steps=50,
        generator=generator,
    ).images[0]

    return image


# Gradio interface setup with dynamic sliders
demo = gr.Interface(
    fn=transform_image,
    inputs=[
        gr.Image(label="Upload your face image"),
        gr.Slider(minimum=0, maximum=1, step=0.05, value=0.7, label="Soy Strength"),
        gr.Slider(minimum=0, maximum=1, step=0.05, value=1.0, label="Face Strength"),
    ],
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image and adjust the strengths. **Please use responsibly.**",
)

demo.queue(max_size=20)
demo.launch()