"""Gradio front-end for diffusion style transfer.

Combines Stable Diffusion img2img with VGG16 feature (content) guidance,
Gram-matrix (style) guidance, and latent-space matching, all implemented in
the local ``model`` module. This file only builds the UI and wires it to
``process_images``.
"""

import gradio as gr
from PIL import Image
import torch
import os
import gc
from pathlib import Path
from model import (
    process_images,
    DEVICE,
    DIMENSION,
    MODEL_ID
)
import logging

# Silence warning-level noise emitted at import/startup by the diffusion stack.
logging.disable(logging.WARNING)

# (label, default) pairs for the five Gram-matrix style-guidance sliders.
# Order here defines both the on-screen order and the positional order of the
# style arguments passed to process_images — keep the two in sync by
# construction (see create_interface).
_STYLE_LAYERS = [
    ("Texture Fundamentals", 1.0),
    ("Pattern Assembly", 1.0),
    ("Style Motifs", 1.0),
    ("Compositional Grammar", 1.0),
    ("Style Signature", 1.0),
]

# (label, default) pairs for the five VGG16 feature/content-guidance sliders.
_CONTENT_LAYERS = [
    ("Edge Detector", 1.0),
    ("Shape Assembler", 1.0),
    ("Part Recognizer", 0.0),
    ("Object Former", 0.0),
    ("Scene Integrator", 0.0),
]


def _row_slider(label, value, maximum, step):
    """Create one guidance slider in its own full-width row.

    Args:
        label: Caption shown next to the slider.
        value: Initial slider position.
        maximum: Upper bound of the range (the minimum is always 0.0).
        step: Increment between selectable values.

    Returns:
        The created ``gr.Slider`` component.
    """
    with gr.Row():
        return gr.Slider(
            minimum=0.0,
            maximum=maximum,
            value=value,
            step=step,
            label=label,
        )


def create_interface():
    """Build and return the Gradio Blocks UI for the style-transfer tool."""
    with gr.Blocks() as demo:
        gr.Markdown("""
        # Diffusion Style Transfer with Feature, Style, and Latent Loss

        This tool combines Stable Diffusion with VGG feature control and latent matching to create styled images.
        """)

        with gr.Row():
            # Left column: the two source images.
            with gr.Column(scale=1):
                init_image = gr.Image(
                    label="Initial Image",
                    type="pil",
                    height=384
                )
                style_image = gr.Image(
                    label="Style Image",
                    type="pil",
                    height=384
                )

            # Middle column: prompts and every guidance control.
            with gr.Column(scale=1):
                prompt = gr.Textbox(
                    label="Prompt",
                    lines=2
                )
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="extra details, jpeg artifacts, chromatic aberration"
                )

                with gr.Row():
                    inference_steps = gr.Slider(
                        minimum=20,
                        maximum=100,
                        value=60,
                        step=1,
                        label="Inference Steps"
                    )
                    strength = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.2,
                        step=0.05,
                        label="Initial Image Strength"
                    )

                gr.Markdown("### Gram Matrix Based Style Guidance Scales")
                style_sliders = [
                    _row_slider(label, default, maximum=2.0, step=0.1)
                    for label, default in _STYLE_LAYERS
                ]

                gr.Markdown("### Feature/Perceptual Based Content Guidance Scales")
                content_sliders = [
                    _row_slider(label, default, maximum=20, step=0.5)
                    for label, default in _CONTENT_LAYERS
                ]

                gr.Markdown("### Latent Space Guidance")
                latent_guidance = gr.Slider(
                    minimum=0,
                    maximum=1000,
                    value=24,
                    step=1,
                    label="Latent Space Closeness",
                    info="Controls how closely the output matches the style image in latent space"
                )

                run_button = gr.Button("Generate")

            # Right column: result image and usage notes.
            with gr.Column(scale=1):
                output = gr.Image(
                    label="Output",
                    height=384
                )
                gr.Markdown("""
                The process works in three main steps:

                1. **Initial Image Processing**
                   - Your input image is partially noised based on the 'Initial Image Strength'
                   - Higher strength means more noise, allowing more creative freedom but less preservation of original content

                2. **Diffusion Denoising**
                   - The image is gradually denoised using Stable Diffusion
                   - The process is guided by your prompt and negative prompt
                   - Takes place over the specified number of inference steps

                3. **Multi-Level Style and Content Control**
                   - During denoising, three types of guidance shape the output:

                   **Style Features** (VGG16 layers):
                   * Texture Fundamentals: Basic textures and edges
                   * Pattern Assembly: Repeated elements and patterns
                   * Style Motifs: Distinctive style elements
                   * Compositional Grammar: Arrangement of elements
                   * Style Signature: Overall artistic style

                   **Content Features** (VGG16 layers):
                   * Edge Detector: Basic lines and boundaries
                   * Shape Assembler: Simple geometric forms
                   * Part Recognizer: Complex shapes and parts
                   * Object Former: Complete object representations
                   * Scene Integrator: Overall composition

                   **Latent Space Matching**:
                   * Controls how closely the output matches the style image in Stable Diffusion's latent space
                   * Higher values enforce stronger similarity to the style image's overall structure
                   * Lower values allow more freedom for the other guidance systems

                ### Tips for Best Results
                - Start with lower strength (0.2-0.4) to preserve more of your initial image
                - Use higher style guidance for stronger artistic effect
                - Focus content guidance on middle layers for best object preservation
                - Balance latent loss weight:
                  * Higher (30-50) for closer style image matching
                  * Lower (10-20) for more creative interpretations

                [Read more here in my blog post](http://christhomas.co.uk/blog/2025/02/17/how-to-guide-stable-diffusion-with-vgg-features-style-loss-and-latent-mae/)

                [Github repo](https://github.com/chris-thomas/steering-stable-diffusion)
                """)

        # Positional argument order expected by process_images:
        # images, prompts, steps/strength, 5 style scales, 5 content scales,
        # then the latent weight. Built from the same slider lists that
        # created the UI, so the two cannot drift apart.
        run_button.click(
            fn=process_images,
            inputs=[
                init_image,
                style_image,
                prompt,
                negative_prompt,
                inference_steps,
                strength,
                *style_sliders,
                *content_sliders,
                latent_guidance,
            ],
            outputs=output
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()