Spaces:

Gertie01
/

app-pzeyhe-14

Runtime error

App Files Files Community

Gertie01 committed 28 days ago

Commit

8645d6f

verified ·

1 Parent(s): e08be35

Deploy Gradio app with multiple files

Browse files

Files changed (3) hide show

app.py +53 -0
models.py +141 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import gradio as gr
+import models
+# Model loading and compilation are triggered by the `import models` line above:
+# models.py calls load_and_compile_models() once at module import time, so that
+# work has finished before the Gradio app is launched.
+# No separate "loaded" flag is kept in this file; it relies on Python importing a module only once per process.
+# Alternatively, it could be called within a gr.Blocks.load event, but that's per-session.
+# For AoT, it must be during startup.
+with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as demo:
+    gr.HTML("""
+        <div style="text-align: center; margin-bottom: 20px;">
+            <h1 style="font-size: 2.5em; color: #333;">🎨 SDXL IP-Adapter Image Remixer</h1>
+            <p style="font-size: 1.1em; color: #555;">Drag up to three reference images, add a text prompt, and let the AI remix them into something new!</p>
+            <p style="font-size: 0.9em; color: #777;">Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #007bff; text-decoration: none;">anycoder</a></p>
+        </div>
+    """)
+    with gr.Column(elem_classes="container"):
+        with gr.Row():
+            image_input_1 = gr.Image(label="Reference Image 1 (Optional)", type="pil", height=256, sources=["upload", "clipboard"], interactive=True)
+            image_input_2 = gr.Image(label="Reference Image 2 (Optional)", type="pil", height=256, sources=["upload", "clipboard"], interactive=True)
+            image_input_3 = gr.Image(label="Reference Image 3 (Optional)", type="pil", height=256, sources=["upload", "clipboard"], interactive=True)
+        prompt_input = gr.Textbox(
+            label="Prompt",
+            placeholder="A whimsical creature made of clouds and starlight, fantastical, vivid colors, highly detailed, 4k",
+            lines=2,
+            interactive=True,
+        )
+        generate_btn = gr.Button("Remix Images", variant="primary")
+        output_gallery = gr.Gallery(
+            label="Generated Images",
+            columns=2, rows=1, height=512, object_fit="contain",
+            allow_preview=True,
+            interactive=False,
+        )
+        # Event listener for the generate button
+        generate_btn.click(
+            fn=models.remix_images,
+            inputs=[prompt_input, image_input_1, image_input_2, image_input_3],
+            outputs=output_gallery,
+            api_name="remix_images",
+            queue=True,
+            show_progress="full",
+        )
+if __name__ == "__main__":
+    demo.launch(max_threads=10)

models.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import spaces
+import torch
+from diffusers import DiffusionPipeline, AutoencoderKL
+from ip_adapter import IPAdapter
+from PIL import Image
+import gradio as gr
+# --- Configuration Constants ---
+SDXL_BASE_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
+IP_ADAPTER_MODEL_ID = "h94/IP-Adapter-Plus-SDXL"
+IP_ADAPTER_WEIGHT_NAME = "ip-adapter-plus_sdxl_vit-h.bin"
+# --- Global Model Instances ---
+# These will be initialized and compiled during startup
+pipe_global: DiffusionPipeline = None
+ip_adapter_global: IPAdapter = None
+@spaces.GPU(duration=1500)  # Allocate maximum time for startup compilation
+def load_and_compile_models():
+    """
+    Loads the SDXL and IP-Adapter models and performs Ahead-of-Time (AoT) compilation
+    of the UNet for performance optimization using ZeroGPU.
+    This function is called once during application startup.
+    """
+    global pipe_global, ip_adapter_global
+    print("🚀 Starting model loading and compilation...")
+    # 1. Load SDXL base pipeline
+    print(f"Loading SDXL base model: {SDXL_BASE_MODEL_ID}")
+    pipe_global = DiffusionPipeline.from_pretrained(
+        SDXL_BASE_MODEL_ID,
+        torch_dtype=torch.float16,
+        add_watermarker=False,  # Disable watermarking for potential speedup
+        variant="fp16" # Use fp16 variant if available for better performance
+    )
+    # Load VAE separately as recommended for stabilityai models
+    pipe_global.vae = AutoencoderKL.from_pretrained(
+        "stabilityai/sdxl-vae", torch_dtype=torch.float16, variant="fp16"
+    )
+    pipe_global.to("cuda")
+    print("SDXL base model loaded and moved to CUDA.")
+    # 2. Load IP-Adapter
+    print(f"Loading IP-Adapter from: {IP_ADAPTER_MODEL_ID}/{IP_ADAPTER_WEIGHT_NAME}")
+    ip_adapter_global = IPAdapter(
+        pipe_global,
+        image_encoder_path=IP_ADAPTER_MODEL_ID,
+        ip_ckpt=IP_ADAPTER_WEIGHT_NAME,
+        device="cuda"
+    )
+    print("IP-Adapter loaded and integrated into the pipeline.")
+    # 3. Perform AoT compilation for the UNet (main generation component)
+    print("Starting Ahead-of-Time (AoT) compilation for pipe_global.unet with IP-Adapter...")
+    # Prepare dummy inputs for capturing UNet's forward pass.
+    # We need to call a function that internally uses pipe_global.unet
+    # and has IP-Adapter inputs integrated. The `ip_adapter_global.generate` method
+    # is designed for this. We use minimal steps for tracing.
+    dummy_prompt = "a photorealistic image of a beautiful landscape"
+    dummy_ip_image = Image.new('RGB', (224, 224), color = 'red') # IP-Adapter typically uses 224x224 or 256x256 input
+    with spaces.aoti_capture(ip_adapter_global.pipe.unet) as call:
+        # Execute a minimal generation using the IP-Adapter's generate method.
+        # This will trigger the forward pass of `pipe_global.unet` with
+        # all the necessary IP-Adapter embeddings, allowing `aoti_capture` to trace it.
+        _ = ip_adapter_global.generate(
+            prompt=dummy_prompt,
+            images=[dummy_ip_image],  # Provide a dummy image to trace the IP-Adapter path
+            height=1024, width=1024,
+            num_inference_steps=2,  # Use minimal steps for fast tracing
+            guidance_scale=7.5,
+            num_images_per_prompt=1,
+            output_type="pil",
+        ).images[0]
+    # Export the captured UNet module
+    print("Exporting UNet...")
+    exported_unet = torch.export.export(
+        ip_adapter_global.pipe.unet,
+        args=call.args,
+        kwargs=call.kwargs,
+    )
+    # Compile the exported UNet module
+    print("Compiling UNet...")
+    compiled_unet = spaces.aoti_compile(exported_unet)
+    print("UNet compilation complete.")
+    # Apply the compiled module back to the pipeline's UNet
+    spaces.aoti_apply(compiled_unet, ip_adapter_global.pipe.unet)
+    print("AoT compiled UNet applied to the pipeline.")
+    print("✅ Models loaded and compiled successfully!")
+# Call the loading and compilation function once when this module is imported
+load_and_compile_models()
+@spaces.GPU(duration=60)  # Allocate up to 60 seconds for actual image generation
+def remix_images(
+    prompt: str,
+    image1: Image.Image | None,
+    image2: Image.Image | None,
+    image3: Image.Image | None
+) -> list[Image.Image]:
+    """
+    Generates images based on a text prompt and up to three input images using SDXL with IP-Adapter.
+    Args:
+        prompt (str): The text prompt for image generation.
+        image1 (PIL.Image.Image | None): The first input image.
+        image2 (PIL.Image.Image | None): The second input image.
+        image3 (PIL.Image.Image | None): The third input image.
+    Returns:
+        list[PIL.Image.Image]: A list of generated images.
+    """
+    if not prompt:
+        raise gr.Error("Prompt cannot be empty! Please provide a textual description.")
+    # Filter out None images to create a list of valid input images
+    input_images = [img for img in [image1, image2, image3] if img is not None]
+    print(f"Generating image(s) for prompt: '{prompt}'")
+    print(f"Using {len(input_images)} input images for IP-Adapter.")
+    # Call the IP-Adapter's generate method.
+    # The `ip-adapter` library's `generate` method is designed to handle
+    # an empty `images` list by falling back to pure text-to-image generation.
+    generated_images = ip_adapter_global.generate(
+        prompt=prompt,
+        images=input_images,  # This can be an empty list
+        height=1024, width=1024,
+        num_inference_steps=30,  # Standard number of inference steps
+        guidance_scale=7.5,      # Classifier-free guidance scale
+        num_images_per_prompt=1, # Generate one image per request
+        output_type="pil",       # Ensure output is PIL Image objects
+        # No seed is used as per requirement
+    ).images
+    return generated_images

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+gradio
+torch
+git+https://github.com/huggingface/diffusers
+git+https://github.com/huggingface/transformers
+accelerate
+Pillow
+safetensors
+xformers
+spaces
+ip-adapter