# Hugging Face Space — status when this page was captured: Runtime error
| import gradio as gr | |
| import torch | |
| from diffusers import StableDiffusionControlNetPipeline, ControlNetModel | |
| from controlnet_aux import CannyDetector | |
| import gc | |
| import numpy as np | |
| from PIL import Image | |
# Module-level Canny edge detector, created once at import time and reused
# by generate_image() for every request.
canny = CannyDetector()
def create_pipeline():
    """Build the ControlNet-guided Stable Diffusion pipeline used by the app.

    Loads the ``lllyasviel/sd-controlnet-canny`` ControlNet and attaches it to
    the ``nitrosocke/Ghibli-Diffusion`` checkpoint with the safety checker
    disabled.

    Returns:
        The configured ``StableDiffusionControlNetPipeline``.
    """
    has_cuda = torch.cuda.is_available()

    # Free whatever a previous pipeline may have left behind. Run the garbage
    # collector first so tensors it releases can then be dropped from the
    # CUDA cache.
    gc.collect()
    if has_cuda:
        torch.cuda.empty_cache()

    # Use half precision only when a GPU is present. float16 inference on CPU
    # is poorly supported (operators may be missing or very slow), so fall
    # back to float32 there instead of failing at generation time.
    dtype = torch.float16 if has_cuda else torch.float32

    # Load ControlNet
    controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-canny",
        torch_dtype=dtype,
        use_safetensors=True,
    )

    # Load pipeline
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "nitrosocke/Ghibli-Diffusion",
        controlnet=controlnet,
        torch_dtype=dtype,
        safety_checker=None,
    )

    if has_cuda:
        pipe.enable_model_cpu_offload()
    # NOTE(review): the original indentation was lost, so it is unclear whether
    # attention slicing was meant to be GPU-only. It is applied unconditionally
    # here — confirm this matches the intent.
    pipe.enable_attention_slicing(1)

    return pipe
# Build the shared pipeline once at import time; generate_image() reuses it
# for every request.
pipe = create_pipeline()
def enhance_prompt(base_prompt):
    """Wrap the user's prompt in fixed Ghibli style and quality keywords.

    The result is ``<style keywords>, <base_prompt>, <quality keywords>``,
    joined with ``", "``.
    """
    style_prefix = ", ".join(
        (
            "Studio Ghibli masterpiece",
            "hand-painted animation style",
            "Hayao Miyazaki inspired",
            "soft detailed lighting",
            "gentle color palette",
            "delicate line art",
            "atmospheric background",
        )
    )
    quality_suffix = "high quality, detailed features, smooth lines"
    # format() renders the prompt the same way an f-string placeholder would.
    return ", ".join((style_prefix, format(base_prompt), quality_suffix))
def preprocess_image(image, max_size=512):
    """Scale *image* to fit a white ``max_size`` x ``max_size`` RGB canvas.

    The image is resized so its longer side equals ``max_size``, keeping the
    aspect ratio, and is then pasted centred onto a white square.

    Args:
        image: A PIL image or a NumPy array (converted via ``Image.fromarray``).
        max_size: Side length of the square output in pixels. Defaults to 512.

    Returns:
        A new ``max_size`` x ``max_size`` RGB PIL image.

    Raises:
        ValueError: If the input image has a zero-length side.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    width, height = image.size
    if width <= 0 or height <= 0:
        # Without this guard the ratio below would divide by zero.
        raise ValueError("Image has no pixels")

    # Resize so the longer side equals max_size while keeping the aspect ratio.
    scale = max_size / max(width, height)
    # Clamp to 1 px: for extreme aspect ratios the short side would otherwise
    # truncate to 0 and make resize() fail.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Centre the resized image on a white square canvas.
    canvas = Image.new("RGB", (max_size, max_size), (255, 255, 255))
    offset = ((max_size - new_size[0]) // 2, (max_size - new_size[1]) // 2)
    canvas.paste(image, offset)
    return canvas
def process_image_for_canny(image):
    """Return *image* as a three-channel NumPy array for edge detection.

    Args:
        image: A NumPy array, or anything ``np.array`` can convert (for
            example a PIL image).

    Returns:
        An array whose last axis has three channels for grayscale ``(H, W)``,
        single-channel ``(H, W, 1)``, RGB and RGBA inputs. Arrays of any other
        shape are returned unchanged.
    """
    # Convert anything that is not already an array (e.g. a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    if image.ndim == 2:
        # Grayscale: replicate the single plane across three channels.
        return np.stack([image] * 3, axis=-1)

    if image.ndim == 3:
        channels = image.shape[-1]
        if channels == 1:
            # Single-channel image stored with an explicit channel axis.
            return np.repeat(image, 3, axis=-1)
        if channels == 4:
            # RGBA: drop the alpha channel so downstream code sees plain RGB.
            return image[..., :3]

    return image
def _release_memory():
    """Run the garbage collector and, when CUDA is available, empty its cache."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def generate_image(input_image, prompt):
    """Generate a Ghibli-style image guided by the edges of *input_image*.

    Args:
        input_image: The uploaded image, or ``None`` when nothing was uploaded.
        prompt: The user's text prompt.

    Returns:
        A ``(generated_image, enhanced_prompt)`` tuple, where
        ``enhanced_prompt`` is the exact prompt passed to the pipeline.

    Raises:
        gr.Error: If the image or prompt is missing, or if any step of the
            generation fails (the original exception is chained as the cause).
    """
    # Validate outside the try block so these user-facing messages are raised
    # exactly as written instead of passing through the generic handler below.
    if input_image is None:
        raise gr.Error("Please upload an image")
    if not prompt or not prompt.strip():
        # A whitespace-only prompt carries no content either.
        raise gr.Error("Please enter a prompt")

    try:
        _release_memory()

        # Fit the upload onto the square canvas, then convert it to an array.
        preprocessed_image = preprocess_image(input_image)
        processed_image = process_image_for_canny(preprocessed_image)

        # Edge map that conditions the ControlNet.
        canny_image = canny(processed_image, low_threshold=100, high_threshold=200)

        enhanced_prompt = enhance_prompt(prompt.strip())

        with torch.inference_mode():
            result = pipe(
                prompt=enhanced_prompt,
                image=canny_image,
                num_inference_steps=30,
                guidance_scale=8.5,
                controlnet_conditioning_scale=1.0,
                negative_prompt="blurry, low quality, broken lines, distorted features, asymmetrical",
            )
        return result.images[0], enhanced_prompt
    except gr.Error:
        # Already a user-facing error: pass it through untouched.
        raise
    except Exception as e:
        # Show the failure in the UI while keeping the original traceback.
        raise gr.Error(str(e)) from e
    finally:
        # Release memory whether generation succeeded or failed.
        _release_memory()
def _reset_outputs():
    """Return blank values for the generated image and the enhanced-prompt box."""
    return [None, ""]


# Assemble the Gradio UI: inputs and buttons in the left column, results in
# the right column, and a feature/tips section underneath.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown("""
    # 🎨 Enhanced Ghibli Art Generator
    Transform your images into the magical style of Studio Ghibli with improved detail and quality
    """)

    with gr.Row():
        # Left column: source image, prompt and action buttons.
        with gr.Column():
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                elem_id="input-image",
            )
            prompt = gr.Textbox(
                label="Enter your prompt",
                placeholder="A peaceful mountain cabin surrounded by nature...",
                elem_id="prompt-input",
            )
            with gr.Row():
                generate_btn = gr.Button("🎨 Generate", variant="primary", elem_id="generate-btn")
                clear_btn = gr.Button("🗑️ Clear", elem_id="clear-btn")

        # Right column: generated image and the prompt actually used.
        with gr.Column():
            output_image = gr.Image(label="Generated Image", elem_id="output-image")
            used_prompt = gr.Textbox(
                label="Enhanced Prompt",
                interactive=False,
                elem_id="enhanced-prompt",
            )

    gr.Markdown("""
    ## 🌟 Improved Features
    - Enhanced detail with 30 inference steps
    - Stronger style adherence with 8.5 guidance scale
    - Optimized edge detection
    - Rich Ghibli-style prompt enhancement
    ## 💡 Tips
    - Use clear, well-lit images
    - Be specific in your prompts
    - Include mood and atmosphere descriptions
    - Expect 15-20 seconds for generation
    """)

    # Wire the buttons. "Clear" resets only the two output components.
    result_components = [output_image, used_prompt]
    generate_btn.click(
        fn=generate_image,
        inputs=[input_image, prompt],
        outputs=result_components,
    )
    clear_btn.click(_reset_outputs, outputs=result_components)
# Queue at most five waiting requests and run one generation at a time.
# Gradio 4+ no longer accepts `concurrency_count` on Blocks.queue(); the
# equivalent setting is `default_concurrency_limit`. Try the current keyword
# first and fall back to the legacy one so the app starts on either version.
try:
    demo.queue(max_size=5, default_concurrency_limit=1)
except TypeError:
    # Gradio 3.x does not know `default_concurrency_limit`.
    demo.queue(max_size=5, concurrency_count=1)

demo.launch(
    share=False,
    debug=True,
    show_error=True,
)