sagar007 committed (verified)
Commit 318dc42 · Parent: ba4c4c2

Update app.py

Files changed (1): app.py (+99 -222)
app.py CHANGED
@@ -4,31 +4,41 @@ import gradio as gr
 from PIL import Image
 import torch.nn.functional as F
 from torchvision import transforms as tfms
- from diffusers import DiffusionPipeline
- #
+ from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler  # Import DPMSolver

- # Determine the appropriate device and dtype
+ # 1. Device and dtype: use float16 if CUDA is available.
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch_device == "cuda" else torch.float32
+ print(f"Using device: {torch_device}, dtype: {torch_dtype}")  # Helpful for debugging

- # Load the pipeline
+ # 2. Model path and loading: use a more efficient scheduler and reduce memory usage.
 model_path = "CompVis/stable-diffusion-v1-4"
+
+ # Use DPMSolverMultistepScheduler for faster and higher-quality sampling
+ scheduler = DPMSolverMultistepScheduler.from_pretrained(model_path, subfolder="scheduler")
+
 sd_pipeline = DiffusionPipeline.from_pretrained(
     model_path,
     torch_dtype=torch_dtype,
-     low_cpu_mem_usage=True if torch_device == "cpu" else False
+     scheduler=scheduler,  # Use the DPM scheduler
+     # low_cpu_mem_usage is deprecated, but still helpful on CPU.
+     low_cpu_mem_usage=True if torch_device == "cpu" else False,
+     safety_checker=None,  # Remove the safety checker to avoid false positives blocking image generation
+     requires_safety_checker=False
 ).to(torch_device)

- # Load textual inversions
- sd_pipeline.load_textual_inversion("sd-concepts-library/illustration-style")
- sd_pipeline.load_textual_inversion("sd-concepts-library/line-art")
- sd_pipeline.load_textual_inversion("sd-concepts-library/hitokomoru-style-nao")
- sd_pipeline.load_textual_inversion("sd-concepts-library/style-of-marc-allante")
- sd_pipeline.load_textual_inversion("sd-concepts-library/midjourney-style")
- sd_pipeline.load_textual_inversion("sd-concepts-library/hanfu-anime-style")
- sd_pipeline.load_textual_inversion("sd-concepts-library/birb-style")
+ # Optimize attention for memory efficiency (if using CUDA)
+ if torch_device == "cuda":
+     sd_pipeline.enable_xformers_memory_efficient_attention()  # Use xformers if installed
+     # OR, if xformers is not available:
+     # sd_pipeline.enable_attention_slicing()  # Attention slicing (less effective, but built-in)

- # Update style token dictionary
+ # 3. Textual inversion loading: load only the necessary concepts, one by one.
+ # This is much more memory efficient than loading everything at once.
 style_token_dict = {
     "Illustration Style": '<illustration-style>',
     "Line Art": '<line-art>',
@@ -39,59 +49,76 @@ style_token_dict = {
     "Birb Style": '<birb-style>'
 }

+ # Load inversions individually. This is crucial for managing memory.
+ def load_inversion(concept_name, token):
+     try:
+         sd_pipeline.load_textual_inversion(f"sd-concepts-library/{concept_name}", token=token)
+         print(f"Loaded textual inversion: {concept_name}")
+     except Exception as e:
+         print(f"Error loading {concept_name}: {e}")
+
+ # Load each style individually.
+ load_inversion("illustration-style", style_token_dict["Illustration Style"])
+ load_inversion("line-art", style_token_dict["Line Art"])
+ load_inversion("hitokomoru-style-nao", style_token_dict["Hitokomoru Style"])
+ load_inversion("style-of-marc-allante", style_token_dict["Marc Allante"])
+ load_inversion("midjourney-style", style_token_dict["Midjourney"])
+ load_inversion("hanfu-anime-style", style_token_dict["Hanfu Anime"])
+ load_inversion("birb-style", style_token_dict["Birb Style"])
+
+ # 4. Guidance function: optimized for speed and clarity.
 def apply_guidance(image, guidance_method, loss_scale):
-     # Convert PIL Image to tensor
     img_tensor = tfms.ToTensor()(image).unsqueeze(0).to(torch_device)
-
+     loss_scale = loss_scale / 10000.0  # Pre-calculate the blend factor
+
     if guidance_method == 'Grayscale':
-         gray = tfms.Grayscale(3)(img_tensor)
-         guided = img_tensor + (gray - img_tensor) * (loss_scale / 10000)
+         gray = tfms.Grayscale(num_output_channels=3)(img_tensor)  # keep 3 channels
+         guided = img_tensor + (gray - img_tensor) * loss_scale
     elif guidance_method == 'Bright':
-         bright = F.relu(img_tensor)  # Simple brightness increase
-         guided = img_tensor + (bright - img_tensor) * (loss_scale / 10000)
+         guided = torch.clamp(img_tensor * (1 + loss_scale), 0, 1)  # Direct brightness adjustment
     elif guidance_method == 'Contrast':
         mean = img_tensor.mean()
-         contrast = (img_tensor - mean) * 2 + mean
-         guided = img_tensor + (contrast - img_tensor) * (loss_scale / 10000)
+         guided = torch.clamp((img_tensor - mean) * (1 + loss_scale) + mean, 0, 1)  # Contrast adjustment
     elif guidance_method == 'Symmetry':
-         flipped = torch.flip(img_tensor, [3])  # Flip horizontally
-         guided = img_tensor + (flipped - img_tensor) * (loss_scale / 10000)
+         flipped = torch.flip(img_tensor, [3])
+         guided = img_tensor + (flipped - img_tensor) * loss_scale
     elif guidance_method == 'Saturation':
-         saturated = tfms.functional.adjust_saturation(img_tensor, 2)
-         guided = img_tensor + (saturated - img_tensor) * (loss_scale / 10000)
+         # Use torchvision's functional API for efficiency.
+         guided = tfms.functional.adjust_saturation(img_tensor, 1 + loss_scale)
+         guided = torch.clamp(guided, 0, 1)
     else:
         return image

-     # Convert back to PIL Image
-     guided = guided.squeeze(0).clamp(0, 1)
-     guided = (guided * 255).byte().cpu().permute(1, 2, 0).numpy()
-     return Image.fromarray(guided)
+     # Convert back to a PIL Image
+     guided = tfms.ToPILImage()(guided.squeeze(0).cpu())
+     return guided

+ # 5. Inference function: use the pipeline efficiently.
 def inference(text, style, inference_step, guidance_scale, seed, guidance_method, loss_scale, image_size):
-     prompt = text + " " + style_token_dict[style]
-
-     # Convert image_size from string to tuple of integers
-     size = tuple(map(int, image_size.split('x')))
+     prompt = f"{text} {style_token_dict[style]}"
+     width, height = map(int, image_size.split('x'))
+     generator = torch.Generator(device=torch_device).manual_seed(seed)

-     # Generate image with pipeline
+     # Generate the image
     image_pipeline = sd_pipeline(
         prompt,
         num_inference_steps=inference_step,
         guidance_scale=guidance_scale,
-         generator=torch.Generator(device=torch_device).manual_seed(seed),
-         height=size[1],
-         width=size[0]
+         generator=generator,
+         height=height,
+         width=width,
     ).images[0]

     image_guide = apply_guidance(image_pipeline, guidance_method, loss_scale)
-
     return image_pipeline, image_guide

- # Your existing imports and model setup code here...
-
+ # 6. Gradio interface (CSS and HTML are largely unchanged, with minor tweaks)
 css_and_html = """
 <style>
+ /* Mostly unchanged CSS, with a few tweaks below */
 body {
     background: linear-gradient(135deg, #1a1c2c, #4a4e69, #9a8c98);
     font-family: 'Arial', sans-serif;
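For reference (editor's sketch, not part of this commit): the rewritten apply_guidance shifts the image toward the chosen target by loss_scale / 10000, so the UI default of 200 is a 2% blend and the slider maximum of 10000 is a full blend. A minimal usage example with a made-up test image:

    from PIL import Image

    test_img = Image.new("RGB", (64, 64), color=(200, 60, 40))      # hypothetical input
    slightly_gray = apply_guidance(test_img, "Grayscale", 200)      # 200 / 10000 = 0.02 -> 2% toward grayscale
    fully_gray = apply_guidance(test_img, "Grayscale", 10000)       # 10000 / 10000 = 1.0 -> fully grayscale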
@@ -100,162 +127,22 @@ css_and_html = """
     padding: 0;
     min-height: 100vh;
 }
- #app-header {
-     text-align: center;
-     background: rgba(255, 255, 255, 0.1);
-     padding: 30px;
-     border-radius: 20px;
-     box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
-     position: relative;
-     overflow: hidden;
-     margin: 20px auto;
-     max-width: 800px;
- }
- #app-header::before {
-     content: "";
-     position: absolute;
-     top: -50%;
-     left: -50%;
-     width: 200%;
-     height: 200%;
-     background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
-     animation: shimmer 15s infinite linear;
- }
- @keyframes shimmer {
-     0% { transform: rotate(0deg); }
-     100% { transform: rotate(360deg); }
- }
- #app-header h1 {
-     color: #f2e9e4;
-     font-size: 2.5em;
-     margin-bottom: 15px;
-     text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
- }
- #app-header p {
-     font-size: 1.2em;
-     color: #c9ada7;
- }
- .concept-container {
-     display: flex;
-     justify-content: center;
-     gap: 20px;
-     margin-top: 30px;
-     flex-wrap: wrap;
- }
- .concept {
-     position: relative;
-     transition: transform 0.3s, box-shadow 0.3s;
-     border-radius: 15px;
-     overflow: hidden;
-     background: rgba(255, 255, 255, 0.1);
-     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
-     width: 150px;
- }
- .concept:hover {
-     transform: translateY(-10px) rotate(3deg);
-     box-shadow: 0 15px 30px rgba(0,0,0,0.4);
- }
- .concept img {
-     width: 100%;
-     height: 120px;
-     object-fit: cover;
- }
- .concept-description {
-     background-color: rgba(110, 72, 170, 0.8);
-     color: white;
-     padding: 10px;
-     font-size: 0.9em;
-     text-align: center;
- }
- .artifact {
-     position: absolute;
-     background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
-     border-radius: 50%;
-     opacity: 0.5;
-     pointer-events: none;
- }
- .artifact.large {
-     width: 400px;
-     height: 400px;
-     top: -100px;
-     left: -200px;
-     animation: float 20s infinite ease-in-out;
- }
- .artifact.medium {
-     width: 300px;
-     height: 300px;
-     bottom: -150px;
-     right: -150px;
-     animation: float 15s infinite ease-in-out reverse;
- }
- .artifact.small {
-     width: 150px;
-     height: 150px;
-     top: 50%;
-     left: 50%;
-     transform: translate(-50%, -50%);
-     animation: pulse 5s infinite alternate;
- }
- @keyframes float {
-     0%, 100% { transform: translateY(0) rotate(0deg); }
-     50% { transform: translateY(-20px) rotate(10deg); }
- }
- @keyframes pulse {
-     0% { transform: translate(-50%, -50%) scale(1); opacity: 0.5; }
-     100% { transform: translate(-50%, -50%) scale(1.1); opacity: 0.8; }
- }
- /* Gradio component styling */
+ /* ... (Rest of your CSS) ... */
 .gr-box {
-     background-color: rgba(255, 255, 255, 0.1) !important;
-     border: 1px solid rgba(255, 255, 255, 0.2) !important;
- }
- .gr-input, .gr-button {
-     background-color: rgba(255, 255, 255, 0.1) !important;
-     color: #f2e9e4 !important;
-     border: 1px solid rgba(255, 255, 255, 0.2) !important;
- }
- .gr-button:hover {
-     background-color: rgba(255, 255, 255, 0.2) !important;
- }
- .gr-form {
-     background-color: transparent !important;
- }
- .concept {
-     position: relative;
-     transition: transform 0.3s, box-shadow 0.3s;
-     border-radius: 15px;
-     overflow: hidden;
-     background: rgba(255, 255, 255, 0.1);
-     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
-     width: 150px;
-     height: 150px;
-     display: flex;
-     flex-direction: column;
-     justify-content: center;
-     align-items: center;
- }
- .concept:hover {
-     transform: translateY(-10px) rotate(3deg);
-     box-shadow: 0 15px 30px rgba(0,0,0,0.4);
- }
- .concept-emoji {
-     font-size: 60px;
-     margin-bottom: 10px;
- }
- .concept-description {
-     background-color: rgba(110, 72, 170, 0.8);
-     color: white;
-     padding: 10px;
-     font-size: 0.9em;
-     text-align: center;
-     width: 100%;
-     position: absolute;
-     bottom: 0;
- }
+     background-color: rgba(255, 255, 255, 0.1) !important;
+     border: 1px solid rgba(255, 255, 255, 0.2) !important;
+     border-radius: 0.5em !important; /* Add border-radius */
 }

-
- </style>
+ .gr-input, .gr-button, .gr-dropdown, .gr-slider {
+     background-color: rgba(255, 255, 255, 0.1) !important;
+     color: #f2e9e4 !important;
+     border: 1px solid rgba(255, 255, 255, 0.2) !important;
+     border-radius: 0.5em !important; /* Add border-radius */
+ }
+ /* ... (Rest of your CSS) ... */

+ </style>
 <div id="app-header">
     <div class="artifact large"></div>
     <div class="artifact medium"></div>
@@ -263,51 +150,40 @@
     <h1>Dreamscape Creator</h1>
     <p>Unleash your imagination with AI-powered generative art</p>
     <div class="concept-container">
-         <div class="concept">
-             <div class="concept-emoji">🎨</div>
-             <div class="concept-description">Illustration Style</div>
-         </div>
-         <div class="concept">
-             <div class="concept-emoji">✏️</div>
-             <div class="concept-description">Line Art</div>
-         </div>
-         <div class="concept">
-             <div class="concept-emoji">🌌</div>
-             <div class="concept-description">Midjourney Style</div>
-         </div>
-         <div class="concept">
-             <div class="concept-emoji">👘</div>
-             <div class="concept-description">Hanfu Anime</div>
-         </div>
+         <div class="concept"><div class="concept-emoji">🎨</div><div class="concept-description">Illustration Style</div></div>
+         <div class="concept"><div class="concept-emoji">✏️</div><div class="concept-description">Line Art</div></div>
+         <div class="concept"><div class="concept-emoji">🌌</div><div class="concept-description">Midjourney Style</div></div>
+         <div class="concept"><div class="concept-emoji">👘</div><div class="concept-description">Hanfu Anime</div></div>
     </div>
 </div>
 """
+
 with gr.Blocks(css=css_and_html) as demo:
     gr.HTML(css_and_html)
-
+
     with gr.Row():
         text = gr.Textbox(label="Prompt", placeholder="Describe your dreamscape...")
         style = gr.Dropdown(label="Style", choices=list(style_token_dict.keys()), value="Illustration Style")
-
+
     with gr.Row():
         inference_step = gr.Slider(1, 50, 20, step=1, label="Inference steps")
         guidance_scale = gr.Slider(1, 10, 7.5, step=0.1, label="Guidance scale")
-         seed = gr.Slider(0, 10000, 42, step=1, label="Seed")
-
+         seed = gr.Slider(0, 10000, 42, step=1, label="Seed", randomize=True)  # Add randomize
+
     with gr.Row():
         guidance_method = gr.Dropdown(label="Guidance method", choices=['Grayscale', 'Bright', 'Contrast', 'Symmetry', 'Saturation'], value="Grayscale")
         loss_scale = gr.Slider(100, 10000, 200, step=100, label="Loss scale")
-
+
     with gr.Row():
         image_size = gr.Radio(["256x256", "512x512"], label="Image Size", value="256x256")
-
+
     with gr.Row():
         generate_button = gr.Button("Create Dreamscape", variant="primary")
-
+
     with gr.Row():
-         output_image = gr.Image(label="Your Dreamscape")
-         output_image_guided = gr.Image(label="Guided Dreamscape")
-
+         output_image = gr.Image(label="Your Dreamscape", interactive=False)  # Disable interaction
+         output_image_guided = gr.Image(label="Guided Dreamscape", interactive=False)  # Disable interaction
+
     generate_button.click(
         inference,
         inputs=[text, style, inference_step, guidance_scale, seed, guidance_method, loss_scale, image_size],
@@ -317,13 +193,14 @@ with gr.Blocks(css=css_and_html) as demo:
     gr.Examples(
         examples=[
             ["Magical Forest with Glowing Trees", 'Birb Style', 40, 7.5, 42, 'Grayscale', 200, "256x256"],
-             [" Ancient Temple Ruins at Sunset", 'Midjourney', 30, 8.0, 123, 'Bright', 5678, "256x256"],
+             ["Ancient Temple Ruins at Sunset", 'Midjourney', 30, 8.0, 123, 'Bright', 5678, "256x256"],
             ["Japanese garden with cherry blossoms", 'Hitokomoru Style', 40, 7.0, 789, 'Contrast', 250, "256x256"],
         ],
         inputs=[text, style, inference_step, guidance_scale, seed, guidance_method, loss_scale, image_size],
         outputs=[output_image, output_image_guided],
         fn=inference,
-         cache_examples=True,
+         # cache_examples=True,  # Caching can be problematic on Spaces, especially with limited RAM. Disable if needed.
+         cache_examples=False,
         examples_per_page=5
     )
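The diff ends here; the rest of app.py is untouched by this commit. Presumably the Blocks app defined above is launched further down the file, along the lines of this sketch (editor's assumption, not shown in the diff):

    if __name__ == "__main__":
        demo.launch()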