lazerkat committed
Commit 1748b4f · verified · 1 Parent(s): 7531a96

Update app.py

Files changed (1): app.py +125 -52
app.py CHANGED
@@ -1,105 +1,178 @@
  import gradio as gr
- import json
  import os
  import urllib.request
- from pathlib import Path
-
  import torch
  from PIL import Image
  import numpy as np

  # Global variables
  model = None
- checkpoint = None
  device = None

- # Download and load the model
  def initialize_model():
-     global model, checkpoint, device

      device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

      model_url = "https://huggingface.co/lazerkat/randomdiffusion/resolve/main/newest.pth"
      model_path = "newest.pth"

-     # Download if not already present
      if not os.path.exists(model_path):
-         gr.Info("Downloading model...")
          urllib.request.urlretrieve(model_url, model_path)

-     # Load checkpoint
      checkpoint = torch.load(model_path, map_location=device)

-     # Recreate the model architecture
-     from train import DiffusionUNet  # Import directly from training script
-     model = DiffusionUNet(vocab_size=checkpoint['vocab_size']).to(device)
      model.load_state_dict(checkpoint['model_state_dict'])
      model.eval()

-     return "Model loaded successfully!"

- # Generate image from prompt
- def generate_image(prompt):
-     global model, checkpoint, device

      if model is None:
-         return None, "Model not loaded yet. Please wait for initialization."
-
-     # Tokenize prompt using the saved vocab
-     vocab_data = checkpoint['word_to_idx']
-     max_len = 20
-     words = [w.strip('.,!?"\'') for w in prompt.lower().split()][:max_len]
-     indices = [vocab_data.get(w, 1) for w in words]
-     indices += [0] * (max_len - len(indices))
-     text_tokens = torch.tensor(indices).unsqueeze(0).to(device)

-     # Diffusion sampling
-     from train import Diffusion
-     diffusion = Diffusion(timesteps=500, device=device)

      with torch.no_grad():
-         generated = diffusion.sample(model, text_tokens, image_size=64, batch_size=1)

-     # Convert to PIL image
      image = generated.cpu().squeeze(0)
      image = (image + 1) / 2
      image = image.clamp(0, 1)
      image = image.permute(1, 2, 0).numpy()
      image = (image * 255).astype(np.uint8)
-     img = Image.fromarray(image)

-     return img, f"Generated image for: '{prompt}'"

- # Create the interface
- with gr.Blocks(title="RandomDiffusion", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# RandomDiffusion")
-     gr.Markdown("Text-to-Image Diffusion Model")

-     # Model status
-     status = gr.Textbox(label="Model Status", value="Initializing...", interactive=False)

-     # Image generation
      with gr.Row():
-         with gr.Column():
-             prompt = gr.Textbox(label="Enter Prompt", placeholder="a beautiful landscape")
-             generate_btn = gr.Button("Generate")
-         with gr.Column():
-             output_image = gr.Image(label="Generated Image", type="pil")
-             result_text = gr.Textbox(label="Result")
-
-     # Load model on startup
      demo.load(
          lambda: initialize_model(),
-         inputs=[],
          outputs=[status]
      )

-     # Generate on button click
      generate_btn.click(
          generate_image,
-         inputs=[prompt],
-         outputs=[output_image, result_text]
      )

  if __name__ == "__main__":
-     demo.launch(share=True)
 
  import gradio as gr
  import os
  import urllib.request
  import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
  from PIL import Image
  import numpy as np

+ # ============================================================================
+ # DIFFUSION Model Architecture
+ # ============================================================================
+
+ class Diffusion:
+     def __init__(self, timesteps=1000, beta_start=1e-4, beta_end=0.02, device='cuda'):
+         self.timesteps = timesteps
+         self.device = device
+         self.betas = torch.linspace(beta_start, beta_end, timesteps).to(device)
+         self.alphas = 1 - self.betas
+         self.alpha_bars = torch.cumprod(self.alphas, dim=0)
+
+     @torch.no_grad()
+     def sample(self, model, x, steps=None):
+         model.eval()
+         if steps is None:
+             steps = self.timesteps
+
+         for t in reversed(range(steps)):
+             t_batch = torch.full((x.shape[0],), t, device=self.device, dtype=torch.long)
+             predicted_noise = model(x, t_batch)
+
+             alpha = self.alphas[t]
+             alpha_bar = self.alpha_bars[t]
+             beta = self.betas[t]
+
+             if t > 0:
+                 noise = torch.randn_like(x)
+             else:
+                 noise = 0
+
+             x = (1 / torch.sqrt(alpha)) * (x - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * predicted_noise)
+             x = x + torch.sqrt(beta) * noise
+
+         model.train()
+         return x
+
+
+ class UNet(nn.Module):
+     def __init__(self, in_channels=3, out_channels=3):
+         super().__init__()
+
+         # Encoder
+         self.enc1 = self.conv_block(in_channels, 64)
+         self.enc2 = self.conv_block(64, 128)
+         self.enc3 = self.conv_block(128, 256)
+
+         # Bottleneck
+         self.bottleneck = self.conv_block(256, 512)
+
+         # Decoder
+         self.dec3 = self.conv_block(512 + 256, 256)
+         self.dec2 = self.conv_block(256 + 128, 128)
+         self.dec1 = self.conv_block(128 + 64, 64)
+
+         # Time embedding
+         self.time_embed = nn.Sequential(
+             nn.Linear(1, 128),
+             nn.ReLU(),
+             nn.Linear(128, 128)
+         )
+
+         self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+         self.final = nn.Conv2d(64, out_channels, 1)
+
+         self.pool = nn.MaxPool2d(2)
+
+     def conv_block(self, in_ch, out_ch):
+         return nn.Sequential(
+             nn.Conv2d(in_ch, out_ch, 3, padding=1),
+             nn.BatchNorm2d(out_ch),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(out_ch, out_ch, 3, padding=1),
+             nn.BatchNorm2d(out_ch),
+             nn.ReLU(inplace=True)
+         )
+
+     def forward(self, x, t):
+         # Time embedding
+         t_embed = self.time_embed(t.float().unsqueeze(-1))
+         t_embed = t_embed.unsqueeze(-1).unsqueeze(-1)
+
+         # Encoder
+         e1 = self.enc1(x)
+         e2 = self.enc2(self.pool(e1))
+         e3 = self.enc3(self.pool(e2))
+
+         # Bottleneck
+         b = self.bottleneck(self.pool(e3))
+         b = b + t_embed.repeat(1, 1, b.shape[2], b.shape[3]) if b.shape[1] == t_embed.shape[1] else b
+
+         # Decoder
+         d3 = self.dec3(torch.cat([self.up(b), e3], dim=1))
+         d2 = self.dec2(torch.cat([self.up(d3), e2], dim=1))
+         d1 = self.dec1(torch.cat([self.up(d2), e1], dim=1))
+
+         return self.final(d1)
+
+
  # Global variables
  model = None
  device = None

+ # Download and load model
  def initialize_model():
+     global model, device

      device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

      model_url = "https://huggingface.co/lazerkat/randomdiffusion/resolve/main/newest.pth"
      model_path = "newest.pth"

      if not os.path.exists(model_path):
          urllib.request.urlretrieve(model_url, model_path)

      checkpoint = torch.load(model_path, map_location=device)

+     model = UNet().to(device)
      model.load_state_dict(checkpoint['model_state_dict'])
      model.eval()

+     return "Model loaded successfully!"

+ # Generate image
+ def generate_image():
+     global model, device

      if model is None:
+         return None

+     diffusion = Diffusion(timesteps=1000, device=device)

      with torch.no_grad():
+         noise = torch.randn(1, 3, 64, 64).to(device)
+         generated = diffusion.sample(model, noise, steps=100)

+     # Convert to image
      image = generated.cpu().squeeze(0)
      image = (image + 1) / 2
      image = image.clamp(0, 1)
      image = image.permute(1, 2, 0).numpy()
      image = (image * 255).astype(np.uint8)

+     return Image.fromarray(image)

+ # Create interface
+ with gr.Blocks(title="RandomDiffusion") as demo:
+     gr.Markdown("# 🎨 RandomDiffusion")
+     gr.Markdown("Random image generation using diffusion")

+     status = gr.Textbox(label="Status", value="Loading model...", interactive=False)

      with gr.Row():
+         generate_btn = gr.Button("Generate Random Image", variant="primary")
+
+     output_image = gr.Image(label="Generated Image", type="pil")
+
      demo.load(
          lambda: initialize_model(),
          outputs=[status]
      )

      generate_btn.click(
          generate_image,
+         outputs=[output_image]
      )

  if __name__ == "__main__":
+     demo.launch()
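
The new app drops the text prompt entirely and samples unconditionally: initialize_model() builds the bundled UNet and loads newest.pth, and generate_image() runs the Diffusion.sample loop from 64x64 Gaussian noise. A minimal sketch of exercising that path outside the Gradio UI is below; it is not part of this commit and assumes the updated app.py is importable locally and newest.pth is reachable.

# Hypothetical usage sketch (not part of the commit): run the new
# unconditional generation path without launching the Gradio interface.
import app                      # the updated app.py from this commit

app.initialize_model()          # downloads newest.pth if missing, builds UNet, loads weights
img = app.generate_image()      # 100 reverse-diffusion steps from 1x3x64x64 noise
if img is not None:
    img.save("sample.png")      # PIL image, values scaled from [-1, 1] to [0, 255]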