Spaces:

twarner
/

dcode

Running on Zero

App Files Files Community

twarner committed 24 days ago

Commit

bbd6111

1 Parent(s): f1b3e74

Update to latent-gcode diffusion model

Browse files

Files changed (3) hide show

README.md +22 -7
app.py +157 -89
requirements.txt +3 -0

README.md CHANGED Viewed

@@ -4,24 +4,39 @@ emoji: ✏️
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 5.9.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: Text to Polargraph Gcode
 ---
 # dcode
-Generate polargraph-compatible gcode from text prompts using finetuned diffusion models.
 ## Usage
-1. Enter a prompt (e.g., "drawing of a cat")
-2. Adjust temperature (higher = more creative)
 3. Click Generate
-4. View preview and download gcode
 ## Model
-Finetuned Flan-T5-base on 175k image-gcode pairs.

 colorFrom: gray
 colorTo: green
 sdk: gradio
+sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
 license: mit
+hardware: t4-small
+short_description: Text to Polargraph Gcode via Latent Diffusion
 ---
 # dcode
+Generate polargraph-compatible gcode from text prompts using latent diffusion.
+## How it works
+1. **Text → Latent**: Stable Diffusion generates a latent representation from your text prompt
+2. **Latent → Gcode**: Custom transformer decoder converts the latent to gcode commands
+3. **Validation**: Coordinates are clamped to machine bounds
 ## Usage
+1. Enter a prompt (e.g., "line drawing of a cat")
+2. Adjust diffusion steps and guidance scale
 3. Click Generate
+4. View preview and copy gcode
 ## Model
+- Base: Stable Diffusion 2.1 base (`stabilityai/stable-diffusion-2-1-base`)
+- Decoder: 6-layer transformer trained on 175k image-gcode pairs
+- Final loss: 0.107
+## Links
+- [Model](https://huggingface.co/twarner/dcode-latent-gcode)
+- [Dataset](https://huggingface.co/datasets/twarner/dcode-polargraph-gcode)
+- [GitHub](https://github.com/Twarner491/dcode)

app.py CHANGED Viewed

@@ -1,40 +1,113 @@
-"""dcode Gradio Space - Text to Gcode inference with visual preview."""
 import re
 import gradio as gr
 import torch
-from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer
-# Available models
-MODELS = {
-    "flan-t5-base (best)": "twarner/dcode-flan-t5-base",
-}
 # Machine limits
 BOUNDS = {"left": -420.5, "right": 420.5, "top": 594.5, "bottom": -594.5}
-# Cache loaded models
-_model_cache = {}
-def get_model(model_name: str):
-    """Load and cache model."""
-    if model_name not in _model_cache:
-        model_id = MODELS[model_name]
         device = "cuda" if torch.cuda.is_available() else "cpu"
         dtype = torch.float16 if device == "cuda" else torch.float32
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        if "gpt2" in model_id or "codegen" in model_id:
-            model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype).to(device)
-        else:
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=dtype).to(device)
-        model.eval()
-        _model_cache[model_name] = (model, tokenizer, device)
-    return _model_cache[model_name]
 def validate_gcode(gcode: str) -> str:
@@ -73,14 +146,11 @@ def gcode_to_svg(gcode: str) -> str:
     x, y = 0.0, 0.0
     pen_down = False
-    # Split on newlines first, then also split commands that may be on same line
-    # Handle gcode that's all on one line by splitting on G0/G1/M commands
     lines = []
     for line in gcode.split("\n"):
         line = line.strip()
         if not line:
             continue
-        # Split on gcode commands (G0, G1, G28, M280, etc.)
         parts = re.split(r'(?=[GM]\d)', line)
         for part in parts:
             part = part.strip()
@@ -88,19 +158,16 @@ def gcode_to_svg(gcode: str) -> str:
                 lines.append(part)
     for line in lines:
-        # Pen state from M280 servo commands
         if "M280" in line.upper():
             match = re.search(r"S(\d+)", line, re.IGNORECASE)
             if match:
                 angle = int(match.group(1))
                 was_down = pen_down
-                pen_down = angle < 50  # 40 = down, 90 = up
                 if was_down and not pen_down and len(current_path) > 1:
                     paths.append(current_path[:])
                     current_path = []
-        # Position from G0/G1 commands
         x_match = re.search(r"X([-\d.]+)", line, re.IGNORECASE)
         y_match = re.search(r"Y([-\d.]+)", line, re.IGNORECASE)
@@ -121,7 +188,6 @@ def gcode_to_svg(gcode: str) -> str:
     if len(current_path) > 1:
         paths.append(current_path)
-    # Build SVG - light mode with dark lines
     w = BOUNDS["right"] - BOUNDS["left"]
     h = BOUNDS["top"] - BOUNDS["bottom"]
     padding = 20
@@ -129,91 +195,92 @@ def gcode_to_svg(gcode: str) -> str:
     svg = f'''<svg xmlns="http://www.w3.org/2000/svg"
                   viewBox="{BOUNDS["left"] - padding} {-BOUNDS["top"] - padding} {w + 2*padding} {h + 2*padding}"
                   style="background: #fafafa; width: 100%; height: 500px; border-radius: 8px; border: 1px solid #e5e5e5;">
-        <!-- Work area border -->
         <rect x="{BOUNDS["left"]}" y="{-BOUNDS["top"]}" width="{w}" height="{h}"
               fill="#fff" stroke="#ccc" stroke-width="2"/>
-        <!-- Center crosshair -->
         <line x1="0" y1="{-BOUNDS["top"]}" x2="0" y2="{-BOUNDS["bottom"]}" stroke="#ddd" stroke-width="1"/>
         <line x1="{BOUNDS["left"]}" y1="0" x2="{BOUNDS["right"]}" y2="0" stroke="#ddd" stroke-width="1"/>
-        <!-- Grid -->
-        <defs>
-            <pattern id="grid" width="100" height="100" patternUnits="userSpaceOnUse">
-                <path d="M 100 0 L 0 0 0 100" fill="none" stroke="#eee" stroke-width="0.5"/>
-            </pattern>
-        </defs>
-        <rect x="{BOUNDS["left"]}" y="{-BOUNDS["top"]}" width="{w}" height="{h}" fill="url(#grid)"/>
     '''
-    # Draw paths - dark lines
     for path in paths:
         if len(path) < 2:
             continue
-        # SVG Y is inverted
         d = " ".join(f"{'M' if i == 0 else 'L'}{p[0]:.1f},{-p[1]:.1f}" for i, p in enumerate(path))
         svg += f'<path d="{d}" fill="none" stroke="#1a1a1a" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>'
-    # Stats
     total_points = sum(len(p) for p in paths)
     svg += f'''
         <text x="{BOUNDS["left"] + 10}" y="{-BOUNDS["top"] + 25}" fill="#666" font-family="monospace" font-size="14">
             Paths: {len(paths)} | Points: {total_points}
         </text>
     '''
     svg += "</svg>"
     return svg
-def generate(prompt: str, model_name: str, temperature: float, max_tokens: int):
-    """Generate gcode from prompt and return both code and visualization."""
     if not prompt or not prompt.strip():
-        empty_svg = gcode_to_svg("")
-        return "Enter a prompt to generate gcode", empty_svg
     try:
-        model, tokenizer, device = get_model(model_name)
-        model_id = MODELS[model_name]
-        inputs = tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)
-        inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_tokens,
-                do_sample=True,
-                temperature=temperature,
-                top_p=0.9,
-                pad_token_id=tokenizer.eos_token_id,
             )
-        # For causal models, skip the input tokens
-        if "gpt2" in model_id or "codegen" in model_id:
-            gcode = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
-        else:
-            gcode = tokenizer.decode(outputs[0], skip_special_tokens=True)
         gcode = validate_gcode(gcode)
         line_count = len(gcode.split("\n"))
-        # Generate SVG preview
         svg = gcode_to_svg(gcode)
-        gcode_with_header = f"; dcode output - {line_count} lines\n; Model: {model_name}\n; Machine validated\n\n{gcode}"
         return gcode_with_header, svg
     except Exception as e:
-        error_svg = gcode_to_svg("")
-        return f"; Error: {e}", error_svg
 # Custom CSS
 custom_css = """
-#preview-container {
-    background: #0a0a0a;
-    border-radius: 8px;
-    padding: 0;
-}
 .gradio-container {
     max-width: 1200px !important;
 }
@@ -222,9 +289,11 @@ custom_css = """
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as demo:
     gr.Markdown("""
     # dcode
-    **Text → Polargraph Gcode** | Generate machine-compatible gcode from natural language.
-    [GitHub](https://github.com/Twarner491/dcode) | [Model](https://huggingface.co/twarner/dcode-flan-t5-base) | [Dataset](https://huggingface.co/datasets/twarner/dcode-polargraph-gcode)
     """)
     with gr.Row():
@@ -234,24 +303,24 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as d
                 placeholder="drawing of a cat, abstract spiral, portrait...",
                 lines=2
             )
-            model_dropdown = gr.Dropdown(
-                choices=list(MODELS.keys()),
-                value="flan-t5-base (best)",
-                label="Model"
-            )
             with gr.Row():
-                temperature = gr.Slider(0.1, 1.5, value=0.8, label="Temperature", info="Higher = more creative")
-                max_tokens = gr.Slider(256, 2048, value=1024, step=256, label="Max Tokens")
             generate_btn = gr.Button("Generate", variant="primary", size="lg")
             gr.Examples(
                 examples=[
-                    ["drawing of a cat"],
                     ["abstract spiral pattern"],
                     ["simple house with chimney"],
-                    ["portrait of a woman"],
-                    ["geometric shapes"],
                 ],
                 inputs=prompt,
             )
@@ -260,7 +329,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as d
             preview = gr.HTML(
                 value=gcode_to_svg(""),
                 label="Preview",
-                elem_id="preview-container"
             )
     with gr.Accordion("Gcode Output", open=False):
@@ -273,12 +341,12 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as d
     generate_btn.click(
         generate,
-        [prompt, model_dropdown, temperature, max_tokens],
         [gcode_output, preview]
     )
     prompt.submit(
         generate,
-        [prompt, model_dropdown, temperature, max_tokens],
         [gcode_output, preview]
     )

+"""dcode Gradio Space - Text to Gcode via Latent Diffusion."""
 import re
 import gradio as gr
 import torch
+from pathlib import Path
 # Machine limits
 BOUNDS = {"left": -420.5, "right": 420.5, "top": 594.5, "bottom": -594.5}
+# Model caches
+_generator = None
def get_generator():
    """Load and cache the Stable Diffusion pipeline and the latent-to-gcode decoder.

    The first call builds the Stable Diffusion pipeline, downloads the decoder
    checkpoint and its config from the Hub, constructs the projector and
    decoder modules, and loads their weights. Later calls return the cached
    result stored in the module-level ``_generator``.

    Returns:
        dict: keys ``"pipe"``, ``"projector"``, ``"decoder"``, ``"tokenizer"``,
        ``"device"``, ``"dtype"`` and ``"max_seq_len"``.

    Raises:
        RuntimeError: from ``load_state_dict`` when the checkpoint keys or
            tensor shapes do not match the modules built here.
    """
    global _generator
    if _generator is not None:
        return _generator

    # Imported lazily: only needed the first time the generator is built.
    import json

    import torch.nn as nn
    from diffusers import StableDiffusionPipeline
    from huggingface_hub import hf_hub_download
    from transformers import AutoTokenizer

    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    print("Loading Stable Diffusion pipeline...")
    pipe = StableDiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1-base",
        torch_dtype=dtype,
        safety_checker=None,
    ).to(device)

    print("Loading gcode decoder...")
    model_path = hf_hub_download("twarner/dcode-latent-gcode", "pytorch_model.bin")
    config_path = hf_hub_download("twarner/dcode-latent-gcode", "config.json")
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

    class LatentProjector(nn.Module):
        """MLP mapping a flattened latent to one hidden-size vector."""

        def __init__(self, latent_dim, hidden_size):
            super().__init__()
            self.proj = nn.Sequential(
                nn.Linear(latent_dim, hidden_size * 2),
                nn.GELU(),
                nn.Linear(hidden_size * 2, hidden_size),
                nn.LayerNorm(hidden_size),
            )

        def forward(self, x):
            return self.proj(x)

    class GcodeDecoder(nn.Module):
        """Transformer decoder over gcode tokens with learned position embeddings."""

        def __init__(self, hidden_size, vocab_size, num_layers, num_heads, max_seq_len):
            super().__init__()
            self.embed = nn.Embedding(vocab_size, hidden_size)
            self.pos_embed = nn.Embedding(max_seq_len, hidden_size)
            layer = nn.TransformerDecoderLayer(
                hidden_size, num_heads, hidden_size * 4, batch_first=True
            )
            self.decoder = nn.TransformerDecoder(layer, num_layers)
            self.head = nn.Linear(hidden_size, vocab_size)
            self.max_seq_len = max_seq_len

        def forward(self, tgt, memory, tgt_mask=None):
            pos = torch.arange(tgt.size(1), device=tgt.device)
            x = self.embed(tgt) + self.pos_embed(pos)
            x = self.decoder(x, memory, tgt_mask=tgt_mask)
            return self.head(x)

    def _sub_state(state, prefix):
        """Return the entries under *prefix* with only that leading prefix removed."""
        cut = len(prefix)
        return {k[cut:]: v for k, v in state.items() if k.startswith(prefix)}

    # Flattened size of the latent fed to the projector (4 x 64 x 64).
    latent_dim = 4 * 64 * 64
    hidden_size = config.get("hidden_size", 512)
    vocab_size = tokenizer.vocab_size
    num_layers = config.get("num_layers", 6)
    num_heads = config.get("num_heads", 8)
    max_seq_len = config.get("max_seq_len", 1024)

    projector = LatentProjector(latent_dim, hidden_size).to(device, dtype)
    decoder = GcodeDecoder(
        hidden_size, vocab_size, num_layers, num_heads, max_seq_len
    ).to(device, dtype)

    # weights_only=True limits unpickling to tensors and plain containers; the
    # checkpoint is a downloaded file, so arbitrary pickled objects are refused.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    # Strip only the leading prefix. GcodeDecoder owns a submodule that is also
    # called "decoder", so a key such as "decoder.decoder.layers.0..." must
    # become "decoder.layers.0..."; str.replace() would remove both occurrences
    # and load_state_dict() would then reject the keys.
    projector.load_state_dict(_sub_state(state_dict, "projector."))
    decoder.load_state_dict(_sub_state(state_dict, "decoder."))
    projector.eval()
    decoder.eval()

    _generator = {
        "pipe": pipe,
        "projector": projector,
        "decoder": decoder,
        "tokenizer": tokenizer,
        "device": device,
        "dtype": dtype,
        "max_seq_len": max_seq_len,
    }
    print("Models loaded!")
    return _generator
 def validate_gcode(gcode: str) -> str:
     x, y = 0.0, 0.0
     pen_down = False
     lines = []
     for line in gcode.split("\n"):
         line = line.strip()
         if not line:
             continue
         parts = re.split(r'(?=[GM]\d)', line)
         for part in parts:
             part = part.strip()
                 lines.append(part)
     for line in lines:
         if "M280" in line.upper():
             match = re.search(r"S(\d+)", line, re.IGNORECASE)
             if match:
                 angle = int(match.group(1))
                 was_down = pen_down
+                pen_down = angle < 50
                 if was_down and not pen_down and len(current_path) > 1:
                     paths.append(current_path[:])
                     current_path = []
         x_match = re.search(r"X([-\d.]+)", line, re.IGNORECASE)
         y_match = re.search(r"Y([-\d.]+)", line, re.IGNORECASE)
     if len(current_path) > 1:
         paths.append(current_path)
     w = BOUNDS["right"] - BOUNDS["left"]
     h = BOUNDS["top"] - BOUNDS["bottom"]
     padding = 20
     svg = f'''<svg xmlns="http://www.w3.org/2000/svg"
                   viewBox="{BOUNDS["left"] - padding} {-BOUNDS["top"] - padding} {w + 2*padding} {h + 2*padding}"
                   style="background: #fafafa; width: 100%; height: 500px; border-radius: 8px; border: 1px solid #e5e5e5;">
         <rect x="{BOUNDS["left"]}" y="{-BOUNDS["top"]}" width="{w}" height="{h}"
               fill="#fff" stroke="#ccc" stroke-width="2"/>
         <line x1="0" y1="{-BOUNDS["top"]}" x2="0" y2="{-BOUNDS["bottom"]}" stroke="#ddd" stroke-width="1"/>
         <line x1="{BOUNDS["left"]}" y1="0" x2="{BOUNDS["right"]}" y2="0" stroke="#ddd" stroke-width="1"/>
     '''
     for path in paths:
         if len(path) < 2:
             continue
         d = " ".join(f"{'M' if i == 0 else 'L'}{p[0]:.1f},{-p[1]:.1f}" for i, p in enumerate(path))
         svg += f'<path d="{d}" fill="none" stroke="#1a1a1a" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>'
     total_points = sum(len(p) for p in paths)
     svg += f'''
         <text x="{BOUNDS["left"] + 10}" y="{-BOUNDS["top"] + 25}" fill="#666" font-family="monospace" font-size="14">
             Paths: {len(paths)} | Points: {total_points}
         </text>
     '''
     svg += "</svg>"
     return svg
def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float):
    """Generate gcode from a text prompt via latent diffusion.

    Runs the Stable Diffusion pipeline to obtain a latent, projects it to a
    single memory vector, samples tokens from the decoder autoregressively,
    then validates the decoded gcode and renders an SVG preview.

    Args:
        prompt: Text description of the drawing.
        temperature: Sampling temperature applied to the decoder logits.
        max_tokens: Upper bound on sampled tokens (also capped by the
            decoder's ``max_seq_len - 1``).
        num_steps: Number of diffusion inference steps.
        guidance: Classifier-free guidance scale passed to the pipeline.

    Returns:
        tuple[str, str]: ``(gcode_text, svg_markup)``. On an empty prompt the
        text is a hint message; on failure it is a ``; Error: ...`` comment.
        In both cases the SVG is the empty preview.
    """
    if not prompt or not prompt.strip():
        return "Enter a prompt to generate gcode", gcode_to_svg("")

    try:
        gen = get_generator()
        pipe = gen["pipe"]
        projector = gen["projector"]
        decoder = gen["decoder"]
        tokenizer = gen["tokenizer"]
        device = gen["device"]
        dtype = gen["dtype"]
        max_seq_len = gen["max_seq_len"]

        # UI sliders may deliver floats; range() and the scheduler need ints.
        num_steps = int(num_steps)
        max_new_tokens = max(0, min(int(max_tokens), max_seq_len - 1))
        # Guard against division by zero if a caller passes temperature <= 0.
        temperature = max(float(temperature), 1e-5)

        with torch.no_grad():
            # 1. Text -> latent via Stable Diffusion.
            result = pipe(
                prompt,
                num_inference_steps=num_steps,
                guidance_scale=guidance,
                output_type="latent",
            )
            latent = result.images  # expected [1, 4, 64, 64]

            # 2. Latent -> gcode via the decoder.
            # reshape() also handles a non-contiguous latent, unlike view().
            latent_flat = latent.reshape(1, -1).to(dtype)
            memory = projector(latent_flat).unsqueeze(1)  # [1, 1, hidden]

            # Explicit None check: a token id of 0 is valid and must not be
            # treated as "missing".
            bos_id = tokenizer.bos_token_id
            if bos_id is None:
                bos_id = tokenizer.pad_token_id
            eos_id = tokenizer.eos_token_id
            tokens = torch.tensor([[bos_id]], device=device)

            # Boolean causal mask (True = may not attend), built once and
            # sliced per step, so earlier positions never see later tokens.
            # Assumes the decoder was trained with a causal mask.
            full_mask = torch.triu(
                torch.ones(max_new_tokens + 1, max_new_tokens + 1, dtype=torch.bool, device=device),
                diagonal=1,
            )

            for _ in range(max_new_tokens):
                seq_len = tokens.size(1)
                logits = decoder(tokens, memory, tgt_mask=full_mask[:seq_len, :seq_len])
                # Sample in float32: half-precision softmax can overflow to
                # inf/NaN, which torch.multinomial rejects.
                next_logits = logits[:, -1, :].float() / temperature
                probs = torch.softmax(next_logits, dim=-1)
                next_token = torch.multinomial(probs, 1)
                tokens = torch.cat([tokens, next_token], dim=1)
                if next_token.item() == eos_id:
                    break

            gcode = tokenizer.decode(tokens[0], skip_special_tokens=True)

        gcode = validate_gcode(gcode)
        line_count = len(gcode.split("\n"))
        svg = gcode_to_svg(gcode)

        # Collapse all whitespace so a multi-line prompt stays on the single
        # ';' comment line and cannot inject uncommented lines into the gcode.
        header_prompt = " ".join(prompt.split())
        gcode_with_header = (
            f"; dcode output - {line_count} lines\n"
            f"; Prompt: {header_prompt}\n"
            f"; Machine validated\n\n{gcode}"
        )
        return gcode_with_header, svg
    except Exception as e:
        # Top-level UI boundary: log the traceback and show the error in the
        # output box instead of crashing the request.
        import traceback

        traceback.print_exc()
        return f"; Error: {e}", gcode_to_svg("")
 # Custom CSS
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
 }
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as demo:
     gr.Markdown("""
     # dcode
+    **Text → Polargraph Gcode via Latent Diffusion**
+    Uses Stable Diffusion to generate latents from text, then decodes to machine gcode.
+    [GitHub](https://github.com/Twarner491/dcode) | [Model](https://huggingface.co/twarner/dcode-latent-gcode) | [Dataset](https://huggingface.co/datasets/twarner/dcode-polargraph-gcode)
     """)
     with gr.Row():
                 placeholder="drawing of a cat, abstract spiral, portrait...",
                 lines=2
             )
+            with gr.Row():
+                temperature = gr.Slider(0.5, 1.5, value=0.9, label="Temperature")
+                max_tokens = gr.Slider(256, 1024, value=512, step=128, label="Max Tokens")
             with gr.Row():
+                num_steps = gr.Slider(10, 50, value=25, step=5, label="Diffusion Steps")
+                guidance = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="Guidance Scale")
             generate_btn = gr.Button("Generate", variant="primary", size="lg")
             gr.Examples(
                 examples=[
+                    ["line drawing of a cat"],
                     ["abstract spiral pattern"],
                     ["simple house with chimney"],
+                    ["portrait sketch"],
+                    ["geometric shapes and lines"],
                 ],
                 inputs=prompt,
             )
             preview = gr.HTML(
                 value=gcode_to_svg(""),
                 label="Preview",
             )
     with gr.Accordion("Gcode Output", open=False):
     generate_btn.click(
         generate,
+        [prompt, temperature, max_tokens, num_steps, guidance],
         [gcode_output, preview]
     )
     prompt.submit(
         generate,
+        [prompt, temperature, max_tokens, num_steps, guidance],
         [gcode_output, preview]
     )

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 torch>=2.0
 transformers>=4.36
 accelerate>=0.25

+gradio>=4.44.0
 torch>=2.0
 transformers>=4.36
+diffusers>=0.25
 accelerate>=0.25
+huggingface_hub>=0.20